From 5c2c3f598ee08875c273e78db7755e1306bea46e Mon Sep 17 00:00:00 2001 From: Andreas Hansson Date: Fri, 9 May 2014 18:58:48 -0400 Subject: [PATCH] mem: Make DRAM read/write switching less conservative This patch changes the read/write event loop to use a single event (nextReqEvent), along with a state variable, thus joining the two control flows. This change makes it easier to follow the state transitions, and control what happens when. With the new loop we modify the overly conservative switching times such that the write-to-read switch allows bank preparation to happen in parallel with the bus turn around. Similarly, the read-to-write switch uses the introduced tRTW constraint. --- src/mem/DRAMCtrl.py | 22 ++- src/mem/dram_ctrl.cc | 368 +++++++++++++++++++++---------------------- src/mem/dram_ctrl.hh | 62 +++----- 3 files changed, 221 insertions(+), 231 deletions(-) diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py index 3237e602d..895b9624d 100644 --- a/src/mem/DRAMCtrl.py +++ b/src/mem/DRAMCtrl.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2013 ARM Limited +# Copyright (c) 2012-2014 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -147,9 +147,12 @@ class DRAMCtrl(AbstractMemory): # to be sent. 
It is 7.8 us for a 64ms refresh requirement tREFI = Param.Latency("Refresh command interval") - # write-to-read turn around penalty, assumed same as read-to-write + # write-to-read turn around penalty tWTR = Param.Latency("Write to read switching time") + # read-to-write turn around penalty, bus turnaround delay + tRTW = Param.Latency("Read to write switching time") + # minimum row activate to row activate delay time tRRD = Param.Latency("ACT to ACT delay") @@ -205,6 +208,9 @@ class DDR3_1600_x64(DRAMCtrl): # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns tWTR = '7.5ns' + # Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns + tRTW = '2.5ns' + # Assume 5 CK for activate to activate for different banks tRRD = '6.25ns' @@ -259,6 +265,9 @@ class DDR3_1333_x64_DRAMSim2(DRAMCtrl): # Greater of 4 CK or 7.5 ns, 4 CK @ 666.66 MHz = 6 ns tWTR = '7.5ns' + # Default read-to-write bus around to 2 CK, @666.66 MHz = 3 ns + tRTW = '3ns' + tRRD = '6.0ns' tXAW = '30ns' @@ -312,6 +321,9 @@ class LPDDR2_S4_1066_x32(DRAMCtrl): # Irrespective of speed grade, tWTR is 7.5 ns tWTR = '7.5ns' + # Default read-to-write bus around to 2 CK, @533 MHz = 3.75 ns + tRTW = '3.75ns' + # Activate to activate irrespective of density and speed grade tRRD = '10.0ns' @@ -360,6 +372,9 @@ class WideIO_200_x128(DRAMCtrl): # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns tWTR = '15ns' + # Default read-to-write bus around to 2 CK, @200 MHz = 10 ns + tRTW = '10ns' + # Activate to activate irrespective of density and speed grade tRRD = '10.0ns' @@ -413,6 +428,9 @@ class LPDDR3_1600_x32(DRAMCtrl): # Irrespective of speed grade, tWTR is 7.5 ns tWTR = '7.5ns' + # Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns + tRTW = '2.5ns' + # Activate to activate irrespective of density and speed grade tRRD = '10.0ns' diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc index 289763218..c701ac616 100644 --- a/src/mem/dram_ctrl.cc +++ b/src/mem/dram_ctrl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2010-2013 ARM Limited + * Copyright (c) 2010-2014 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -55,8 +55,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : AbstractMemory(p), port(name() + ".port", *this), retryRdReq(false), retryWrReq(false), - rowHitFlag(false), stopReads(false), - writeEvent(this), respondEvent(this), + rowHitFlag(false), busState(READ), + respondEvent(this), refreshEvent(this), nextReqEvent(this), drainManager(NULL), deviceBusWidth(p->device_bus_width), burstLength(p->burst_length), deviceRowBufferSize(p->device_rowbuffer_size), @@ -72,7 +72,7 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0), minWritesPerSwitch(p->min_writes_per_switch), writesThisTime(0), readsThisTime(0), - tWTR(p->tWTR), tBURST(p->tBURST), + tWTR(p->tWTR), tRTW(p->tRTW), tBURST(p->tBURST), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD), tXAW(p->tXAW), activationLimit(p->activation_limit), @@ -82,7 +82,7 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : frontendLatency(p->static_frontend_latency), backendLatency(p->static_backend_latency), busBusyUntil(0), prevArrival(0), - newTime(0), startTickPrechargeAll(0), numBanksActive(0) + nextReqTime(0), startTickPrechargeAll(0), numBanksActive(0) { // create the bank states based on the dimensions of the ranks and // banks @@ -150,6 +150,12 @@ DRAMCtrl::startup() // current tick startTickPrechargeAll = curTick(); + // shift the bus busy time sufficiently far ahead that we never + // have to worry about negative values when computing the time for + // the next request, this will add an insignificant bubble at the + // start of simulation + busBusyUntil = curTick() + tRP + tRCD + tCL; + // print the configuration of the controller printParams(); @@ -374,99 +380,14 @@ DRAMCtrl::addToReadQueue(PacketPtr pkt, unsigned int pktCount) if (burst_helper != 
NULL) burst_helper->burstsServiced = pktsServicedByWrQ; - // If we are not already scheduled to get the read request out of - // the queue, do so now - if (!nextReqEvent.scheduled() && !stopReads) { + // If we are not already scheduled to get a request out of the + // queue, do so now + if (!nextReqEvent.scheduled()) { DPRINTF(DRAM, "Request scheduled immediately\n"); schedule(nextReqEvent, curTick()); } } -void -DRAMCtrl::processWriteEvent() -{ - assert(!writeQueue.empty()); - - DPRINTF(DRAM, "Beginning DRAM Write\n"); - Tick temp1 M5_VAR_USED = std::max(curTick(), busBusyUntil); - Tick temp2 M5_VAR_USED = std::max(curTick(), maxBankFreeAt()); - - chooseNextWrite(); - DRAMPacket* dram_pkt = writeQueue.front(); - // sanity check - assert(dram_pkt->size <= burstSize); - doDRAMAccess(dram_pkt); - - writeQueue.pop_front(); - delete dram_pkt; - - DPRINTF(DRAM, "Writing, bus busy for %lld ticks, banks busy " - "for %lld ticks\n", busBusyUntil - temp1, maxBankFreeAt() - temp2); - - // If we emptied the write queue, or got sufficiently below the - // threshold (using the minWritesPerSwitch as the hysteresis) and - // are not draining, or we have reads waiting and have done enough - // writes, then switch to reads. 
The retry above could already - // have caused it to be scheduled, so first check - if (writeQueue.empty() || - (writeQueue.size() + minWritesPerSwitch < writeLowThreshold && - !drainManager) || - (!readQueue.empty() && writesThisTime >= minWritesPerSwitch)) { - // turn the bus back around for reads again - busBusyUntil += tWTR; - stopReads = false; - - DPRINTF(DRAM, "Switching to reads after %d writes with %d writes " - "waiting\n", writesThisTime, writeQueue.size()); - - wrPerTurnAround.sample(writesThisTime); - writesThisTime = 0; - - if (!nextReqEvent.scheduled()) - schedule(nextReqEvent, busBusyUntil); - } else { - assert(!writeEvent.scheduled()); - DPRINTF(DRAM, "Next write scheduled at %lld\n", newTime); - schedule(writeEvent, newTime); - } - - if (retryWrReq) { - retryWrReq = false; - port.sendRetry(); - } - - // if there is nothing left in any queue, signal a drain - if (writeQueue.empty() && readQueue.empty() && - respQueue.empty () && drainManager) { - drainManager->signalDrainDone(); - drainManager = NULL; - } -} - - -void -DRAMCtrl::triggerWrites() -{ - DPRINTF(DRAM, "Switching to writes after %d reads with %d reads " - "waiting\n", readsThisTime, readQueue.size()); - - // Flag variable to stop any more read scheduling - stopReads = true; - - Tick write_start_time = std::max(busBusyUntil, curTick()) + tWTR; - - DPRINTF(DRAM, "Writes scheduled at %lld\n", write_start_time); - - // there is some danger here as there might still be reads - // happening before the switch actually takes place - rdPerTurnAround.sample(readsThisTime); - readsThisTime = 0; - - assert(write_start_time >= curTick()); - assert(!writeEvent.scheduled()); - schedule(writeEvent, write_start_time); -} - void DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pktCount) { @@ -573,9 +494,11 @@ DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pktCount) // different front end latency accessAndRespond(pkt, frontendLatency); - // If your write buffer is starting to fill up, drain it! 
- if (writeQueue.size() >= writeHighThreshold && !stopReads){ - triggerWrites(); + // If we are not already scheduled to get a request out of the + // queue, do so now + if (!nextReqEvent.scheduled()) { + DPRINTF(DRAM, "Request scheduled immediately\n"); + schedule(nextReqEvent, curTick()); } } @@ -625,9 +548,10 @@ DRAMCtrl::printParams() const "tRFC %d ticks\n" \ "tREFI %d ticks\n" \ "tWTR %d ticks\n" \ + "tRTW %d ticks\n" \ "tXAW (%d) %d ticks\n", name(), tRCD, tCL, tRP, tBURST, tRFC, tREFI, tWTR, - activationLimit, tXAW); + tRTW, activationLimit, tXAW); } void @@ -768,55 +692,25 @@ DRAMCtrl::processRespondEvent() } void -DRAMCtrl::chooseNextWrite() +DRAMCtrl::chooseNext(std::deque& queue) { - // This method does the arbitration between write requests. The - // chosen packet is simply moved to the head of the write - // queue. The other methods know that this is the place to - // look. For example, with FCFS, this method does nothing - assert(!writeQueue.empty()); + // This method does the arbitration between requests. The chosen + // packet is simply moved to the head of the queue. The other + // methods know that this is the place to look. For example, with + // FCFS, this method does nothing + assert(!queue.empty()); - if (writeQueue.size() == 1) { - DPRINTF(DRAM, "Single write request, nothing to do\n"); + if (queue.size() == 1) { + DPRINTF(DRAM, "Single request, nothing to do\n"); return; } if (memSchedPolicy == Enums::fcfs) { // Do nothing, since the correct request is already head } else if (memSchedPolicy == Enums::frfcfs) { - reorderQueue(writeQueue); + reorderQueue(queue); } else panic("No scheduling policy chosen\n"); - - DPRINTF(DRAM, "Selected next write request\n"); -} - -bool -DRAMCtrl::chooseNextRead() -{ - // This method does the arbitration between read requests. The - // chosen packet is simply moved to the head of the queue. The - // other methods know that this is the place to look. 
For example, - // with FCFS, this method does nothing - if (readQueue.empty()) { - DPRINTF(DRAM, "No read request to select\n"); - return false; - } - - // If there is only one request then there is nothing left to do - if (readQueue.size() == 1) - return true; - - if (memSchedPolicy == Enums::fcfs) { - // Do nothing, since the request to serve is already the first - // one in the read queue - } else if (memSchedPolicy == Enums::frfcfs) { - reorderQueue(readQueue); - } else - panic("No scheduling policy chosen!\n"); - - DPRINTF(DRAM, "Selected next read request\n"); - return true; } void @@ -975,12 +869,6 @@ DRAMCtrl::estimateLatency(DRAMPacket* dram_pkt, Tick inTime) return make_pair(bankLat, accLat); } -void -DRAMCtrl::processNextReqEvent() -{ - scheduleNextReq(); -} - void DRAMCtrl::recordActivate(Tick act_tick, uint8_t rank, uint8_t bank) { @@ -1197,7 +1085,7 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt) curTick(), accessLat, dram_pkt->readyTime, busBusyUntil); // Make sure requests are not overlapping on the databus - assert (dram_pkt->readyTime - busBusyUntil >= tBURST); + assert(dram_pkt->readyTime - busBusyUntil >= tBURST); // Update bus state busBusyUntil = dram_pkt->readyTime; @@ -1205,49 +1093,32 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt) DPRINTF(DRAM,"Access time is %lld\n", dram_pkt->readyTime - dram_pkt->entryTime); - // Update the minimum timing between the requests - newTime = (busBusyUntil > tRP + tRCD + tCL) ? - std::max(busBusyUntil - (tRP + tRCD + tCL), curTick()) : curTick(); + // Update the minimum timing between the requests, this is a + // conservative estimate of when we have to schedule the next + // request to not introduce any unnecessary bubbles. In most cases + // we will wake up sooner than we have to. 
+ nextReqTime = busBusyUntil - (tRP + tRCD + tCL); - // Update the access related stats + // Update the stats and schedule the next request if (dram_pkt->isRead) { ++readsThisTime; if (rowHitFlag) readRowHits++; bytesReadDRAM += burstSize; perBankRdBursts[dram_pkt->bankId]++; + + // Update latency stats + totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime; + totBankLat += bankLat; + totBusLat += tBURST; + totQLat += dram_pkt->readyTime - dram_pkt->entryTime - bankLat - + tBURST; } else { ++writesThisTime; if (rowHitFlag) writeRowHits++; bytesWritten += burstSize; perBankWrBursts[dram_pkt->bankId]++; - - // At this point, commonality between reads and writes ends. - // For writes, we are done since we long ago responded to the - // requestor. - return; - } - - // Update latency stats - totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime; - totBankLat += bankLat; - totBusLat += tBURST; - totQLat += dram_pkt->readyTime - dram_pkt->entryTime - bankLat - tBURST; - - - // At this point we're done dealing with the request - // It will be moved to a separate response queue with a - // correct readyTime, and eventually be sent back at that - //time - moveToRespQ(); - - // Schedule the next read event - if (!nextReqEvent.scheduled() && !stopReads) { - schedule(nextReqEvent, newTime); - } else { - if (newTime < nextReqEvent.when()) - reschedule(nextReqEvent, newTime); } } @@ -1293,21 +1164,137 @@ DRAMCtrl::moveToRespQ() } void -DRAMCtrl::scheduleNextReq() +DRAMCtrl::processNextReqEvent() { - DPRINTF(DRAM, "Reached scheduleNextReq()\n"); + if (busState == READ_TO_WRITE) { + DPRINTF(DRAM, "Switching to writes after %d reads with %d reads " + "waiting\n", readsThisTime, readQueue.size()); - // Figure out which read request goes next, and move it to the - // front of the read queue - if (!chooseNextRead()) { - // In the case there is no read request to go next, trigger - // writes if we have passed the low threshold (or if we are - // draining) - if 
(!writeQueue.empty() && !writeEvent.scheduled() && - (writeQueue.size() > writeLowThreshold || drainManager)) - triggerWrites(); + // sample and reset the read-related stats as we are now + // transitioning to writes, and all reads are done + rdPerTurnAround.sample(readsThisTime); + readsThisTime = 0; + + // now proceed to do the actual writes + busState = WRITE; + } else if (busState == WRITE_TO_READ) { + DPRINTF(DRAM, "Switching to reads after %d writes with %d writes " + "waiting\n", writesThisTime, writeQueue.size()); + + wrPerTurnAround.sample(writesThisTime); + writesThisTime = 0; + + busState = READ; + } + + // when we get here it is either a read or a write + if (busState == READ) { + + // track if we should switch or not + bool switch_to_writes = false; + + if (readQueue.empty()) { + // In the case there is no read request to go next, + // trigger writes if we have passed the low threshold (or + // if we are draining) + if (!writeQueue.empty() && + (drainManager || writeQueue.size() > writeLowThreshold)) { + + switch_to_writes = true; + } else { + // check if we are drained + if (respQueue.empty () && drainManager) { + drainManager->signalDrainDone(); + drainManager = NULL; + } + + // nothing to do, not even any point in scheduling an + // event for the next request + return; + } + } else { + // Figure out which read request goes next, and move it to the + // front of the read queue + chooseNext(readQueue); + + doDRAMAccess(readQueue.front()); + + // At this point we're done dealing with the request + // It will be moved to a separate response queue with a + // correct readyTime, and eventually be sent back at that + // time + moveToRespQ(); + + // we have so many writes that we have to transition + if (writeQueue.size() > writeHighThreshold) { + switch_to_writes = true; + } + } + + // switching to writes, either because the read queue is empty + // and the writes have passed the low threshold (or we are + // draining), or because the writes hit the high
threshold + if (switch_to_writes) { + // transition to writing + busState = READ_TO_WRITE; + + // add a bubble to the data bus, as defined by the + // tRTW parameter + busBusyUntil += tRTW; + + // update the minimum timing between the requests, + // this shifts us back in time far enough to do any + // bank preparation + nextReqTime = busBusyUntil - (tRP + tRCD + tCL); + } } else { - doDRAMAccess(readQueue.front()); + chooseNext(writeQueue); + DRAMPacket* dram_pkt = writeQueue.front(); + // sanity check + assert(dram_pkt->size <= burstSize); + doDRAMAccess(dram_pkt); + + writeQueue.pop_front(); + delete dram_pkt; + + // If we emptied the write queue, or got sufficiently below the + // threshold (using the minWritesPerSwitch as the hysteresis) and + // are not draining, or we have reads waiting and have done enough + // writes, then switch to reads. + if (writeQueue.empty() || + (writeQueue.size() + minWritesPerSwitch < writeLowThreshold && + !drainManager) || + (!readQueue.empty() && writesThisTime >= minWritesPerSwitch)) { + // turn the bus back around for reads again + busState = WRITE_TO_READ; + + // note that we switch back to reads also in the idle + // case, which eventually will check for any draining and + // also pause any further scheduling if there is really + // nothing to do + + // here we get a bit creative and shift the bus busy time not + // just the tWTR, but also a CAS latency to capture the fact + // that we are allowed to prepare a new bank, but not issue a + // read command until after tWTR, in essence we capture a + // bubble on the data bus that is tWTR + tCL + busBusyUntil += tWTR + tCL; + + // update the minimum timing between the requests, this shifts + // us back in time far enough to do any bank preparation + nextReqTime = busBusyUntil - (tRP + tRCD + tCL); + } + } + + schedule(nextReqEvent, std::max(nextReqTime, curTick())); + + // If there is space available and we have writes waiting then let + // them retry. 
This is done here to ensure that the retry does not + // cause a nextReqEvent to be scheduled before we do so as part of + // the next request processing + if (retryWrReq && writeQueue.size() < writeBufferSize) { + retryWrReq = false; + port.sendRetry(); } } @@ -1681,13 +1668,12 @@ DRAMCtrl::drain(DrainManager *dm) respQueue.size()); ++count; drainManager = dm; + // the only part that is not drained automatically over time - // is the write queue, thus trigger writes if there are any - // waiting and no reads waiting, otherwise wait until the - // reads are done - if (readQueue.empty() && !writeQueue.empty() && - !writeEvent.scheduled()) - triggerWrites(); + // is the write queue, thus kick things into action if needed + if (!writeQueue.empty() && !nextReqEvent.scheduled()) { + schedule(nextReqEvent, curTick()); + } } if (count) diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh index 749296634..8f2e4825e 100644 --- a/src/mem/dram_ctrl.hh +++ b/src/mem/dram_ctrl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012-2014 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -127,11 +127,17 @@ class DRAMCtrl : public AbstractMemory bool rowHitFlag; /** - * Use this flag to shutoff reads, i.e. do not schedule any reads - * beyond those already done so that we can turn the bus around - * and do a few writes, or refresh, or whatever + * Bus state used to control the read/write switching and drive + * the scheduling of the next request. 
*/ - bool stopReads; + enum BusState { + READ = 0, + READ_TO_WRITE, + WRITE, + WRITE_TO_READ + }; + + BusState busState; /** List to keep track of activate ticks */ std::vector> actTicks; @@ -250,13 +256,10 @@ class DRAMCtrl : public AbstractMemory /** * Bunch of things requires to setup "events" in gem5 - * When event "writeEvent" occurs for example, the method - * processWriteEvent is called; no parameters are allowed + * When event "respondEvent" occurs for example, the method + * processRespondEvent is called; no parameters are allowed * in these methods */ - void processWriteEvent(); - EventWrapper writeEvent; - void processRespondEvent(); EventWrapper respondEvent; @@ -324,12 +327,6 @@ class DRAMCtrl : public AbstractMemory */ void doDRAMAccess(DRAMPacket* dram_pkt); - /** - * Check when the channel is free to turnaround, add turnaround - * delay and schedule a whole bunch of writes. - */ - void triggerWrites(); - /** * When a packet reaches its "readyTime" in the response Q, * use the "access()" method in AbstractMemory to actually @@ -357,20 +354,11 @@ class DRAMCtrl : public AbstractMemory bool isRead); /** - * The memory schduler/arbiter - picks which read request needs to + * The memory scheduler/arbiter - picks which request needs to * go next, based on the specified policy such as FCFS or FR-FCFS - * and moves it to the head of the read queue. - * - * @return True if a request was chosen and false if queue is empty + * and moves it to the head of the queue. 
*/ - bool chooseNextRead(); - - /** - * Calls chooseNextReq() to pick the right request, then calls - * doDRAMAccess on that request in order to actually service - * that request - */ - void scheduleNextReq(); + void chooseNext(std::deque& queue); /** *Looks at the state of the banks, channels, row buffer hits etc @@ -394,11 +382,6 @@ class DRAMCtrl : public AbstractMemory */ void moveToRespQ(); - /** - * Scheduling policy within the write queue - */ - void chooseNextWrite(); - /** * For FR-FCFS policy reorder the read/write queue depending on row buffer * hits and earliest banks available in DRAM @@ -495,6 +478,7 @@ class DRAMCtrl : public AbstractMemory * values. */ const Tick tWTR; + const Tick tRTW; const Tick tBURST; const Tick tRCD; const Tick tCL; @@ -541,11 +525,13 @@ class DRAMCtrl : public AbstractMemory Tick prevArrival; - // The absolute soonest you have to start thinking about the - // next request is the longest access time that can occur before - // busBusyUntil. Assuming you need to precharge, - // open a new row, and access, it is tRP + tRCD + tCL - Tick newTime; + /** + * The soonest you have to start thinking about the next request + * is the longest access time that can occur before + * busBusyUntil. Assuming you need to precharge, open a new row, + * and access, it is tRP + tRCD + tCL. + */ + Tick nextReqTime; // All statistics that the model needs to capture Stats::Scalar readReqs;