mem: Add memory rank-to-rank delay

Add the following delay to the DRAM controller:
 - tCS : Different rank bus turnaround delay

This will be applied for
 1) read-to-read,
 2) write-to-write,
 3) write-to-read, and
 4) read-to-write
command sequences, where the new command accesses a different rank
than the previous burst.

The delay defaults to 2*tCK for each defined memory class. Note that
this does not correspond to one particular timing constraint, but is a
way of modelling all the associated constraints.

The DRAM controller has some minor changes to prioritize commands to
the same rank. This prioritization will only occur when the command
stream is not switching from a read to write or vice versa (in the
case of switching we have a gap in any case).

To prioritize commands to the same rank, the model will determine if there are
any commands queued (same type) to the same rank as the previous command.
This check will ensure that the 'same rank' command will be able to execute
without adding bubbles to the command flow, e.g. any ACT delay requirements
can be handled under the hood, allowing the burst to issue seamlessly.
This commit is contained in:
Wendy Elsasser 2014-09-20 17:17:57 -04:00
parent a384525355
commit b6ecfe9183
3 changed files with 179 additions and 68 deletions

View file

@ -156,11 +156,17 @@ class DRAMCtrl(AbstractMemory):
# to be sent. It is 7.8 us for a 64ms refresh requirement
tREFI = Param.Latency("Refresh command interval")
# write-to-read turn around penalty
tWTR = Param.Latency("Write to read switching time")
# write-to-read, same rank turnaround penalty
tWTR = Param.Latency("Write to read, same rank switching time")
# read-to-write turn around penalty, bus turnaround delay
tRTW = Param.Latency("Read to write switching time")
# read-to-write, same rank turnaround penalty
tRTW = Param.Latency("Read to write, same rank switching time")
# rank-to-rank bus delay penalty
# this does not correlate to a memory timing parameter and encompasses:
# 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
# different rank bus delay
tCS = Param.Latency("Rank to rank switching time")
# minimum row activate to row activate delay time
tRRD = Param.Latency("ACT to ACT delay")
@ -221,9 +227,12 @@ class DDR3_1600_x64(DRAMCtrl):
# Greater of 4 CK or 7.5 ns
tRTP = '7.5ns'
# Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns
# Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
tRTW = '2.5ns'
# Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
tCS = '2.5ns'
# <=85C, half for >85C
tREFI = '7.8us'
@ -296,9 +305,12 @@ class DDR4_2400_x64(DRAMCtrl):
# Greater of 4 CK or 7.5 ns
tRTP = '7.5ns'
# Default read-to-write bus around to 2 CK, @1200 MHz = 1.666 ns
# Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns
tRTW = '1.666ns'
# Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
tCS = '1.666ns'
# <=85C, half for >85C
tREFI = '7.8us'
@ -353,9 +365,12 @@ class DDR3_1333_x64_DRAMSim2(DRAMCtrl):
# Greater of 4 CK or 7.5 ns, 4 CK @ 666.66 MHz = 6 ns
tWTR = '7.5ns'
# Default read-to-write bus around to 2 CK, @666.66 MHz = 3 ns
# Default same rank rd-to-wr bus turnaround to 2 CK, @666.66 MHz = 3 ns
tRTW = '3ns'
# Default different rank bus delay to 2 CK, @666.66 MHz = 3 ns
tCS = '3ns'
tRRD = '6.0ns'
tXAW = '30ns'
@ -416,9 +431,12 @@ class LPDDR2_S4_1066_x32(DRAMCtrl):
# Irrespective of speed grade, tWTR is 7.5 ns
tWTR = '7.5ns'
# Default read-to-write bus around to 2 CK, @533 MHz = 3.75 ns
# Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
tRTW = '3.75ns'
# Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
tCS = '3.75ns'
# Activate to activate irrespective of density and speed grade
tRRD = '10.0ns'
@ -473,9 +491,12 @@ class WideIO_200_x128(DRAMCtrl):
# Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
tWTR = '15ns'
# Default read-to-write bus around to 2 CK, @200 MHz = 10 ns
# Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
tRTW = '10ns'
# Default different rank bus delay to 2 CK, @200 MHz = 10 ns
tCS = '10ns'
# Activate to activate irrespective of density and speed grade
tRRD = '10.0ns'
@ -536,9 +557,12 @@ class LPDDR3_1600_x32(DRAMCtrl):
# Irrespective of speed grade, tWTR is 7.5 ns
tWTR = '7.5ns'
# Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns
# Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
tRTW = '2.5ns'
# Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
tCS = '2.5ns'
# Activate to activate irrespective of density and speed grade
tRRD = '10.0ns'

View file

@ -76,7 +76,7 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
minWritesPerSwitch(p->min_writes_per_switch),
writesThisTime(0), readsThisTime(0),
tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tBURST(p->tBURST),
tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR),
tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
tXAW(p->tXAW), activationLimit(p->activation_limit),
@ -87,7 +87,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
backendLatency(p->static_backend_latency),
busBusyUntil(0), refreshDueAt(0), refreshState(REF_IDLE),
pwrStateTrans(PWR_IDLE), pwrState(PWR_IDLE), prevArrival(0),
nextReqTime(0), pwrStateTick(0), numBanksActive(0)
nextReqTime(0), pwrStateTick(0), numBanksActive(0),
activeRank(0)
{
// create the bank states based on the dimensions of the ranks and
// banks
@ -683,7 +684,7 @@ DRAMCtrl::processRespondEvent()
}
void
DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue)
DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
{
// This method does the arbitration between requests. The chosen
// packet is simply moved to the head of the queue. The other
@ -699,13 +700,13 @@ DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue)
if (memSchedPolicy == Enums::fcfs) {
// Do nothing, since the correct request is already head
} else if (memSchedPolicy == Enums::frfcfs) {
reorderQueue(queue);
reorderQueue(queue, switched_cmd_type);
} else
panic("No scheduling policy chosen\n");
}
void
DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue)
DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
{
// Only determine this when needed
uint64_t earliest_banks = 0;
@ -713,6 +714,7 @@ DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue)
// Search for row hits first, if no row hit is found then schedule the
// packet to one of the earliest banks available
bool found_earliest_pkt = false;
bool found_prepped_diff_rank_pkt = false;
auto selected_pkt_it = queue.begin();
for (auto i = queue.begin(); i != queue.end() ; ++i) {
@ -720,25 +722,30 @@ DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue)
const Bank& bank = dram_pkt->bankRef;
// Check if it is a row hit
if (bank.openRow == dram_pkt->row) {
// FCFS within the hits
DPRINTF(DRAM, "Row buffer hit\n");
selected_pkt_it = i;
break;
} else if (!found_earliest_pkt) {
// No row hit, go for first ready
if (dram_pkt->rank == activeRank || switched_cmd_type) {
// FCFS within the hits, giving priority to commands
// that access the same rank as the previous burst
// to minimize bus turnaround delays
// Only give rank priority when command type is not changing
DPRINTF(DRAM, "Row buffer hit\n");
selected_pkt_it = i;
break;
} else if (!found_prepped_diff_rank_pkt) {
// found row hit for command on different rank than prev burst
selected_pkt_it = i;
found_prepped_diff_rank_pkt = true;
}
} else if (!found_earliest_pkt & !found_prepped_diff_rank_pkt) {
// No row hit and
// haven't found an entry with a row hit to a new rank
if (earliest_banks == 0)
earliest_banks = minBankActAt(queue);
// Determine entries with earliest bank prep delay
// Function will give priority to commands that access the
// same rank as previous burst and can prep the bank seamlessly
earliest_banks = minBankPrep(queue, switched_cmd_type);
// simplistic approximation of when the bank can issue an
// activate, this is calculated in minBankActAt and could
// be cached
Tick act_at = bank.openRow == Bank::NO_ROW ?
bank.actAllowedAt :
std::max(bank.preAllowedAt, curTick()) + tRP;
// Bank is ready or is the first available bank
if (act_at <= curTick() ||
bits(earliest_banks, dram_pkt->bankId, dram_pkt->bankId)) {
// FCFS - Bank is first available bank
if (bits(earliest_banks, dram_pkt->bankId, dram_pkt->bankId)) {
// Remember the packet to be scheduled to one of the earliest
// banks available, FCFS amongst the earliest banks
selected_pkt_it = i;
@ -983,6 +990,9 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
// read/write (add a max with tCCD here)
bank.colAllowedAt = cmd_at + tBURST;
// Save rank of current access
activeRank = dram_pkt->rank;
// If this is a write, we also need to respect the write recovery
// time before a precharge, in the case of a read, respect the
// read to precharge constraint
@ -1095,6 +1105,9 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
void
DRAMCtrl::processNextReqEvent()
{
// pre-emptively set to false. Overwrite if in READ_TO_WRITE
// or WRITE_TO_READ state
bool switched_cmd_type = false;
if (busState == READ_TO_WRITE) {
DPRINTF(DRAM, "Switching to writes after %d reads with %d reads "
"waiting\n", readsThisTime, readQueue.size());
@ -1106,6 +1119,7 @@ DRAMCtrl::processNextReqEvent()
// now proceed to do the actual writes
busState = WRITE;
switched_cmd_type = true;
} else if (busState == WRITE_TO_READ) {
DPRINTF(DRAM, "Switching to reads after %d writes with %d writes "
"waiting\n", writesThisTime, writeQueue.size());
@ -1114,6 +1128,7 @@ DRAMCtrl::processNextReqEvent()
writesThisTime = 0;
busState = READ;
switched_cmd_type = true;
}
if (refreshState != REF_IDLE) {
@ -1160,10 +1175,26 @@ DRAMCtrl::processNextReqEvent()
} else {
// Figure out which read request goes next, and move it to the
// front of the read queue
chooseNext(readQueue);
chooseNext(readQueue, switched_cmd_type);
DRAMPacket* dram_pkt = readQueue.front();
// here we get a bit creative and shift the bus busy time not
// just the tWTR, but also a CAS latency to capture the fact
// that we are allowed to prepare a new bank, but not issue a
// read command until after tWTR, in essence we capture a
// bubble on the data bus that is tWTR + tCL
if (switched_cmd_type) {
// add a bubble to the data bus for write-to-read turn around
// or tCS (different rank bus delay).
busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL :
tCS;
} else if (dram_pkt->rank != activeRank) {
// add a bubble to the data bus, as defined by the
// tCS parameter for rank-to-rank delay
busBusyUntil += tCS;
}
doDRAMAccess(dram_pkt);
// At this point we're done dealing with the request
@ -1197,21 +1228,23 @@ DRAMCtrl::processNextReqEvent()
if (switch_to_writes) {
// transition to writing
busState = READ_TO_WRITE;
// add a bubble to the data bus, as defined by the
// tRTW parameter
busBusyUntil += tRTW;
// update the minimum timing between the requests,
// this shifts us back in time far enough to do any
// bank preparation
nextReqTime = busBusyUntil - (tRP + tRCD + tCL);
}
} else {
chooseNext(writeQueue);
chooseNext(writeQueue, switched_cmd_type);
DRAMPacket* dram_pkt = writeQueue.front();
// sanity check
assert(dram_pkt->size <= burstSize);
if (switched_cmd_type) {
// add a bubble to the data bus, as defined by the
// tRTW or tCS parameter, depending on whether changing ranks
busBusyUntil += (dram_pkt->rank == activeRank) ? tRTW : tCS;
} else if (dram_pkt->rank != activeRank) {
// add a bubble to the data bus, as defined by the
// tCS parameter for rank-to-rank delay
busBusyUntil += tCS;
}
doDRAMAccess(dram_pkt);
writeQueue.pop_front();
@ -1232,17 +1265,6 @@ DRAMCtrl::processNextReqEvent()
// case, which eventually will check for any draining and
// also pause any further scheduling if there is really
// nothing to do
// here we get a bit creative and shift the bus busy time not
// just the tWTR, but also a CAS latency to capture the fact
// that we are allowed to prepare a new bank, but not issue a
// read command until after tWTR, in essence we capture a
// bubble on the data bus that is tWTR + tCL
busBusyUntil += tWTR + tCL;
// update the minimum timing between the requests, this shifts
// us back in time far enough to do any bank preparation
nextReqTime = busBusyUntil - (tRP + tRCD + tCL);
}
}
@ -1259,12 +1281,19 @@ DRAMCtrl::processNextReqEvent()
}
uint64_t
DRAMCtrl::minBankActAt(const deque<DRAMPacket*>& queue) const
DRAMCtrl::minBankPrep(const deque<DRAMPacket*>& queue,
bool switched_cmd_type) const
{
uint64_t bank_mask = 0;
Tick min_act_at = MaxTick;
// deterimne if we have queued transactions targetting a
uint64_t bank_mask_same_rank = 0;
Tick min_act_at_same_rank = MaxTick;
// Give precedence to commands that access same rank as previous command
bool same_rank_match = false;
// determine if we have queued transactions targeting the
// bank in question
vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
for (auto p = queue.begin(); p != queue.end(); ++p) {
@ -1280,23 +1309,64 @@ DRAMCtrl::minBankActAt(const deque<DRAMPacket*>& queue) const
if (got_waiting[bank_id]) {
// simplistic approximation of when the bank can issue
// an activate, ignoring any rank-to-rank switching
// cost
// cost in this calculation
Tick act_at = banks[i][j].openRow == Bank::NO_ROW ?
banks[i][j].actAllowedAt :
std::max(banks[i][j].preAllowedAt, curTick()) + tRP;
if (act_at <= min_act_at) {
// reset bank mask if new minimum is found
if (act_at < min_act_at)
bank_mask = 0;
// set the bit corresponding to the available bank
replaceBits(bank_mask, bank_id, bank_id, 1);
min_act_at = act_at;
// prioritize commands that access the
// same rank as previous burst
// Calculate bank mask separately for the case and
// evaluate after loop iterations complete
if (i == activeRank && ranksPerChannel > 1) {
if (act_at <= min_act_at_same_rank) {
// reset same rank bank mask if new minimum is found
// and previous minimum could not immediately send ACT
if (act_at < min_act_at_same_rank &&
min_act_at_same_rank > curTick())
bank_mask_same_rank = 0;
// Set flag indicating that a same rank
// opportunity was found
same_rank_match = true;
// set the bit corresponding to the available bank
replaceBits(bank_mask_same_rank, bank_id, bank_id, 1);
min_act_at_same_rank = act_at;
}
} else {
if (act_at <= min_act_at) {
// reset bank mask if new minimum is found
// and either previous minimum could not immediately send ACT
if (act_at < min_act_at && min_act_at > curTick())
bank_mask = 0;
// set the bit corresponding to the available bank
replaceBits(bank_mask, bank_id, bank_id, 1);
min_act_at = act_at;
}
}
}
}
}
// Determine the earliest time when the next burst can issue based
// on the current busBusyUntil delay.
// Offset by tRCD to correlate with ACT timing variables
Tick min_cmd_at = busBusyUntil - tCL - tRCD;
// Prioritize same rank accesses that can issue B2B
// Only optimize for same ranks when the command type
// does not change; do not want to unnecessarily incur tWTR
//
// Resulting FCFS prioritization Order is:
// 1) Commands that access the same rank as previous burst
// and can prep the bank seamlessly.
// 2) Commands (any rank) with earliest bank prep
if (!switched_cmd_type && same_rank_match &&
min_act_at_same_rank <= min_cmd_at) {
bank_mask = bank_mask_same_rank;
}
return bank_mask;
}

View file

@ -368,23 +368,36 @@ class DRAMCtrl : public AbstractMemory
* The memory scheduler/arbiter - picks which request needs to
* go next, based on the specified policy such as FCFS or FR-FCFS
* and moves it to the head of the queue.
* Prioritizes accesses to the same rank as previous burst unless
* controller is switching command type.
*
* @param queue Queued requests to consider
* @param switched_cmd_type Command type is changing
*/
void chooseNext(std::deque<DRAMPacket*>& queue);
void chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type);
/**
* For FR-FCFS policy reorder the read/write queue depending on row buffer
* hits and earliest banks available in DRAM
* Prioritizes accesses to the same rank as previous burst unless
* controller is switching command type.
*
* @param queue Queued requests to consider
* @param switched_cmd_type Command type is changing
*/
void reorderQueue(std::deque<DRAMPacket*>& queue);
void reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type);
/**
* Find which are the earliest banks ready to issue an activate
* for the enqueued requests. Assumes maximum of 64 banks per DIMM
* Also checks if the bank is already prepped.
*
* @param Queued requests to consider
* @param queue Queued requests to consider
* @param switched_cmd_type Command type is changing
* @return One-hot encoded mask of bank indices
*/
uint64_t minBankActAt(const std::deque<DRAMPacket*>& queue) const;
uint64_t minBankPrep(const std::deque<DRAMPacket*>& queue,
bool switched_cmd_type) const;
/**
* Keep track of when row activations happen, in order to enforce
@ -475,6 +488,7 @@ class DRAMCtrl : public AbstractMemory
const Tick M5_CLASS_VAR_USED tCK;
const Tick tWTR;
const Tick tRTW;
const Tick tCS;
const Tick tBURST;
const Tick tRCD;
const Tick tCL;
@ -664,6 +678,9 @@ class DRAMCtrl : public AbstractMemory
// To track number of banks which are currently active
unsigned int numBanksActive;
// Holds the value of the rank of burst issued
uint8_t activeRank;
/** @todo this is a temporary workaround until the 4-phase code is
* committed. upstream caches need this packet until true is returned, so
* hold onto it for deletion until a subsequent call