mem: Add DDR4 bank group timing

Added the following parameter to the DRAMCtrl class:
 - bank_groups_per_rank

This defaults to 0, which indicates that bank group architecture is not
used. For the DDR4 case, the default is overridden to indicate bank group
architecture, with multiple bank groups per rank.

Added the following delays to the DRAMCtrl class:
 - tCCD_L : CAS-to-CAS, same bank group delay
 - tRRD_L : RAS-to-RAS, same bank group delay

These parameters are only applied when bank group timing is enabled. Bank
group timing is currently enabled only for DDR4 memories.

For all other memories, these delays default to '0ns'.

In the DRAM controller model, applied the bank group timing to the per bank
parameters actAllowedAt and colAllowedAt.
The actAllowedAt will be updated based on bank group when an ACT is issued.
The colAllowedAt will be updated based on bank group when a RD/WR burst is
issued.

At the moment no modifications are made to the scheduling.
This commit is contained in:
Wendy Elsasser 2014-09-20 17:18:21 -04:00
parent b6ecfe9183
commit bf23847072
3 changed files with 143 additions and 29 deletions

View file

@ -111,6 +111,11 @@ class DRAMCtrl(AbstractMemory):
"device/chip")
devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
ranks_per_channel = Param.Unsigned("Number of ranks per channel")
# default to 0 bank groups per rank, indicating bank group architecture
# is not used
# update per memory class when bank group architecture is supported
bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
banks_per_rank = Param.Unsigned("Number of banks per rank")
# only used for the address mapping as the controller by
# construction is a single channel and multiple controllers have
@ -147,8 +152,17 @@ class DRAMCtrl(AbstractMemory):
# This parameter has to account for burst length.
# Read/Write requests with data size larger than one full burst are broken
# down into multiple requests in the controller
# tBURST is equivalent to the CAS-to-CAS delay (tCCD)
# With bank group architectures, tBURST represents the CAS-to-CAS
# delay for bursts to different bank groups (tCCD_S)
tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
# CAS-to-CAS delay for bursts to the same bank group
# only utilized with bank group architectures; set to 0 for default case
# tBURST is equivalent to tCCD_S; no explicit parameter required
# for CAS-to-CAS delay for bursts to different bank groups
tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
# time taken to complete one refresh cycle (N rows in all banks)
tRFC = Param.Latency("Refresh cycle time")
@ -171,6 +185,9 @@ class DRAMCtrl(AbstractMemory):
# minimum row activate to row activate delay time
tRRD = Param.Latency("ACT to ACT delay")
# only utilized with bank group architectures; set to 0 for default case
tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
# time window in which a maximum number of activates are allowed
# to take place, set to 0 to disable
tXAW = Param.Latency("X activation window")
@ -274,6 +291,10 @@ class DDR4_2400_x64(DRAMCtrl):
# Use a single rank
ranks_per_channel = 1
# DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
# Set to 4 for x4, x8 case
bank_groups_per_rank = 4
# DDR4 has 16 banks (4 bank groups) in all
# configurations. Currently we do not capture the additional
# constraints incurred by the bank groups
@ -283,16 +304,29 @@ class DDR4_2400_x64(DRAMCtrl):
tCK = '0.833ns'
# 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
# tBURST is equivalent to the CAS-to-CAS delay (tCCD)
# With bank group architectures, tBURST represents the CAS-to-CAS
# delay for bursts to different bank groups (tCCD_S)
tBURST = '3.333ns'
# @2400 data rate, tCCD_L is 6 CK
# CAS-to-CAS delay for bursts to the same bank group
# tBURST is equivalent to tCCD_S; no explicit parameter required
# for CAS-to-CAS delay for bursts to different bank groups
tCCD_L = '5ns';
# DDR4-2400 17-17-17
tRCD = '14.16ns'
tCL = '14.16ns'
tRP = '14.16ns'
tRAS = '32ns'
# Here using the average of RRD_S and RRD_L
tRRD = '4.1ns'
# RRD_S (different bank group) for 1K page is MAX(4 CK, 3.3ns)
tRRD = '3.3ns'
# RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
tRRD_L = '4.9ns';
tXAW = '21ns'
activation_limit = 4
tRFC = '260ns'

View file

@ -69,6 +69,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
columnsPerRowBuffer(rowBufferSize / burstSize),
columnsPerStripe(range.granularity() / burstSize),
ranksPerChannel(p->ranks_per_channel),
bankGroupsPerRank(p->bank_groups_per_rank),
bankGroupArch(p->bank_groups_per_rank > 0),
banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size),
@ -77,9 +79,9 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
minWritesPerSwitch(p->min_writes_per_switch),
writesThisTime(0), readsThisTime(0),
tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR),
tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
tXAW(p->tXAW), activationLimit(p->activation_limit),
tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS),
tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
tRRD_L(p->tRRD_L), tXAW(p->tXAW), activationLimit(p->activation_limit),
memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
pageMgmt(p->page_policy),
maxAccessesPerRow(p->max_accesses_per_row),
@ -104,6 +106,19 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
for (int b = 0; b < banksPerRank; b++) {
banks[r][b].rank = r;
banks[r][b].bank = b;
if (bankGroupArch) {
// Simply assign lower bits to bank group in order to
// rotate across bank groups as banks are incremented
// e.g. with 4 banks per bank group and 16 banks total:
// banks 0,4,8,12 are in bank group 0
// banks 1,5,9,13 are in bank group 1
// banks 2,6,10,14 are in bank group 2
// banks 3,7,11,15 are in bank group 3
banks[r][b].bankgr = b % bankGroupsPerRank;
} else {
// No bank groups; simply assign to bank number
banks[r][b].bankgr = b;
}
}
}
@ -168,6 +183,35 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
tREFI, tRP, tRFC);
}
// basic bank group architecture checks ->
if (bankGroupArch) {
// must have at least one bank per bank group
if (bankGroupsPerRank > banksPerRank) {
fatal("banks per rank (%d) must be equal to or larger than "
"banks groups per rank (%d)\n",
banksPerRank, bankGroupsPerRank);
}
// must have same number of banks in each bank group
if ((banksPerRank % bankGroupsPerRank) != 0) {
fatal("Banks per rank (%d) must be evenly divisible by bank groups "
"per rank (%d) for equal banks per bank group\n",
banksPerRank, bankGroupsPerRank);
}
// tCCD_L should be greater than minimal, back-to-back burst delay
if (tCCD_L <= tBURST) {
fatal("tCCD_L (%d) should be larger than tBURST (%d) when "
"bank groups per rank (%d) is greater than 1\n",
tCCD_L, tBURST, bankGroupsPerRank);
}
// tRRD_L (same bank group) should be greater than tRRD, the minimal
// ACT-to-ACT delay that applies between different bank groups
if (tRRD_L <= tRRD) {
fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
"bank groups per rank (%d) is greater than 1\n",
tRRD_L, tRRD, bankGroupsPerRank);
}
}
}
void
@ -824,14 +868,25 @@ DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row)
bank.preAllowedAt = act_tick + tRAS;
// Respect the row-to-column command delay
bank.colAllowedAt = act_tick + tRCD;
bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt);
// start by enforcing tRRD
for(int i = 0; i < banksPerRank; i++) {
// next activate to any bank in this rank must not happen
// before tRRD
banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
banks[rank][i].actAllowedAt);
if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) {
// bank group architecture requires longer delays between
// ACT commands within the same bank group. Use tRRD_L
// in this case
banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L,
banks[rank][i].actAllowedAt);
} else {
// use shorter tRRD value when either
// 1) bank group architecture is not supported
// 2) bank is in a different bank group
banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
banks[rank][i].actAllowedAt);
}
}
// next, we deal with tXAW, if the activation limit is disabled
@ -986,9 +1041,38 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
// only one burst can use the bus at any one point in time
assert(dram_pkt->readyTime - busBusyUntil >= tBURST);
// not strictly necessary, but update the time for the next
// read/write (add a max with tCCD here)
bank.colAllowedAt = cmd_at + tBURST;
// update the time for the next read/write burst for each
// bank (add a max with tCCD/tCCD_L here)
Tick cmd_dly;
for(int j = 0; j < ranksPerChannel; j++) {
for(int i = 0; i < banksPerRank; i++) {
// next burst to same bank group in this rank must not happen
// before tCCD_L. Different bank group timing requirement is
// tBURST; Add tCS for different ranks
if (dram_pkt->rank == j) {
if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) {
// bank group architecture requires longer delays between
// RD/WR burst commands to the same bank group.
// Use tCCD_L in this case
cmd_dly = tCCD_L;
} else {
// use tBURST (equivalent to tCCD_S), the shorter
// cas-to-cas delay value, when either:
// 1) bank group architecture is not supported
// 2) bank is in a different bank group
cmd_dly = tBURST;
}
} else {
// different rank is by default in a different bank group
// use tBURST (equivalent to tCCD_S), which is the shorter
// cas-to-cas delay in this case
// Add tCS to account for rank-to-rank bus delay requirements
cmd_dly = tBURST + tCS;
}
banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly,
banks[j][i].colAllowedAt);
}
}
// Save rank of current access
activeRank = dram_pkt->rank;
@ -1184,15 +1268,8 @@ DRAMCtrl::processNextReqEvent()
// that we are allowed to prepare a new bank, but not issue a
// read command until after tWTR, in essence we capture a
// bubble on the data bus that is tWTR + tCL
if (switched_cmd_type) {
// add a bubble to the data bus for write-to-read turn around
// or tCS (different rank bus delay).
busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL :
tCS;
} else if (dram_pkt->rank != activeRank) {
// add a bubble to the data bus, as defined by the
// tCS parameter for rank-to-rank delay
busBusyUntil += tCS;
if (switched_cmd_type && dram_pkt->rank == activeRank) {
busBusyUntil += tWTR + tCL;
}
doDRAMAccess(dram_pkt);
@ -1235,14 +1312,12 @@ DRAMCtrl::processNextReqEvent()
// sanity check
assert(dram_pkt->size <= burstSize);
if (switched_cmd_type) {
// add a bubble to the data bus, as defined by the
// tRTW or tCS parameter, depending on whether changing ranks
busBusyUntil += (dram_pkt->rank == activeRank) ? tRTW : tCS;
} else if (dram_pkt->rank != activeRank) {
// add a bubble to the data bus, as defined by the
// tCS parameter for rank-to-rank delay
busBusyUntil += tCS;
// add a bubble to the data bus, as defined by the
// tRTW when access is to the same rank as previous burst
// Different rank timing is handled with tCS, which is
// applied to colAllowedAt
if (switched_cmd_type && dram_pkt->rank == activeRank) {
busBusyUntil += tRTW;
}
doDRAMAccess(dram_pkt);

View file

@ -158,6 +158,7 @@ class DRAMCtrl : public AbstractMemory
uint32_t openRow;
uint8_t rank;
uint8_t bank;
uint8_t bankgr;
Tick colAllowedAt;
Tick preAllowedAt;
@ -167,7 +168,7 @@ class DRAMCtrl : public AbstractMemory
uint32_t bytesAccessed;
Bank() :
openRow(NO_ROW), rank(0), bank(0),
openRow(NO_ROW), rank(0), bank(0), bankgr(0),
colAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
rowAccesses(0), bytesAccessed(0)
{ }
@ -470,6 +471,8 @@ class DRAMCtrl : public AbstractMemory
const uint32_t columnsPerRowBuffer;
const uint32_t columnsPerStripe;
const uint32_t ranksPerChannel;
const uint32_t bankGroupsPerRank;
const bool bankGroupArch;
const uint32_t banksPerRank;
const uint32_t channels;
uint32_t rowsPerBank;
@ -490,6 +493,7 @@ class DRAMCtrl : public AbstractMemory
const Tick tRTW;
const Tick tCS;
const Tick tBURST;
const Tick tCCD_L;
const Tick tRCD;
const Tick tCL;
const Tick tRP;
@ -499,6 +503,7 @@ class DRAMCtrl : public AbstractMemory
const Tick tRFC;
const Tick tREFI;
const Tick tRRD;
const Tick tRRD_L;
const Tick tXAW;
const uint32_t activationLimit;