mem: Fix address interleaving bug in DRAM controller
This patch fixes a bug in the DRAM controller address decoding. In cases where the DRAM burst size (e.g. 32 bytes in a rank with a single LPDDR3 x32) was smaller than the channel interleaving size (e.g. systems with a 64-byte cache line) one address bit effectively got used as a channel bit when it should have been a low-order column bit. This patch adds a notion of "columns per stripe", and more clearly deals with the low-order column bits and high-order column bits. The patch also relaxes the granularity check such that it is possible to use interleaving granularities other than the cache line size. The patch also adds a missing M5_CLASS_VAR_USED to the tCK member as it is only used in the debug build for now.
This commit is contained in:
parent
04d1f61ae8
commit
56b7796e0d
|
@ -67,6 +67,7 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
|
||||||
burstSize((devicesPerRank * burstLength * deviceBusWidth) / 8),
|
burstSize((devicesPerRank * burstLength * deviceBusWidth) / 8),
|
||||||
rowBufferSize(devicesPerRank * deviceRowBufferSize),
|
rowBufferSize(devicesPerRank * deviceRowBufferSize),
|
||||||
columnsPerRowBuffer(rowBufferSize / burstSize),
|
columnsPerRowBuffer(rowBufferSize / burstSize),
|
||||||
|
columnsPerStripe(range.granularity() / burstSize),
|
||||||
ranksPerChannel(p->ranks_per_channel),
|
ranksPerChannel(p->ranks_per_channel),
|
||||||
banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
|
banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
|
||||||
readBufferSize(p->read_buffer_size),
|
readBufferSize(p->read_buffer_size),
|
||||||
|
@ -122,6 +123,7 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
|
||||||
|
|
||||||
rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
|
rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
|
||||||
|
|
||||||
|
// a bit of sanity checks on the interleaving
|
||||||
if (range.interleaved()) {
|
if (range.interleaved()) {
|
||||||
if (channels != range.stripes())
|
if (channels != range.stripes())
|
||||||
fatal("%s has %d interleaved address stripes but %d channel(s)\n",
|
fatal("%s has %d interleaved address stripes but %d channel(s)\n",
|
||||||
|
@ -129,18 +131,34 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
|
||||||
|
|
||||||
if (addrMapping == Enums::RoRaBaChCo) {
|
if (addrMapping == Enums::RoRaBaChCo) {
|
||||||
if (rowBufferSize != range.granularity()) {
|
if (rowBufferSize != range.granularity()) {
|
||||||
fatal("Interleaving of %s doesn't match RoRaBaChCo "
|
fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
|
||||||
"address map\n", name());
|
"address map\n", name());
|
||||||
}
|
}
|
||||||
} else if (addrMapping == Enums::RoRaBaCoCh) {
|
} else if (addrMapping == Enums::RoRaBaCoCh ||
|
||||||
if (system()->cacheLineSize() != range.granularity()) {
|
addrMapping == Enums::RoCoRaBaCh) {
|
||||||
fatal("Interleaving of %s doesn't match RoRaBaCoCh "
|
// for the interleavings with channel bits in the bottom,
|
||||||
"address map\n", name());
|
// if the system uses a channel striping granularity that
|
||||||
|
// is larger than the DRAM burst size, then map the
|
||||||
|
// sequential accesses within a stripe to a number of
|
||||||
|
// columns in the DRAM, effectively placing some of the
|
||||||
|
// lower-order column bits as the least-significant bits
|
||||||
|
// of the address (above the ones denoting the burst size)
|
||||||
|
assert(columnsPerStripe >= 1);
|
||||||
|
|
||||||
|
// channel striping has to be done at a granularity that
|
||||||
|
// is equal or larger to a cache line
|
||||||
|
if (system()->cacheLineSize() > range.granularity()) {
|
||||||
|
fatal("Channel interleaving of %s must be at least as large "
|
||||||
|
"as the cache line size\n", name());
|
||||||
}
|
}
|
||||||
} else if (addrMapping == Enums::RoCoRaBaCh) {
|
|
||||||
if (system()->cacheLineSize() != range.granularity())
|
// ...and equal or smaller than the row-buffer size
|
||||||
fatal("Interleaving of %s doesn't match RoCoRaBaCh "
|
if (rowBufferSize < range.granularity()) {
|
||||||
"address map\n", name());
|
fatal("Channel interleaving of %s must be at most as large "
|
||||||
|
"as the row-buffer size\n", name());
|
||||||
|
}
|
||||||
|
// this is essentially the check above, so just to be sure
|
||||||
|
assert(columnsPerStripe <= columnsPerRowBuffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -228,7 +246,8 @@ DRAMCtrl::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size,
|
||||||
// always the top bits, and check before creating the DRAMPacket
|
// always the top bits, and check before creating the DRAMPacket
|
||||||
uint64_t row;
|
uint64_t row;
|
||||||
|
|
||||||
// truncate the address to the access granularity
|
// truncate the address to a DRAM burst, which makes it unique to
|
||||||
|
// a specific column, row, bank, rank and channel
|
||||||
Addr addr = dramPktAddr / burstSize;
|
Addr addr = dramPktAddr / burstSize;
|
||||||
|
|
||||||
// we have removed the lowest order address bits that denote the
|
// we have removed the lowest order address bits that denote the
|
||||||
|
@ -255,11 +274,14 @@ DRAMCtrl::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size,
|
||||||
row = addr % rowsPerBank;
|
row = addr % rowsPerBank;
|
||||||
addr = addr / rowsPerBank;
|
addr = addr / rowsPerBank;
|
||||||
} else if (addrMapping == Enums::RoRaBaCoCh) {
|
} else if (addrMapping == Enums::RoRaBaCoCh) {
|
||||||
|
// take out the lower-order column bits
|
||||||
|
addr = addr / columnsPerStripe;
|
||||||
|
|
||||||
// take out the channel part of the address
|
// take out the channel part of the address
|
||||||
addr = addr / channels;
|
addr = addr / channels;
|
||||||
|
|
||||||
// next, the column
|
// next, the higher-order column bites
|
||||||
addr = addr / columnsPerRowBuffer;
|
addr = addr / (columnsPerRowBuffer / columnsPerStripe);
|
||||||
|
|
||||||
// after the column bits, we get the bank bits to interleave
|
// after the column bits, we get the bank bits to interleave
|
||||||
// over the banks
|
// over the banks
|
||||||
|
@ -278,6 +300,9 @@ DRAMCtrl::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size,
|
||||||
// optimise for closed page mode and utilise maximum
|
// optimise for closed page mode and utilise maximum
|
||||||
// parallelism of the DRAM (at the cost of power)
|
// parallelism of the DRAM (at the cost of power)
|
||||||
|
|
||||||
|
// take out the lower-order column bits
|
||||||
|
addr = addr / columnsPerStripe;
|
||||||
|
|
||||||
// take out the channel part of the address, not that this has
|
// take out the channel part of the address, not that this has
|
||||||
// to match with how accesses are interleaved between the
|
// to match with how accesses are interleaved between the
|
||||||
// controllers in the address mapping
|
// controllers in the address mapping
|
||||||
|
@ -292,9 +317,8 @@ DRAMCtrl::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size,
|
||||||
rank = addr % ranksPerChannel;
|
rank = addr % ranksPerChannel;
|
||||||
addr = addr / ranksPerChannel;
|
addr = addr / ranksPerChannel;
|
||||||
|
|
||||||
// next the column bits which we do not need to keep track of
|
// next, the higher-order column bites
|
||||||
// and simply skip past
|
addr = addr / (columnsPerRowBuffer / columnsPerStripe);
|
||||||
addr = addr / columnsPerRowBuffer;
|
|
||||||
|
|
||||||
// lastly, get the row bits
|
// lastly, get the row bits
|
||||||
row = addr % rowsPerBank;
|
row = addr % rowsPerBank;
|
||||||
|
|
|
@ -453,6 +453,7 @@ class DRAMCtrl : public AbstractMemory
|
||||||
const uint32_t burstSize;
|
const uint32_t burstSize;
|
||||||
const uint32_t rowBufferSize;
|
const uint32_t rowBufferSize;
|
||||||
const uint32_t columnsPerRowBuffer;
|
const uint32_t columnsPerRowBuffer;
|
||||||
|
const uint32_t columnsPerStripe;
|
||||||
const uint32_t ranksPerChannel;
|
const uint32_t ranksPerChannel;
|
||||||
const uint32_t banksPerRank;
|
const uint32_t banksPerRank;
|
||||||
const uint32_t channels;
|
const uint32_t channels;
|
||||||
|
@ -469,7 +470,7 @@ class DRAMCtrl : public AbstractMemory
|
||||||
* Basic memory timing parameters initialized based on parameter
|
* Basic memory timing parameters initialized based on parameter
|
||||||
* values.
|
* values.
|
||||||
*/
|
*/
|
||||||
const Tick tCK;
|
const Tick M5_CLASS_VAR_USED tCK;
|
||||||
const Tick tWTR;
|
const Tick tWTR;
|
||||||
const Tick tRTW;
|
const Tick tRTW;
|
||||||
const Tick tBURST;
|
const Tick tBURST;
|
||||||
|
|
Loading…
Reference in a new issue