mem: Replacing bytesPerCacheLine with DRAM burstLength in SimpleDRAM

This patch gets rid of bytesPerCacheLine parameter and makes the DRAM
configuration separate from cache line size. Instead of
bytesPerCacheLine, we define a parameter for the DRAM called
burst_length. The burst_length parameter shows the length of a DRAM
device burst in beats. Also, lines_per_rowbuffer is replaced with
device_rowbuffer_size to improve code portability.

This patch adds a burst length in beats for each memory type, an
interface width for each memory type, and the memory controller model
is extended to reason about "system" packets vs "dram" packets and
assemble the responses properly. It means that system packets larger
than a full burst are split into multiple dram packets.
This commit is contained in:
Amin Farmahini 2013-08-19 03:52:30 -04:00
parent 7a61f667f0
commit 243f135e5f
3 changed files with 387 additions and 191 deletions

View file

@ -10,6 +10,9 @@
# unmodified and in its entirety in all distributions of the software, # unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form. # modified or unmodified, in source code or in binary form.
# #
# Copyright (c) 2013 Amin Farmahini-Farahani
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without # Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are # modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright # met: redistributions of source code must retain the above copyright
@ -118,7 +121,12 @@ class SimpleDRAM(AbstractMemory):
static_backend_latency = Param.Latency("10ns", "Static backend latency") static_backend_latency = Param.Latency("10ns", "Static backend latency")
# the physical organisation of the DRAM # the physical organisation of the DRAM
lines_per_rowbuffer = Param.Unsigned("Row buffer size in cache lines") device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\
"device/chip")
burst_length = Param.Unsigned("Burst length (BL) in beats")
device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
"device/chip")
devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
ranks_per_channel = Param.Unsigned("Number of ranks per channel") ranks_per_channel = Param.Unsigned("Number of ranks per channel")
banks_per_rank = Param.Unsigned("Number of banks per rank") banks_per_rank = Param.Unsigned("Number of banks per rank")
# only used for the address mapping as the controller by # only used for the address mapping as the controller by
@ -141,9 +149,9 @@ class SimpleDRAM(AbstractMemory):
# time to complete a burst transfer, typically the burst length # time to complete a burst transfer, typically the burst length
# divided by two due to the DDR bus, but by making it a parameter # divided by two due to the DDR bus, but by making it a parameter
# it is easier to also evaluate SDR memories like WideIO. # it is easier to also evaluate SDR memories like WideIO.
# This parameter has to account for bus width and burst length. # This parameter has to account for burst length.
# Adjustment also necessary if cache line size is greater than # Read/Write requests with data size larger than one full burst are broken
# data size read/written by one full burst. # down into multiple requests in the SimpleDRAM controller
tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)") tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
# time taken to complete one refresh cycle (N rows in all banks) # time taken to complete one refresh cycle (N rows in all banks)
@ -170,15 +178,22 @@ class SimpleDRAM(AbstractMemory):
# tRC - assumed to be 4 * tRP # tRC - assumed to be 4 * tRP
# burst length for an access derived from the cache line size
# A single DDR3 x64 interface (one command and address bus), with # A single DDR3 x64 interface (one command and address bus), with
# default timings based on DDR3-1600 4 Gbit parts in an 8x8 # default timings based on DDR3-1600 4 Gbit parts in an 8x8
# configuration, which would amount to 4 Gbyte of memory. # configuration, which would amount to 4 Gbyte of memory.
class DDR3_1600_x64(SimpleDRAM): class DDR3_1600_x64(SimpleDRAM):
# Assuming 64 byte cache lines, and a 1kbyte page size per module # 8x8 configuration, 8 devices each with an 8-bit interface
device_bus_width = 8
# DDR3 is a BL8 device
burst_length = 8
# Each device has a page (row buffer) size of 1KB
# (this depends on the memory density) # (this depends on the memory density)
lines_per_rowbuffer = 128 device_rowbuffer_size = '1kB'
# 8x8 configuration, so 8 devices
devices_per_rank = 8
# Use two ranks # Use two ranks
ranks_per_channel = 2 ranks_per_channel = 2
@ -191,8 +206,8 @@ class DDR3_1600_x64(SimpleDRAM):
tCL = '13.75ns' tCL = '13.75ns'
tRP = '13.75ns' tRP = '13.75ns'
# Assuming 64 byte cache lines, across an x64 # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz.
# interface, translates to BL8, 4 clocks @ 800 MHz # Note this is a BL8 DDR device.
tBURST = '5ns' tBURST = '5ns'
# DDR3, 4 Gbit has a tRFC of 240 CK and tCK = 1.25 ns # DDR3, 4 Gbit has a tRFC of 240 CK and tCK = 1.25 ns
@ -213,9 +228,18 @@ class DDR3_1600_x64(SimpleDRAM):
# default timings based on a LPDDR2-1066 4 Gbit part in a 1x32 # default timings based on a LPDDR2-1066 4 Gbit part in a 1x32
# configuration. # configuration.
class LPDDR2_S4_1066_x32(SimpleDRAM): class LPDDR2_S4_1066_x32(SimpleDRAM):
# Assuming 64 byte cache lines, use a 1kbyte page size, this # 1x32 configuration, 1 device with a 32-bit interface
# depends on the memory density device_bus_width = 32
lines_per_rowbuffer = 16
# LPDDR2_S4 is a BL4 and BL8 device
burst_length = 8
# Each device has a page (row buffer) size of 1KB
# (this depends on the memory density)
device_rowbuffer_size = '1kB'
# 1x32 configuration, so 1 device
devices_per_rank = 1
# Use a single rank # Use a single rank
ranks_per_channel = 1 ranks_per_channel = 1
@ -232,10 +256,11 @@ class LPDDR2_S4_1066_x32(SimpleDRAM):
# Pre-charge one bank 15 ns (all banks 18 ns) # Pre-charge one bank 15 ns (all banks 18 ns)
tRP = '15ns' tRP = '15ns'
# Assuming 64 byte cache lines, across a x32 DDR interface # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
# translates to two BL8, 8 clocks @ 533 MHz. Note that this is a # Note this is a BL8 DDR device.
# simplification # Requests larger than 32 bytes are broken down into multiple requests
tBURST = '15ns' # in the SimpleDRAM controller
tBURST = '7.5ns'
# LPDDR2-S4, 4 Gbit # LPDDR2-S4, 4 Gbit
tRFC = '130ns' tRFC = '130ns'
@ -251,9 +276,18 @@ class LPDDR2_S4_1066_x32(SimpleDRAM):
# A single WideIO x128 interface (one command and address bus), with # A single WideIO x128 interface (one command and address bus), with
# default timings based on an estimated WIO-200 8 Gbit part. # default timings based on an estimated WIO-200 8 Gbit part.
class WideIO_200_x128(SimpleDRAM): class WideIO_200_x128(SimpleDRAM):
# Assuming 64 byte cache lines, use a 4kbyte page size, this # 1x128 configuration, 1 device with a 128-bit interface
# depends on the memory density device_bus_width = 128
lines_per_rowbuffer = 64
# This is a BL4 device
burst_length = 4
# Each device has a page (row buffer) size of 4KB
# (this depends on the memory density)
device_rowbuffer_size = '4kB'
# 1x128 configuration, so 1 device
devices_per_rank = 1
# Use one rank for a one-high die stack # Use one rank for a one-high die stack
ranks_per_channel = 1 ranks_per_channel = 1
@ -266,8 +300,8 @@ class WideIO_200_x128(SimpleDRAM):
tCL = '18ns' tCL = '18ns'
tRP = '18ns' tRP = '18ns'
# Assuming 64 byte cache lines, across an x128 SDR interface, # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz.
# translates to BL4, 4 clocks @ 200 MHz # Note this is a BL4 SDR device.
tBURST = '20ns' tBURST = '20ns'
# WIO 8 Gb # WIO 8 Gb
@ -287,9 +321,18 @@ class WideIO_200_x128(SimpleDRAM):
# default timings based on a LPDDR3-1600 4 Gbit part in a 1x32 # default timings based on a LPDDR3-1600 4 Gbit part in a 1x32
# configuration # configuration
class LPDDR3_1600_x32(SimpleDRAM): class LPDDR3_1600_x32(SimpleDRAM):
# 4 Gbit and 8 Gbit devices use a 1 kByte page size, so ssuming 64 # 1x32 configuration, 1 device with a 32-bit interface
# byte cache lines, that is 16 lines device_bus_width = 32
lines_per_rowbuffer = 16
# LPDDR3 is a BL8 device
burst_length = 8
# Each device has a page (row buffer) size of 1KB
# (this depends on the memory density)
device_rowbuffer_size = '1kB'
# 1x32 configuration, so 1 device
devices_per_rank = 1
# Use a single rank # Use a single rank
ranks_per_channel = 1 ranks_per_channel = 1
@ -306,9 +349,11 @@ class LPDDR3_1600_x32(SimpleDRAM):
# Pre-charge one bank 15 ns (all banks 18 ns) # Pre-charge one bank 15 ns (all banks 18 ns)
tRP = '15ns' tRP = '15ns'
# Assuming 64 byte cache lines, across a x32 DDR interface # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
# translates to two bursts of BL8, 8 clocks @ 800 MHz # Note this is a BL8 DDR device.
tBURST = '10ns' # Requests larger than 32 bytes are broken down into multiple requests
# in the SimpleDRAM controller
tBURST = '5ns'
# LPDDR3, 4 Gb # LPDDR3, 4 Gb
tRFC = '130ns' tRFC = '130ns'

View file

@ -11,6 +11,9 @@
* unmodified and in its entirety in all distributions of the software, * unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form. * modified or unmodified, in source code or in binary form.
* *
* Copyright (c) 2013 Amin Farmahini-Farahani
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are * modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright * met: redistributions of source code must retain the above copyright
@ -54,8 +57,11 @@ SimpleDRAM::SimpleDRAM(const SimpleDRAMParams* p) :
rowHitFlag(false), stopReads(false), actTicks(p->activation_limit, 0), rowHitFlag(false), stopReads(false), actTicks(p->activation_limit, 0),
writeEvent(this), respondEvent(this), writeEvent(this), respondEvent(this),
refreshEvent(this), nextReqEvent(this), drainManager(NULL), refreshEvent(this), nextReqEvent(this), drainManager(NULL),
bytesPerCacheLine(0), deviceBusWidth(p->device_bus_width), burstLength(p->burst_length),
linesPerRowBuffer(p->lines_per_rowbuffer), deviceRowBufferSize(p->device_rowbuffer_size),
devicesPerRank(p->devices_per_rank),
burstSize((devicesPerRank * burstLength * deviceBusWidth) / 8),
rowBufferSize(devicesPerRank * deviceRowBufferSize),
ranksPerChannel(p->ranks_per_channel), ranksPerChannel(p->ranks_per_channel),
banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0), banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
readBufferSize(p->read_buffer_size), readBufferSize(p->read_buffer_size),
@ -93,22 +99,22 @@ SimpleDRAM::init()
port.sendRangeChange(); port.sendRangeChange();
} }
// get the burst size from the connected port as it is currently
// assumed to be equal to the cache line size
bytesPerCacheLine = _system->cacheLineSize();
// we could deal with plenty options here, but for now do a quick // we could deal with plenty options here, but for now do a quick
// sanity check // sanity check
if (bytesPerCacheLine != 64 && bytesPerCacheLine != 32) DPRINTF(DRAM, "Burst size %d bytes\n", burstSize);
panic("Unexpected burst size %d", bytesPerCacheLine);
// determine the rows per bank by looking at the total capacity // determine the rows per bank by looking at the total capacity
uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size()); uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity, DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
AbstractMemory::size()); AbstractMemory::size());
rowsPerBank = capacity / (bytesPerCacheLine * linesPerRowBuffer *
banksPerRank * ranksPerChannel); columnsPerRowBuffer = rowBufferSize / burstSize;
DPRINTF(DRAM, "Row buffer size %d bytes with %d columns per row buffer\n",
rowBufferSize, columnsPerRowBuffer);
rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
if (range.interleaved()) { if (range.interleaved()) {
if (channels != range.stripes()) if (channels != range.stripes())
@ -116,18 +122,17 @@ SimpleDRAM::init()
name(), range.stripes(), channels); name(), range.stripes(), channels);
if (addrMapping == Enums::RaBaChCo) { if (addrMapping == Enums::RaBaChCo) {
if (bytesPerCacheLine * linesPerRowBuffer != if (rowBufferSize != range.granularity()) {
range.granularity()) {
panic("Interleaving of %s doesn't match RaBaChCo address map\n", panic("Interleaving of %s doesn't match RaBaChCo address map\n",
name()); name());
} }
} else if (addrMapping == Enums::RaBaCoCh) { } else if (addrMapping == Enums::RaBaCoCh) {
if (bytesPerCacheLine != range.granularity()) { if (burstSize != range.granularity()) {
panic("Interleaving of %s doesn't match RaBaCoCh address map\n", panic("Interleaving of %s doesn't match RaBaCoCh address map\n",
name()); name());
} }
} else if (addrMapping == Enums::CoRaBaCh) { } else if (addrMapping == Enums::CoRaBaCh) {
if (bytesPerCacheLine != range.granularity()) if (burstSize != range.granularity())
panic("Interleaving of %s doesn't match CoRaBaCh address map\n", panic("Interleaving of %s doesn't match CoRaBaCh address map\n",
name()); name());
} }
@ -162,24 +167,26 @@ SimpleDRAM::recvAtomic(PacketPtr pkt)
} }
bool bool
SimpleDRAM::readQueueFull() const SimpleDRAM::readQueueFull(unsigned int neededEntries) const
{ {
DPRINTF(DRAM, "Read queue limit %d current size %d\n", DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n",
readBufferSize, readQueue.size() + respQueue.size()); readBufferSize, readQueue.size() + respQueue.size(),
neededEntries);
return (readQueue.size() + respQueue.size()) == readBufferSize; return
(readQueue.size() + respQueue.size() + neededEntries) > readBufferSize;
} }
bool bool
SimpleDRAM::writeQueueFull() const SimpleDRAM::writeQueueFull(unsigned int neededEntries) const
{ {
DPRINTF(DRAM, "Write queue limit %d current size %d\n", DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n",
writeBufferSize, writeQueue.size()); writeBufferSize, writeQueue.size(), neededEntries);
return writeQueue.size() == writeBufferSize; return (writeQueue.size() + neededEntries) > writeBufferSize;
} }
SimpleDRAM::DRAMPacket* SimpleDRAM::DRAMPacket*
SimpleDRAM::decodeAddr(PacketPtr pkt) SimpleDRAM::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size)
{ {
// decode the address based on the address mapping scheme, with // decode the address based on the address mapping scheme, with
// Ra, Co, Ba and Ch denoting rank, column, bank and channel, // Ra, Co, Ba and Ch denoting rank, column, bank and channel,
@ -188,17 +195,15 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
uint16_t bank; uint16_t bank;
uint16_t row; uint16_t row;
Addr addr = pkt->getAddr();
// truncate the address to the access granularity // truncate the address to the access granularity
addr = addr / bytesPerCacheLine; Addr addr = dramPktAddr / burstSize;
// we have removed the lowest order address bits that denote the // we have removed the lowest order address bits that denote the
// position within the cache line // position within the column
if (addrMapping == Enums::RaBaChCo) { if (addrMapping == Enums::RaBaChCo) {
// the lowest order bits denote the column to ensure that // the lowest order bits denote the column to ensure that
// sequential cache lines occupy the same row // sequential cache lines occupy the same row
addr = addr / linesPerRowBuffer; addr = addr / columnsPerRowBuffer;
// take out the channel part of the address // take out the channel part of the address
addr = addr / channels; addr = addr / channels;
@ -221,7 +226,7 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
addr = addr / channels; addr = addr / channels;
// next, the column // next, the column
addr = addr / linesPerRowBuffer; addr = addr / columnsPerRowBuffer;
// after the column bits, we get the bank bits to interleave // after the column bits, we get the bank bits to interleave
// over the banks // over the banks
@ -256,7 +261,7 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
// next the column bits which we do not need to keep track of // next the column bits which we do not need to keep track of
// and simply skip past // and simply skip past
addr = addr / linesPerRowBuffer; addr = addr / columnsPerRowBuffer;
// lastly, get the row bits // lastly, get the row bits
row = addr % rowsPerBank; row = addr % rowsPerBank;
@ -269,54 +274,98 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
assert(row < rowsPerBank); assert(row < rowsPerBank);
DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n", DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
pkt->getAddr(), rank, bank, row); dramPktAddr, rank, bank, row);
// create the corresponding DRAM packet with the entry time and // create the corresponding DRAM packet with the entry time and
// ready time set to the current tick, the latter will be updated // ready time set to the current tick, the latter will be updated
// later // later
return new DRAMPacket(pkt, rank, bank, row, pkt->getAddr(), return new DRAMPacket(pkt, rank, bank, row, dramPktAddr, size,
banks[rank][bank]); banks[rank][bank]);
} }
void void
SimpleDRAM::addToReadQueue(PacketPtr pkt) SimpleDRAM::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
{ {
// only add to the read queue here. whenever the request is // only add to the read queue here. whenever the request is
// eventually done, set the readyTime, and call schedule() // eventually done, set the readyTime, and call schedule()
assert(!pkt->isWrite()); assert(!pkt->isWrite());
// First check write buffer to see if the data is already at assert(pktCount != 0);
// the controller
list<DRAMPacket*>::const_iterator i;
Addr addr = pkt->getAddr();
// @todo: add size check // if the request size is larger than burst size, the pkt is split into
for (i = writeQueue.begin(); i != writeQueue.end(); ++i) { // multiple DRAM packets
if ((*i)->addr == addr){ // Note if the pkt starting address is not aligned to burst size, the
servicedByWrQ++; // address of first DRAM packet is kept unaligned. Subsequent DRAM packets
DPRINTF(DRAM, "Read to %lld serviced by write queue\n", addr); // are aligned to burst size boundaries. This is to ensure we accurately
bytesRead += bytesPerCacheLine; // check read packets against packets in write queue.
bytesConsumedRd += pkt->getSize(); Addr addr = pkt->getAddr();
accessAndRespond(pkt, frontendLatency); unsigned pktsServicedByWrQ = 0;
return; BurstHelper* burst_helper = NULL;
for (int cnt = 0; cnt < pktCount; ++cnt) {
unsigned size = std::min((addr | (burstSize - 1)) + 1,
pkt->getAddr() + pkt->getSize()) - addr;
readPktSize[ceilLog2(size)]++;
readBursts++;
// First check write buffer to see if the data is already at
// the controller
bool foundInWrQ = false;
list<DRAMPacket*>::const_iterator i;
for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
if ((*i)->addr == addr && (*i)->size >= size){
foundInWrQ = true;
servicedByWrQ++;
pktsServicedByWrQ++;
DPRINTF(DRAM, "Read to addr %lld with size %d serviced by "
"write queue\n", addr, size);
bytesRead += burstSize;
bytesConsumedRd += size;
break;
}
} }
// If not found in the write q, make a DRAM packet and
// push it onto the read queue
if (!foundInWrQ) {
// Make the burst helper for split packets
if (pktCount > 1 && burst_helper == NULL) {
DPRINTF(DRAM, "Read to addr %lld translates to %d "
"dram requests\n", pkt->getAddr(), pktCount);
burst_helper = new BurstHelper(pktCount);
}
DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
dram_pkt->burstHelper = burst_helper;
assert(!readQueueFull(1));
rdQLenPdf[readQueue.size() + respQueue.size()]++;
DPRINTF(DRAM, "Adding to read queue\n");
readQueue.push_back(dram_pkt);
// Update stats
uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
assert(bank_id < ranksPerChannel * banksPerRank);
perBankRdReqs[bank_id]++;
avgRdQLen = readQueue.size() + respQueue.size();
}
// Starting address of next dram pkt (aligned to burstSize boundary)
addr = (addr | (burstSize - 1)) + 1;
} }
DRAMPacket* dram_pkt = decodeAddr(pkt); // If all packets are serviced by write queue, we send the response back
if (pktsServicedByWrQ == pktCount) {
accessAndRespond(pkt, frontendLatency);
return;
}
assert(readQueue.size() + respQueue.size() < readBufferSize); // Update how many split packets are serviced by write queue
rdQLenPdf[readQueue.size() + respQueue.size()]++; if (burst_helper != NULL)
burst_helper->burstsServiced = pktsServicedByWrQ;
DPRINTF(DRAM, "Adding to read queue\n");
readQueue.push_back(dram_pkt);
// Update stats
uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
assert(bank_id < ranksPerChannel * banksPerRank);
perBankRdReqs[bank_id]++;
avgRdQLen = readQueue.size() + respQueue.size();
// If we are not already scheduled to get the read request out of // If we are not already scheduled to get the read request out of
// the queue, do so now // the queue, do so now
@ -364,7 +413,7 @@ SimpleDRAM::processWriteEvent()
bank.openRow = dram_pkt->row; bank.openRow = dram_pkt->row;
bank.freeAt = schedTime + tBURST + std::max(accessLat, tCL); bank.freeAt = schedTime + tBURST + std::max(accessLat, tCL);
busBusyUntil = bank.freeAt - tCL; busBusyUntil = bank.freeAt - tCL;
bank.bytesAccessed += bytesPerCacheLine; bank.bytesAccessed += burstSize;
if (!rowHitFlag) { if (!rowHitFlag) {
bank.tRASDoneAt = bank.freeAt + tRP; bank.tRASDoneAt = bank.freeAt + tRP;
@ -385,7 +434,7 @@ SimpleDRAM::processWriteEvent()
"banks_id %d is %lld\n", "banks_id %d is %lld\n",
dram_pkt->rank * banksPerRank + dram_pkt->bank, dram_pkt->rank * banksPerRank + dram_pkt->bank,
bank.freeAt); bank.freeAt);
bytesPerActivate.sample(bytesPerCacheLine); bytesPerActivate.sample(burstSize);
} else } else
panic("Unknown page management policy chosen\n"); panic("Unknown page management policy chosen\n");
@ -449,34 +498,49 @@ SimpleDRAM::triggerWrites()
} }
void void
SimpleDRAM::addToWriteQueue(PacketPtr pkt) SimpleDRAM::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
{ {
// only add to the write queue here. whenever the request is // only add to the write queue here. whenever the request is
// eventually done, set the readyTime, and call schedule() // eventually done, set the readyTime, and call schedule()
assert(pkt->isWrite()); assert(pkt->isWrite());
DRAMPacket* dram_pkt = decodeAddr(pkt); // if the request size is larger than burst size, the pkt is split into
// multiple DRAM packets
Addr addr = pkt->getAddr();
for (int cnt = 0; cnt < pktCount; ++cnt) {
unsigned size = std::min((addr | (burstSize - 1)) + 1,
pkt->getAddr() + pkt->getSize()) - addr;
writePktSize[ceilLog2(size)]++;
writeBursts++;
assert(writeQueue.size() < writeBufferSize); DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
wrQLenPdf[writeQueue.size()]++;
DPRINTF(DRAM, "Adding to write queue\n"); assert(writeQueue.size() < writeBufferSize);
wrQLenPdf[writeQueue.size()]++;
writeQueue.push_back(dram_pkt); DPRINTF(DRAM, "Adding to write queue\n");
// Update stats writeQueue.push_back(dram_pkt);
uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
assert(bank_id < ranksPerChannel * banksPerRank);
perBankWrReqs[bank_id]++;
avgWrQLen = writeQueue.size(); // Update stats
uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
assert(bank_id < ranksPerChannel * banksPerRank);
perBankWrReqs[bank_id]++;
avgWrQLen = writeQueue.size();
bytesConsumedWr += dram_pkt->size;
bytesWritten += burstSize;
// Starting address of next dram pkt (aligned to burstSize boundary)
addr = (addr | (burstSize - 1)) + 1;
}
// we do not wait for the writes to be send to the actual memory, // we do not wait for the writes to be send to the actual memory,
// but instead take responsibility for the consistency here and // but instead take responsibility for the consistency here and
// snoop the write queue for any upcoming reads // snoop the write queue for any upcoming reads
// @todo, if a pkt size is larger than burst size, we might need a
bytesConsumedWr += pkt->getSize(); // different front end latency
bytesWritten += bytesPerCacheLine;
accessAndRespond(pkt, frontendLatency); accessAndRespond(pkt, frontendLatency);
// If your write buffer is starting to fill up, drain it! // If your write buffer is starting to fill up, drain it!
@ -491,15 +555,18 @@ SimpleDRAM::printParams() const
// Sanity check print of important parameters // Sanity check print of important parameters
DPRINTF(DRAM, DPRINTF(DRAM,
"Memory controller %s physical organization\n" \ "Memory controller %s physical organization\n" \
"Bytes per cacheline %d\n" \ "Number of devices per rank %d\n" \
"Lines per row buffer %d\n" \ "Device bus width (in bits) %d\n" \
"Rows per bank %d\n" \ "DRAM data bus burst %d\n" \
"Banks per rank %d\n" \ "Row buffer size %d\n" \
"Ranks per channel %d\n" \ "Columns per row buffer %d\n" \
"Total mem capacity %u\n", "Rows per bank %d\n" \
name(), bytesPerCacheLine, linesPerRowBuffer, rowsPerBank, "Banks per rank %d\n" \
banksPerRank, ranksPerChannel, bytesPerCacheLine * "Ranks per channel %d\n" \
linesPerRowBuffer * rowsPerBank * banksPerRank * ranksPerChannel); "Total mem capacity %u\n",
name(), devicesPerRank, deviceBusWidth, burstSize, rowBufferSize,
columnsPerRowBuffer, rowsPerBank, banksPerRank, ranksPerChannel,
rowBufferSize * rowsPerBank * banksPerRank * ranksPerChannel);
string scheduler = memSchedPolicy == Enums::fcfs ? "FCFS" : "FR-FCFS"; string scheduler = memSchedPolicy == Enums::fcfs ? "FCFS" : "FR-FCFS";
string address_mapping = addrMapping == Enums::RaBaChCo ? "RaBaChCo" : string address_mapping = addrMapping == Enums::RaBaChCo ? "RaBaChCo" :
@ -560,7 +627,7 @@ SimpleDRAM::recvTimingReq(PacketPtr pkt)
// This is where we enter from the outside world // This is where we enter from the outside world
DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n", DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
pkt->cmdString(),pkt->getAddr(), pkt->getSize()); pkt->cmdString(), pkt->getAddr(), pkt->getSize());
// simply drop inhibited packets for now // simply drop inhibited packets for now
if (pkt->memInhibitAsserted()) { if (pkt->memInhibitAsserted()) {
@ -569,9 +636,6 @@ SimpleDRAM::recvTimingReq(PacketPtr pkt)
return true; return true;
} }
if (pkt->getSize() == bytesPerCacheLine)
cpuReqs++;
// Every million accesses, print the state of the queues // Every million accesses, print the state of the queues
if (numReqs % 1000000 == 0) if (numReqs % 1000000 == 0)
printQs(); printQs();
@ -582,37 +646,39 @@ SimpleDRAM::recvTimingReq(PacketPtr pkt)
} }
prevArrival = curTick(); prevArrival = curTick();
// Find out how many dram packets a pkt translates to
// If the burst size is equal or larger than the pkt size, then a pkt
// translates to only one dram packet. Otherwise, a pkt translates to
// multiple dram packets
unsigned size = pkt->getSize(); unsigned size = pkt->getSize();
if (size > bytesPerCacheLine) unsigned offset = pkt->getAddr() & (burstSize - 1);
panic("Request size %d is greater than burst size %d", unsigned int dram_pkt_count = divCeil(offset + size, burstSize);
size, bytesPerCacheLine);
// check local buffers and do not accept if full // check local buffers and do not accept if full
if (pkt->isRead()) { if (pkt->isRead()) {
assert(size != 0); assert(size != 0);
if (readQueueFull()) { if (readQueueFull(dram_pkt_count)) {
DPRINTF(DRAM, "Read queue full, not accepting\n"); DPRINTF(DRAM, "Read queue full, not accepting\n");
// remember that we have to retry this port // remember that we have to retry this port
retryRdReq = true; retryRdReq = true;
numRdRetry++; numRdRetry++;
return false; return false;
} else { } else {
readPktSize[ceilLog2(size)]++; addToReadQueue(pkt, dram_pkt_count);
addToReadQueue(pkt);
readReqs++; readReqs++;
numReqs++; numReqs++;
} }
} else if (pkt->isWrite()) { } else if (pkt->isWrite()) {
assert(size != 0); assert(size != 0);
if (writeQueueFull()) { if (writeQueueFull(dram_pkt_count)) {
DPRINTF(DRAM, "Write queue full, not accepting\n"); DPRINTF(DRAM, "Write queue full, not accepting\n");
// remember that we have to retry this port // remember that we have to retry this port
retryWrReq = true; retryWrReq = true;
numWrRetry++; numWrRetry++;
return false; return false;
} else { } else {
writePktSize[ceilLog2(size)]++; addToWriteQueue(pkt, dram_pkt_count);
addToWriteQueue(pkt);
writeReqs++; writeReqs++;
numReqs++; numReqs++;
} }
@ -633,38 +699,54 @@ SimpleDRAM::processRespondEvent()
DPRINTF(DRAM, DPRINTF(DRAM,
"processRespondEvent(): Some req has reached its readyTime\n"); "processRespondEvent(): Some req has reached its readyTime\n");
PacketPtr pkt = respQueue.front()->pkt; DRAMPacket* dram_pkt = respQueue.front();
// Actually responds to the requestor // Actually responds to the requestor
bytesConsumedRd += pkt->getSize(); bytesConsumedRd += dram_pkt->size;
bytesRead += bytesPerCacheLine; bytesRead += burstSize;
accessAndRespond(pkt, frontendLatency + backendLatency); if (dram_pkt->burstHelper) {
// it is a split packet
dram_pkt->burstHelper->burstsServiced++;
if (dram_pkt->burstHelper->burstsServiced ==
dram_pkt->burstHelper->burstCount) {
// we have now serviced all children packets of a system packet
// so we can now respond to the requester
// @todo we probably want to have a different front end and back
// end latency for split packets
accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
delete dram_pkt->burstHelper;
dram_pkt->burstHelper = NULL;
}
} else {
// it is not a split packet
accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
}
delete respQueue.front(); delete respQueue.front();
respQueue.pop_front(); respQueue.pop_front();
// Update stats // Update stats
avgRdQLen = readQueue.size() + respQueue.size(); avgRdQLen = readQueue.size() + respQueue.size();
if (!respQueue.empty()) { if (!respQueue.empty()) {
assert(respQueue.front()->readyTime >= curTick()); assert(respQueue.front()->readyTime >= curTick());
assert(!respondEvent.scheduled()); assert(!respondEvent.scheduled());
schedule(respondEvent, respQueue.front()->readyTime); schedule(respondEvent, respQueue.front()->readyTime);
} else { } else {
// if there is nothing left in any queue, signal a drain // if there is nothing left in any queue, signal a drain
if (writeQueue.empty() && readQueue.empty() && if (writeQueue.empty() && readQueue.empty() &&
drainManager) { drainManager) {
drainManager->signalDrainDone(); drainManager->signalDrainDone();
drainManager = NULL; drainManager = NULL;
} }
} }
// We have made a location in the queue available at this point, // We have made a location in the queue available at this point,
// so if there is a read that was forced to wait, retry now // so if there is a read that was forced to wait, retry now
if (retryRdReq) { if (retryRdReq) {
retryRdReq = false; retryRdReq = false;
port.sendRetry(); port.sendRetry();
} }
} }
void void
@ -911,7 +993,7 @@ SimpleDRAM::doDRAMAccess(DRAMPacket* dram_pkt)
if (pageMgmt == Enums::open) { if (pageMgmt == Enums::open) {
bank.openRow = dram_pkt->row; bank.openRow = dram_pkt->row;
bank.freeAt = curTick() + addDelay + accessLat; bank.freeAt = curTick() + addDelay + accessLat;
bank.bytesAccessed += bytesPerCacheLine; bank.bytesAccessed += burstSize;
// If you activated a new row do to this access, the next access // If you activated a new row do to this access, the next access
// will have to respect tRAS for this bank. Assume tRAS ~= 3 * tRP. // will have to respect tRAS for this bank. Assume tRAS ~= 3 * tRP.
@ -931,7 +1013,7 @@ SimpleDRAM::doDRAMAccess(DRAMPacket* dram_pkt)
bank.freeAt = curTick() + addDelay + accessLat + tRP + tRP; bank.freeAt = curTick() + addDelay + accessLat + tRP + tRP;
recordActivate(bank.freeAt - tRP - tRP - tCL - tRCD); //essentially (freeAt - tRC) recordActivate(bank.freeAt - tRP - tRP - tCL - tRCD); //essentially (freeAt - tRC)
DPRINTF(DRAM,"doDRAMAccess::bank.freeAt is %lld\n",bank.freeAt); DPRINTF(DRAM,"doDRAMAccess::bank.freeAt is %lld\n",bank.freeAt);
bytesPerActivate.sample(bytesPerCacheLine); bytesPerActivate.sample(burstSize);
} else } else
panic("No page management policy chosen\n"); panic("No page management policy chosen\n");
@ -1080,19 +1162,27 @@ SimpleDRAM::regStats()
readReqs readReqs
.name(name() + ".readReqs") .name(name() + ".readReqs")
.desc("Total number of read requests seen"); .desc("Total number of read requests accepted by DRAM controller");
writeReqs writeReqs
.name(name() + ".writeReqs") .name(name() + ".writeReqs")
.desc("Total number of write requests seen"); .desc("Total number of write requests accepted by DRAM controller");
readBursts
.name(name() + ".readBursts")
.desc("Total number of DRAM read bursts. "
"Each DRAM read request translates to either one or multiple "
"DRAM read bursts");
writeBursts
.name(name() + ".writeBursts")
.desc("Total number of DRAM write bursts. "
"Each DRAM write request translates to either one or multiple "
"DRAM write bursts");
servicedByWrQ servicedByWrQ
.name(name() + ".servicedByWrQ") .name(name() + ".servicedByWrQ")
.desc("Number of read reqs serviced by write Q"); .desc("Number of DRAM read bursts serviced by write Q");
cpuReqs
.name(name() + ".cpureqs")
.desc("Reqs generatd by CPU via cache - shady");
neitherReadNorWrite neitherReadNorWrite
.name(name() + ".neitherReadNorWrite") .name(name() + ".neitherReadNorWrite")
@ -1139,28 +1229,28 @@ SimpleDRAM::regStats()
.desc("Average queueing delay per request") .desc("Average queueing delay per request")
.precision(2); .precision(2);
avgQLat = totQLat / (readReqs - servicedByWrQ); avgQLat = totQLat / (readBursts - servicedByWrQ);
avgBankLat avgBankLat
.name(name() + ".avgBankLat") .name(name() + ".avgBankLat")
.desc("Average bank access latency per request") .desc("Average bank access latency per request")
.precision(2); .precision(2);
avgBankLat = totBankLat / (readReqs - servicedByWrQ); avgBankLat = totBankLat / (readBursts - servicedByWrQ);
avgBusLat avgBusLat
.name(name() + ".avgBusLat") .name(name() + ".avgBusLat")
.desc("Average bus latency per request") .desc("Average bus latency per request")
.precision(2); .precision(2);
avgBusLat = totBusLat / (readReqs - servicedByWrQ); avgBusLat = totBusLat / (readBursts - servicedByWrQ);
avgMemAccLat avgMemAccLat
.name(name() + ".avgMemAccLat") .name(name() + ".avgMemAccLat")
.desc("Average memory access latency") .desc("Average memory access latency")
.precision(2); .precision(2);
avgMemAccLat = totMemAccLat / (readReqs - servicedByWrQ); avgMemAccLat = totMemAccLat / (readBursts - servicedByWrQ);
numRdRetry numRdRetry
.name(name() + ".numRdRetry") .name(name() + ".numRdRetry")
@ -1183,22 +1273,22 @@ SimpleDRAM::regStats()
.desc("Row buffer hit rate for reads") .desc("Row buffer hit rate for reads")
.precision(2); .precision(2);
readRowHitRate = (readRowHits / (readReqs - servicedByWrQ)) * 100; readRowHitRate = (readRowHits / (readBursts - servicedByWrQ)) * 100;
writeRowHitRate writeRowHitRate
.name(name() + ".writeRowHitRate") .name(name() + ".writeRowHitRate")
.desc("Row buffer hit rate for writes") .desc("Row buffer hit rate for writes")
.precision(2); .precision(2);
writeRowHitRate = (writeRowHits / writeReqs) * 100; writeRowHitRate = (writeRowHits / writeBursts) * 100;
readPktSize readPktSize
.init(ceilLog2(bytesPerCacheLine) + 1) .init(ceilLog2(burstSize) + 1)
.name(name() + ".readPktSize") .name(name() + ".readPktSize")
.desc("Categorize read packet sizes"); .desc("Categorize read packet sizes");
writePktSize writePktSize
.init(ceilLog2(bytesPerCacheLine) + 1) .init(ceilLog2(burstSize) + 1)
.name(name() + ".writePktSize") .name(name() + ".writePktSize")
.desc("Categorize write packet sizes"); .desc("Categorize write packet sizes");
@ -1213,7 +1303,7 @@ SimpleDRAM::regStats()
.desc("What write queue length does an incoming req see"); .desc("What write queue length does an incoming req see");
bytesPerActivate bytesPerActivate
.init(bytesPerCacheLine * linesPerRowBuffer) .init(rowBufferSize)
.name(name() + ".bytesPerActivate") .name(name() + ".bytesPerActivate")
.desc("Bytes accessed per row activation") .desc("Bytes accessed per row activation")
.flags(nozero); .flags(nozero);
@ -1267,7 +1357,7 @@ SimpleDRAM::regStats()
.desc("Theoretical peak bandwidth in MB/s") .desc("Theoretical peak bandwidth in MB/s")
.precision(2); .precision(2);
peakBW = (SimClock::Frequency / tBURST) * bytesPerCacheLine / 1000000; peakBW = (SimClock::Frequency / tBURST) * burstSize / 1000000;
busUtil busUtil
.name(name() + ".busUtil") .name(name() + ".busUtil")

View file

@ -11,6 +11,9 @@
* unmodified and in its entirety in all distributions of the software, * unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form. * modified or unmodified, in source code or in binary form.
* *
* Copyright (c) 2013 Amin Farmahini-Farahani
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are * modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright * met: redistributions of source code must retain the above copyright
@ -157,6 +160,27 @@ class SimpleDRAM : public AbstractMemory
{ } { }
}; };
/**
* A burst helper helps organize and manage a packet that is larger than
* the DRAM burst size. A system packet that is larger than the burst size
* is split into multiple DRAM packets and all those DRAM packets point to
* a single burst helper such that we know when the whole packet is served.
*/
class BurstHelper {
public:
/** Number of DRAM bursts requred for a system packet **/
const unsigned int burstCount;
/** Number of DRAM bursts serviced so far for a system packet **/
unsigned int burstsServiced;
BurstHelper(unsigned int _burstCount)
: burstCount(_burstCount), burstsServiced(0)
{ }
};
/** /**
* A DRAM packet stores packets along with the timestamp of when * A DRAM packet stores packets along with the timestamp of when
* the packet entered the queue, and also the decoded address. * the packet entered the queue, and also the decoded address.
@ -178,14 +202,34 @@ class SimpleDRAM : public AbstractMemory
const uint8_t rank; const uint8_t rank;
const uint16_t bank; const uint16_t bank;
const uint16_t row; const uint16_t row;
/**
* The starting address of the DRAM packet.
* This address could be unaligned to burst size boundaries. The
* reason is to keep the address offset so we can accurately check
* incoming read packets with packets in the write queue.
*/
const Addr addr; const Addr addr;
/**
* The size of this dram packet in bytes
* It is always equal or smaller than DRAM burst size
*/
const unsigned int size;
/**
* A pointer to the BurstHelper if this DRAMPacket is a split packet
* If not a split packet (common case), this is set to NULL
*/
BurstHelper* burstHelper;
Bank& bank_ref; Bank& bank_ref;
DRAMPacket(PacketPtr _pkt, uint8_t _rank, DRAMPacket(PacketPtr _pkt, uint8_t _rank, uint16_t _bank,
uint16_t _bank, uint16_t _row, Addr _addr, Bank& _bank_ref) uint16_t _row, Addr _addr, unsigned int _size,
Bank& _bank_ref)
: entryTime(curTick()), readyTime(curTick()), : entryTime(curTick()), readyTime(curTick()),
pkt(_pkt), rank(_rank), bank(_bank), row(_row), addr(_addr), pkt(_pkt), rank(_rank), bank(_bank), row(_row), addr(_addr),
bank_ref(_bank_ref) size(_size), burstHelper(NULL), bank_ref(_bank_ref)
{ } { }
}; };
@ -212,28 +256,34 @@ class SimpleDRAM : public AbstractMemory
/** /**
* Check if the read queue has room for more entries * Check if the read queue has room for more entries
* *
* @param pktCount The number of entries needed in the read queue
* @return true if read queue is full, false otherwise * @return true if read queue is full, false otherwise
*/ */
bool readQueueFull() const; bool readQueueFull(unsigned int pktCount) const;
/** /**
* Check if the write queue has room for more entries * Check if the write queue has room for more entries
* *
* @param pktCount The number of entries needed in the write queue
* @return true if write queue is full, false otherwise * @return true if write queue is full, false otherwise
*/ */
bool writeQueueFull() const; bool writeQueueFull(unsigned int pktCount) const;
/** /**
* When a new read comes in, first check if the write q has a * When a new read comes in, first check if the write q has a
* pending request to the same address.\ If not, decode the * pending request to the same address.\ If not, decode the
* address to populate rank/bank/row, create a "dram_pkt", and * address to populate rank/bank/row, create one or mutliple
* push it to the back of the read queue.\ If this is the only * "dram_pkt", and push them to the back of the read queue.\
* If this is the only
* read request in the system, schedule an event to start * read request in the system, schedule an event to start
* servicing it. * servicing it.
* *
* @param pkt The request packet from the outside world * @param pkt The request packet from the outside world
* @param pktCount The number of DRAM bursts the pkt
* translate to. If pkt size is larger then one full burst,
* then pktCount is greater than one.
*/ */
void addToReadQueue(PacketPtr pkt); void addToReadQueue(PacketPtr pkt, unsigned int pktCount);
/** /**
* Decode the incoming pkt, create a dram_pkt and push to the * Decode the incoming pkt, create a dram_pkt and push to the
@ -242,8 +292,11 @@ class SimpleDRAM : public AbstractMemory
* to get full, stop reads, and start draining writes. * to get full, stop reads, and start draining writes.
* *
* @param pkt The request packet from the outside world * @param pkt The request packet from the outside world
* @param pktCount The number of DRAM bursts the pkt
* translate to. If pkt size is larger then one full burst,
* then pktCount is greater than one.
*/ */
void addToWriteQueue(PacketPtr pkt); void addToWriteQueue(PacketPtr pkt, unsigned int pktCount);
/** /**
* Actually do the DRAM access - figure out the latency it * Actually do the DRAM access - figure out the latency it
@ -276,12 +329,16 @@ class SimpleDRAM : public AbstractMemory
/** /**
* Address decoder to figure out physical mapping onto ranks, * Address decoder to figure out physical mapping onto ranks,
* banks, and rows. * banks, and rows. This function is called multiple times on the same
* system packet if the pakcet is larger than burst of the memory. The
* dramPktAddr is used for the offset within the packet.
* *
* @param pkt The packet from the outside world * @param pkt The packet from the outside world
* @param dramPktAddr The starting address of the DRAM packet
* @param size The size of the DRAM packet in bytes
* @return A DRAMPacket pointer with the decoded information * @return A DRAMPacket pointer with the decoded information
*/ */
DRAMPacket* decodeAddr(PacketPtr pkt); DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size);
/** /**
* The memory schduler/arbiter - picks which read request needs to * The memory schduler/arbiter - picks which read request needs to
@ -376,18 +433,21 @@ class SimpleDRAM : public AbstractMemory
/** /**
* The following are basic design parameters of the memory * The following are basic design parameters of the memory
* controller, and are initialized based on parameter values. The * controller, and are initialized based on parameter values.
* bytesPerCacheLine is based on the neighbouring ports cache line * The rowsPerBank is determined based on the capacity, number of
* size and thus determined outside the constructor. Similarly, * ranks and banks, the burst size, and the row buffer size.
* the rowsPerBank is determined based on the capacity, number of
* ranks and banks, the cache line size, and the row buffer size.
*/ */
uint32_t bytesPerCacheLine; const uint32_t deviceBusWidth;
const uint32_t linesPerRowBuffer; const uint32_t burstLength;
const uint32_t deviceRowBufferSize;
const uint32_t devicesPerRank;
const uint32_t burstSize;
const uint32_t rowBufferSize;
const uint32_t ranksPerChannel; const uint32_t ranksPerChannel;
const uint32_t banksPerRank; const uint32_t banksPerRank;
const uint32_t channels; const uint32_t channels;
uint32_t rowsPerBank; uint32_t rowsPerBank;
uint32_t columnsPerRowBuffer;
const uint32_t readBufferSize; const uint32_t readBufferSize;
const uint32_t writeBufferSize; const uint32_t writeBufferSize;
const double writeThresholdPerc; const double writeThresholdPerc;
@ -441,7 +501,8 @@ class SimpleDRAM : public AbstractMemory
// All statistics that the model needs to capture // All statistics that the model needs to capture
Stats::Scalar readReqs; Stats::Scalar readReqs;
Stats::Scalar writeReqs; Stats::Scalar writeReqs;
Stats::Scalar cpuReqs; Stats::Scalar readBursts;
Stats::Scalar writeBursts;
Stats::Scalar bytesRead; Stats::Scalar bytesRead;
Stats::Scalar bytesWritten; Stats::Scalar bytesWritten;
Stats::Scalar bytesConsumedRd; Stats::Scalar bytesConsumedRd;