mem: prefetcher: add options, support for unaligned addresses

This patch extends the classic prefetcher to work on non-block aligned
addresses.  Because the existing prefetchers in gem5 mask off the lower
address bits of cache accesses, many predictable strides fail to be
detected.  For example, if a load were to stride by 48 bytes, with 64 byte
cachelines, the current stride based prefetcher would see an access pattern
of 0, 64, 64, 128, 192.... Thus not detecting a constant stride pattern.  This
patch fixes this, by training the prefetcher on access and not masking off the
lower address bits.

It also adds the following configuration options:
1) Training/prefetching only on cache misses,
2) Training/prefetching only on data accesses,
3) Optionally tagging prefetches with a PC address.
#3 allows prefetchers to train off of prefetch requests in systems with
multiple cache levels and PC-based prefetchers present at multiple levels.
It also effectively allows a pipelining of prefetch requests (like in POWER4)
across multiple levels of cache hierarchy.

Improves performance on my gem5 configuration by 4.3% for SPECINT and 4.7% for SPECFP (geomean).
This commit is contained in:
Mitch Hayenga (with Amin Farmahini <aminfar@gmail.com>) 2014-01-29 23:21:25 -06:00
parent 32cc2ea8b9
commit 95735e10e7
6 changed files with 64 additions and 16 deletions

View file

@ -556,6 +556,17 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
// move it ahead of mshrs that are ready // move it ahead of mshrs that are ready
// mshrQueue.moveToFront(mshr); // mshrQueue.moveToFront(mshr);
} }
// We should call the prefetcher regardless of whether the request is
// satisfied or not, and regardless of whether the request is in the
// MSHR or not. The request could be a ReadReq hit, but still not
// satisfied (potentially because of a prior write to the same
// cache line). So, even when not satisfied, if there is an MSHR
// already allocated for this, we need to let the prefetcher know
// about the request
if (prefetcher) {
next_pf_time = prefetcher->notify(pkt, time);
}
} else { } else {
// no MSHR // no MSHR
assert(pkt->req->masterId() < system->maxMasters()); assert(pkt->req->masterId() < system->maxMasters());

View file

@ -59,6 +59,12 @@ class BasePrefetcher(ClockedObject):
"Use the master id to separate calculations of prefetches") "Use the master id to separate calculations of prefetches")
data_accesses_only = Param.Bool(False, data_accesses_only = Param.Bool(False,
"Only prefetch on data not on instruction accesses") "Only prefetch on data not on instruction accesses")
on_miss_only = Param.Bool(False,
"Only prefetch on miss (as opposed to always)")
on_read_only = Param.Bool(False,
"Only prefetch on read requests (write requests ignored)")
on_prefetch = Param.Bool(True,
"Let lower cache prefetcher train on prefetch requests")
sys = Param.System(Parent.any, "System this device belongs to") sys = Param.System(Parent.any, "System this device belongs to")
class GHBPrefetcher(BasePrefetcher): class GHBPrefetcher(BasePrefetcher):

View file

@ -60,7 +60,9 @@ BasePrefetcher::BasePrefetcher(const Params *p)
: ClockedObject(p), size(p->size), latency(p->latency), degree(p->degree), : ClockedObject(p), size(p->size), latency(p->latency), degree(p->degree),
useMasterId(p->use_master_id), pageStop(!p->cross_pages), useMasterId(p->use_master_id), pageStop(!p->cross_pages),
serialSquash(p->serial_squash), onlyData(p->data_accesses_only), serialSquash(p->serial_squash), onlyData(p->data_accesses_only),
system(p->sys), masterId(system->getMasterId(name())) onMissOnly(p->on_miss_only), onReadOnly(p->on_read_only),
onPrefetch(p->on_prefetch), system(p->sys),
masterId(system->getMasterId(name()))
{ {
} }
@ -185,7 +187,14 @@ BasePrefetcher::getPacket()
Tick Tick
BasePrefetcher::notify(PacketPtr &pkt, Tick tick) BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
{ {
if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData)) { // Don't consult the prefetcher if any of the following conditions are true
// 1) The request is uncacheable
// 2) The request is a fetch, but we are only prefetching data
// 3) The request is a cache hit, but we are only training on misses
// 4) The request is a write, but we are only training on reads
if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData) &&
!(onMissOnly && inCache(pkt->getAddr(), true)) &&
!(onReadOnly && !pkt->isRead())) {
// Calculate the blk address // Calculate the blk address
Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
bool is_secure = pkt->isSecure(); bool is_secure = pkt->isSecure();
@ -262,6 +271,11 @@ BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
prefetch->req->setThreadContext(pkt->req->contextId(), prefetch->req->setThreadContext(pkt->req->contextId(),
pkt->req->threadId()); pkt->req->threadId());
// Tag prefetch requests with corresponding PC to train lower
// cache-level prefetchers
if (onPrefetch && pkt->req->hasPC())
prefetch->req->setPC(pkt->req->getPC());
// We just remove the head if we are full // We just remove the head if we are full
if (pf.size() == size) { if (pf.size() == size) {
pfRemovedFull++; pfRemovedFull++;

View file

@ -89,18 +89,28 @@ class BasePrefetcher : public ClockedObject
const Cycles latency; const Cycles latency;
/** The number of prefetches to issue */ /** The number of prefetches to issue */
unsigned degree; const unsigned degree;
/** If patterns should be found per context id */ /** If patterns should be found per context id */
bool useMasterId; const bool useMasterId;
/** Do we prefetch across page boundaries. */ /** Do we prefetch across page boundaries. */
bool pageStop; const bool pageStop;
/** Do we remove prefetches with later times than a new miss.*/ /** Do we remove prefetches with later times than a new miss.*/
bool serialSquash; const bool serialSquash;
/** Do we prefetch on only data reads, or on inst reads as well. */ /** Do we prefetch on only data reads, or on inst reads as well. */
bool onlyData; const bool onlyData;
/** Do we trigger/train prefetch on cache misses only, or all accesses. */
const bool onMissOnly;
/** Do we trigger/train prefetch on reads only, or all accesses. */
const bool onReadOnly;
/** Do we tag prefetches with PC addresses, allowing lower PC-based
prefetchers to prefetch on prefetch requests */
const bool onPrefetch;
/** System we belong to */ /** System we belong to */
System* system; System* system;

View file

@ -59,7 +59,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
return; return;
} }
Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1); Addr data_addr = pkt->getAddr();
bool is_secure = pkt->isSecure(); bool is_secure = pkt->isSecure();
MasterID master_id = useMasterId ? pkt->req->masterId() : 0; MasterID master_id = useMasterId ? pkt->req->masterId() : 0;
Addr pc = pkt->req->getPC(); Addr pc = pkt->req->getPC();
@ -77,7 +77,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
if (iter != tab.end()) { if (iter != tab.end()) {
// Hit in table // Hit in table
int new_stride = blk_addr - (*iter)->missAddr; int new_stride = data_addr - (*iter)->missAddr;
bool stride_match = (new_stride == (*iter)->stride); bool stride_match = (new_stride == (*iter)->stride);
if (stride_match && new_stride != 0) { if (stride_match && new_stride != 0) {
@ -89,20 +89,20 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
(*iter)->confidence = 0; (*iter)->confidence = 0;
} }
DPRINTF(HWPrefetch, "hit: PC %x blk_addr %x (%s) stride %d (%s), " DPRINTF(HWPrefetch, "hit: PC %x data_addr %x (%s) stride %d (%s), "
"conf %d\n", pc, blk_addr, is_secure ? "s" : "ns", new_stride, "conf %d\n", pc, data_addr, is_secure ? "s" : "ns", new_stride,
stride_match ? "match" : "change", stride_match ? "match" : "change",
(*iter)->confidence); (*iter)->confidence);
(*iter)->missAddr = blk_addr; (*iter)->missAddr = data_addr;
(*iter)->isSecure = is_secure; (*iter)->isSecure = is_secure;
if ((*iter)->confidence <= 0) if ((*iter)->confidence <= 0)
return; return;
for (int d = 1; d <= degree; d++) { for (int d = 1; d <= degree; d++) {
Addr new_addr = blk_addr + d * new_stride; Addr new_addr = data_addr + d * new_stride;
if (pageStop && !samePage(blk_addr, new_addr)) { if (pageStop && !samePage(data_addr, new_addr)) {
// Spanned the page, so now stop // Spanned the page, so now stop
pfSpanPage += degree - d + 1; pfSpanPage += degree - d + 1;
return; return;
@ -117,7 +117,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
// Miss in table // Miss in table
// Find lowest confidence and replace // Find lowest confidence and replace
DPRINTF(HWPrefetch, "miss: PC %x blk_addr %x (%s)\n", pc, blk_addr, DPRINTF(HWPrefetch, "miss: PC %x data_addr %x (%s)\n", pc, data_addr,
is_secure ? "s" : "ns"); is_secure ? "s" : "ns");
if (tab.size() >= 256) { //set default table size is 256 if (tab.size() >= 256) { //set default table size is 256
@ -139,7 +139,7 @@ StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
StrideEntry *new_entry = new StrideEntry; StrideEntry *new_entry = new StrideEntry;
new_entry->instAddr = pc; new_entry->instAddr = pc;
new_entry->missAddr = blk_addr; new_entry->missAddr = data_addr;
new_entry->isSecure = is_secure; new_entry->isSecure = is_secure;
new_entry->stride = 0; new_entry->stride = 0;
new_entry->confidence = 0; new_entry->confidence = 0;

View file

@ -576,6 +576,13 @@ class Request
return _threadId; return _threadId;
} }
void
setPC(Addr pc)
{
privateFlags.set(VALID_PC);
_pc = pc;
}
bool bool
hasPC() const hasPC() const
{ {