mem: Split the hit_latency into tag_latency and data_latency

If the cache access mode is parallel, i.e. the "sequential_access" parameter
is set to "False", tags and data are accessed in parallel. Therefore,
the hit_latency is the maximum of tag_latency and
data_latency. On the other hand, if the cache access mode is
sequential, i.e. the "sequential_access" parameter is set to "True",
tags and data are accessed sequentially. Therefore, the hit_latency
is the sum of tag_latency and data_latency.

Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
This commit is contained in:
Sophiane Senni 2016-11-30 17:10:27 -05:00
parent 047caf24ba
commit ce2722cdd9
15 changed files with 95 additions and 36 deletions

View file

@ -48,7 +48,8 @@ from m5.objects import *
class L1Cache(Cache):
assoc = 2
hit_latency = 2
tag_latency = 2
data_latency = 2
response_latency = 2
mshrs = 4
tgts_per_mshr = 20
@ -63,7 +64,8 @@ class L1_DCache(L1Cache):
class L2Cache(Cache):
assoc = 8
hit_latency = 20
tag_latency = 20
data_latency = 20
response_latency = 20
mshrs = 20
tgts_per_mshr = 12
@ -71,7 +73,8 @@ class L2Cache(Cache):
class IOCache(Cache):
assoc = 8
hit_latency = 50
tag_latency = 50
data_latency = 50
response_latency = 50
mshrs = 20
size = '1kB'
@ -79,7 +82,8 @@ class IOCache(Cache):
class PageTableWalkerCache(Cache):
assoc = 2
hit_latency = 2
tag_latency = 2
data_latency = 2
response_latency = 2
mshrs = 10
size = '1kB'

View file

@ -147,7 +147,8 @@ class O3_ARM_v7a_3(DerivO3CPU):
# Instruction Cache
class O3_ARM_v7a_ICache(Cache):
hit_latency = 1
tag_latency = 1
data_latency = 1
response_latency = 1
mshrs = 2
tgts_per_mshr = 8
@ -159,7 +160,8 @@ class O3_ARM_v7a_ICache(Cache):
# Data Cache
class O3_ARM_v7a_DCache(Cache):
hit_latency = 2
tag_latency = 2
data_latency = 2
response_latency = 2
mshrs = 6
tgts_per_mshr = 8
@ -172,7 +174,8 @@ class O3_ARM_v7a_DCache(Cache):
# TLB Cache
# Use a cache as a L2 TLB
class O3_ARM_v7aWalkCache(Cache):
hit_latency = 4
tag_latency = 4
data_latency = 4
response_latency = 4
mshrs = 6
tgts_per_mshr = 8
@ -185,7 +188,8 @@ class O3_ARM_v7aWalkCache(Cache):
# L2 Cache
class O3_ARM_v7aL2(Cache):
hit_latency = 12
tag_latency = 12
data_latency = 12
response_latency = 12
mshrs = 16
tgts_per_mshr = 8

View file

@ -45,7 +45,8 @@ from common.Caches import *
from common import CpuConfig
class L1I(L1_ICache):
hit_latency = 1
tag_latency = 1
data_latency = 1
response_latency = 1
mshrs = 4
tgts_per_mshr = 8
@ -54,7 +55,8 @@ class L1I(L1_ICache):
class L1D(L1_DCache):
hit_latency = 2
tag_latency = 2
data_latency = 2
response_latency = 1
mshrs = 16
tgts_per_mshr = 16
@ -64,7 +66,8 @@ class L1D(L1_DCache):
class WalkCache(PageTableWalkerCache):
hit_latency = 4
tag_latency = 4
data_latency = 4
response_latency = 4
mshrs = 6
tgts_per_mshr = 8
@ -74,7 +77,8 @@ class WalkCache(PageTableWalkerCache):
class L2(L2Cache):
hit_latency = 12
tag_latency = 12
data_latency = 12
response_latency = 5
mshrs = 32
tgts_per_mshr = 8
@ -87,7 +91,8 @@ class L2(L2Cache):
class L3(Cache):
size = '16MB'
assoc = 16
hit_latency = 20
tag_latency = 20
data_latency = 20
response_latency = 20
mshrs = 20
tgts_per_mshr = 12

View file

@ -153,7 +153,7 @@ for t, m in zip(testerspec, multiplier):
# Define a prototype L1 cache that we scale for all successive levels
proto_l1 = Cache(size = '32kB', assoc = 4,
hit_latency = 1, response_latency = 1,
tag_latency = 1, data_latency = 1, response_latency = 1,
tgts_per_mshr = 8)
if options.blocking:
@ -175,7 +175,8 @@ for scale in cachespec[:-1]:
prev = cache_proto[0]
next = prev()
next.size = prev.size * scale
next.hit_latency = prev.hit_latency * 10
next.tag_latency = prev.tag_latency * 10
next.data_latency = prev.data_latency * 10
next.response_latency = prev.response_latency * 10
next.assoc = prev.assoc * scale
next.mshrs = prev.mshrs * scale

View file

@ -176,7 +176,7 @@ else:
# Define a prototype L1 cache that we scale for all successive levels
proto_l1 = Cache(size = '32kB', assoc = 4,
hit_latency = 1, response_latency = 1,
tag_latency = 1, data_latency = 1, response_latency = 1,
tgts_per_mshr = 8, clusivity = 'mostly_incl',
writeback_clean = True)
@ -194,7 +194,8 @@ for scale in cachespec[:-1]:
prev = cache_proto[0]
next = prev()
next.size = prev.size * scale
next.hit_latency = prev.hit_latency * 10
next.tag_latency = prev.tag_latency * 10
next.data_latency = prev.data_latency * 10
next.response_latency = prev.response_latency * 10
next.assoc = prev.assoc * scale
next.mshrs = prev.mshrs * scale

View file

@ -45,7 +45,8 @@ class L1Cache(Cache):
"""Simple L1 Cache with default values"""
assoc = 2
hit_latency = 2
tag_latency = 2
data_latency = 2
response_latency = 2
mshrs = 4
tgts_per_mshr = 20
@ -107,7 +108,8 @@ class L2Cache(Cache):
# Default parameters
size = '256kB'
assoc = 8
hit_latency = 20
tag_latency = 20
data_latency = 20
response_latency = 20
mshrs = 20
tgts_per_mshr = 12

View file

@ -53,7 +53,8 @@ class BaseCache(MemObject):
size = Param.MemorySize("Capacity")
assoc = Param.Unsigned("Associativity")
hit_latency = Param.Cycles("Hit latency")
tag_latency = Param.Cycles("Tag lookup latency")
data_latency = Param.Cycles("Data access latency")
response_latency = Param.Cycles("Latency for the return path on a miss");
max_miss_count = Param.Counter(0,

View file

@ -72,9 +72,10 @@ BaseCache::BaseCache(const BaseCacheParams *p, unsigned blk_size)
mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below
writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below
blkSize(blk_size),
lookupLatency(p->hit_latency),
forwardLatency(p->hit_latency),
fillLatency(p->response_latency),
lookupLatency(p->tag_latency),
dataLatency(p->data_latency),
forwardLatency(p->tag_latency),
fillLatency(p->data_latency),
responseLatency(p->response_latency),
numTarget(p->tgts_per_mshr),
forwardSnoops(true),

View file

@ -264,6 +264,12 @@ class BaseCache : public MemObject
*/
const Cycles lookupLatency;
/**
* The latency of data access of a cache. It occurs when there is
* an access to the cache.
*/
const Cycles dataLatency;
/**
* This is the forward latency of the cache. It occurs when there
* is a cache miss and a request is forwarded downstream, in

View file

@ -49,17 +49,22 @@ class BaseTags(ClockedObject):
# Get the block size from the parent (system)
block_size = Param.Int(Parent.cache_line_size, "block size in bytes")
# Get the hit latency from the parent (cache)
hit_latency = Param.Cycles(Parent.hit_latency,
"The hit latency for this cache")
# Get the tag lookup latency from the parent (cache)
tag_latency = Param.Cycles(Parent.tag_latency,
"The tag lookup latency for this cache")
# Get the RAM access latency from the parent (cache)
data_latency = Param.Cycles(Parent.data_latency,
"The data access latency for this cache")
sequential_access = Param.Bool(Parent.sequential_access,
"Whether to access tags and data sequentially")
class BaseSetAssoc(BaseTags):
type = 'BaseSetAssoc'
abstract = True
cxx_header = "mem/cache/tags/base_set_assoc.hh"
assoc = Param.Int(Parent.assoc, "associativity")
sequential_access = Param.Bool(Parent.sequential_access,
"Whether to access tags and data sequentially")
class LRU(BaseSetAssoc):
type = 'LRU'

View file

@ -56,7 +56,11 @@ using namespace std;
BaseTags::BaseTags(const Params *p)
: ClockedObject(p), blkSize(p->block_size), size(p->size),
accessLatency(p->hit_latency), cache(nullptr), warmupBound(0),
lookupLatency(p->tag_latency),
accessLatency(p->sequential_access ?
p->tag_latency + p->data_latency :
std::max(p->tag_latency, p->data_latency)),
cache(nullptr), warmupBound(0),
warmedUp(false), numBlocks(0)
{
}

View file

@ -69,7 +69,13 @@ class BaseTags : public ClockedObject
const unsigned blkSize;
/** The size of the cache. */
const unsigned size;
/** The access latency of the cache. */
/** The tag lookup latency of the cache. */
const Cycles lookupLatency;
/**
* The total access latency of the cache. This latency
* is different depending on the cache access mode
* (parallel or sequential)
*/
const Cycles accessLatency;
/** Pointer to the parent cache. */
BaseCache *cache;

View file

@ -208,7 +208,6 @@ public:
Addr tag = extractTag(addr);
int set = extractSet(addr);
BlkType *blk = sets[set].findBlk(tag, is_secure);
lat = accessLatency;;
// Access all tags in parallel, hence one in each way. The data side
// either accesses all blocks in parallel, or one block sequentially on
@ -223,12 +222,20 @@ public:
}
if (blk != nullptr) {
if (blk->whenReady > curTick()
&& cache->ticksToCycles(blk->whenReady - curTick())
> accessLatency) {
lat = cache->ticksToCycles(blk->whenReady - curTick());
// If a cache hit
lat = accessLatency;
// Check if the block to be accessed is available. If not,
// apply the accessLatency on top of block->whenReady.
if (blk->whenReady > curTick() &&
cache->ticksToCycles(blk->whenReady - curTick()) >
accessLatency) {
lat = cache->ticksToCycles(blk->whenReady - curTick()) +
accessLatency;
}
blk->refCount += 1;
} else {
// If a cache miss
lat = lookupLatency;
}
return blk;

View file

@ -186,6 +186,16 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src,
FALRUBlk* blk = hashLookup(blkAddr);
if (blk && blk->isValid()) {
// If a cache hit
lat = accessLatency;
// Check if the block to be accessed is available. If not,
// apply the accessLatency on top of block->whenReady.
if (blk->whenReady > curTick() &&
cache->ticksToCycles(blk->whenReady - curTick()) >
accessLatency) {
lat = cache->ticksToCycles(blk->whenReady - curTick()) +
accessLatency;
}
assert(blk->tag == blkAddr);
tmp_in_cache = blk->inCache;
for (unsigned i = 0; i < numCaches; i++) {
@ -200,6 +210,8 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src,
moveToHead(blk);
}
} else {
// If a cache miss
lat = lookupLatency;
blk = nullptr;
for (unsigned i = 0; i <= numCaches; ++i) {
misses[i]++;
@ -209,7 +221,6 @@ FALRU::accessBlock(Addr addr, bool is_secure, Cycles &lat, int context_src,
*inCache = tmp_in_cache;
}
lat = accessLatency;
//assert(check());
return blk;
}

View file

@ -51,6 +51,7 @@
#include <list>
#include <unordered_map>
#include "mem/cache/base.hh"
#include "mem/cache/blk.hh"
#include "mem/cache/tags/base.hh"
#include "mem/packet.hh"