gem5/src/gpu-compute/gpu_tlb.cc
Tony Gutierrez 74249f80df hsail,gpu-compute: fixes to appease clang++
fixes to appease clang++. tested on:

Ubuntu clang version 3.5.0-4ubuntu2~trusty2
(tags/RELEASE_350/final) (based on LLVM 3.5.0)

Ubuntu clang version 3.6.0-2ubuntu1~trusty1
(tags/RELEASE_360/final) (based on LLVM 3.6.0)

the fixes address the following five issues:

1) the exec continuations in gpu_static_inst.hh were marked
   as protected when they should be public. here we mark
   them as public

2) the Abs instruction uses std::abs() in its execute method.
   because Abs is templated, it can also operate on U32 and U64
   types, which cause Abs::execute() to pass uint32_t and uint64_t
   types to std::abs() respectively. this triggers a warning
   because std::abs() has no effect in this case. to remedy this
   we add template specialization for the execute() method of Abs
   when its template parameter is U32 or U64.

3) Some protocols that utilize the code in cprintf.hh were missing
   includes to BoolVec.hh, which defines operator<< for the BoolVec
   type. This would cause issues when the generated code would try
   to pass a BoolVec type to a method in cprintf.hh that used
   operator<< on an instance of a BoolVec.

4) Surprise, clang doesn't like it when you clobber all the bits
   in a newly allocated object. I.e., this code:

   tlb = new GpuTlbEntry[size];
   std::memset(tlb, 0, sizeof(GpuTlbEntry) * size);

   Let's use std::vector to track the TLB entries in the GpuTlb now...

5) There were a few variables used only in DPRINTFs, so we mark them
   with M5_VAR_USED.
2016-10-26 22:48:45 -04:00

1800 lines
60 KiB
C++

/*
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Lisa Hsu
*/
#include "gpu-compute/gpu_tlb.hh"
#include <cmath>
#include <cstring>
#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/pagetable_walker.hh"
#include "arch/x86/regs/misc.hh"
#include "arch/x86/x86_traits.hh"
#include "base/bitfield.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/base.hh"
#include "cpu/thread_context.hh"
#include "debug/GPUPrefetch.hh"
#include "debug/GPUTLB.hh"
#include "mem/packet_access.hh"
#include "mem/page_table.hh"
#include "mem/request.hh"
#include "sim/process.hh"
namespace X86ISA
{
/**
 * Construct a GpuTLB from its parameter object.
 *
 * Copies the configuration out of @p p, allocates the backing array of
 * TLB entries, places every entry on the free list of its set, and creates
 * one CpuSidePort / MemSidePort per connected slave / master port.
 *
 * @param p parameter object supplying size, assoc, allocationPolicy,
 *          accessDistance, clk_domain, maxOutstandingReqs, the hit/miss
 *          latencies and the port connection counts.
 */
GpuTLB::GpuTLB(const Params *p)
    : MemObject(p), configAddress(0), size(p->size),
      cleanupEvent(this, false, Event::Maximum_Pri), exitEvent(this)
{
    assoc = p->assoc;
    // assoc is used as a divisor right below; a zero associativity would
    // otherwise slip past the "assoc <= size" check and divide by zero.
    assert(assoc > 0);
    assert(assoc <= size);
    numSets = size/assoc;
    allocationPolicy = p->allocationPolicy;
    hasMemSidePort = false;
    accessDistance = p->accessDistance;
    clock = p->clk_domain->clockPeriod();

    // one default-constructed entry per TLB slot
    tlb.assign(size, GpuTlbEntry());

    freeList.resize(numSets);
    entryList.resize(numSets);

    // initially every way of every set is free
    for (int set = 0; set < numSets; ++set) {
        for (int way = 0; way < assoc; ++way) {
            int x = set * assoc + way;
            freeList[set].push_back(&tlb.at(x));
        }
    }

    // fully associative when a single set holds all the entries
    FA = (size == assoc);

    /**
     * @warning: the set-associative version assumes you have a
     * fixed page size of 4KB.
     * If the page size is greater than 4KB (as defined in the
     * TheISA::PageBytes), then there are various issues w/ the current
     * implementation (you'd have the same 8KB page being replicated in
     * different sets etc)
     */
    // NOTE(review): using numSets - 1 as a bit mask presumably expects
    // numSets to be a power of two -- confirm against the configs.
    setMask = numSets - 1;

    // The walker hookup below is compiled out, so give the pointer a
    // well-defined value; getWalker() would otherwise return an
    // uninitialized pointer.
    walker = nullptr;

#if 0
    // GpuTLB doesn't yet support full system
    walker = p->walker;
    walker->setTLB(this);
#endif

    maxCoalescedReqs = p->maxOutstandingReqs;

    // Do not allow maxCoalescedReqs to be more than the TLB associativity
    if (maxCoalescedReqs > assoc) {
        maxCoalescedReqs = assoc;
        cprintf("Forcing maxCoalescedReqs to %d (TLB assoc.) \n", assoc);
    }

    outstandingReqs = 0;
    hitLatency = p->hitLatency;
    missLatency1 = p->missLatency1;
    missLatency2 = p->missLatency2;

    // create the slave ports based on the number of connected ports
    for (size_t i = 0; i < p->port_slave_connection_count; ++i) {
        cpuSidePort.push_back(new CpuSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }

    // create the master ports based on the number of connected ports
    for (size_t i = 0; i < p->port_master_connection_count; ++i) {
        memSidePort.push_back(new MemSidePort(csprintf("%s-port%d",
                              name(), i), this, i));
    }
}
/**
 * Destructor. Only verifies that no translation return event is still
 * registered.
 */
// fixme: this is never called?
GpuTLB::~GpuTLB()
{
    // the hash-map of pending return events must have been drained
    assert(translationReturnEvent.size() == 0);
}
/**
 * Return the CPU-side (slave) port with the given index.
 *
 * @param if_name interface name; only "slave" is accepted.
 * @param idx index into cpuSidePort; must be in [0, cpuSidePort.size()).
 * @return reference to the selected CPU-side port.
 *
 * Panics on an unknown interface name or an out-of-range (including
 * negative) index.
 */
BaseSlavePort&
GpuTLB::getSlavePort(const std::string &if_name, PortID idx)
{
    if (if_name != "slave") {
        panic("GpuTLB::getSlavePort: unknown port %s\n", if_name);
    }

    // Reject negative indices as well: they would pass a pure upper-bound
    // check and then index the vector out of range.
    if (idx < 0 || idx >= static_cast<PortID>(cpuSidePort.size())) {
        panic("GpuTLB::getSlavePort: unknown index %d\n", idx);
    }

    return *cpuSidePort[idx];
}
/**
 * Return the memory-side (master) port with the given index and record
 * that this TLB has a memory-side port (hasMemSidePort).
 *
 * @param if_name interface name; only "master" is accepted.
 * @param idx index into memSidePort; must be in [0, memSidePort.size()).
 * @return reference to the selected memory-side port.
 *
 * Panics on an unknown interface name or an out-of-range (including
 * negative) index.
 */
BaseMasterPort&
GpuTLB::getMasterPort(const std::string &if_name, PortID idx)
{
    if (if_name != "master") {
        panic("GpuTLB::getMasterPort: unknown port %s\n", if_name);
    }

    // Reject negative indices as well: they would pass a pure upper-bound
    // check and then index the vector out of range.
    if (idx < 0 || idx >= static_cast<PortID>(memSidePort.size())) {
        panic("GpuTLB::getMasterPort: unknown index %d\n", idx);
    }

    // translationReturn() consults this flag to decide whether a miss is
    // forwarded to a lower level or handled as a page walk here
    hasMemSidePort = true;

    return *memSidePort[idx];
}
/**
 * Install a copy of @p entry for virtual page @p vpn.
 *
 * A free slot of the target set is used when one exists; otherwise the
 * entry at the back of the set's entry list is reused. The filled slot is
 * placed at the front of the set's entry list.
 *
 * @param vpn virtual page address; the set index is taken from the bits
 *            above TheISA::PageShift, masked with setMask.
 * @param entry entry whose contents are copied into the slot.
 * @return pointer to the slot that now holds the entry.
 */
GpuTlbEntry*
GpuTLB::insert(Addr vpn, GpuTlbEntry &entry)
{
    const int set_idx = (vpn >> TheISA::PageShift) & setMask;
    auto &free_slots = freeList[set_idx];
    auto &resident = entryList[set_idx];

    GpuTlbEntry *slot = nullptr;

    if (free_slots.empty()) {
        // no free way left: recycle the entry at the back of the list
        slot = resident.back();
        resident.pop_back();
    } else {
        slot = free_slots.front();
        free_slots.pop_front();
    }

    *slot = entry;
    slot->vaddr = vpn;
    resident.push_front(slot);

    return slot;
}
/**
 * Search the set selected by @p va for an entry whose page covers @p va.
 *
 * @param va virtual address to look up.
 * @param update_lru when true, a matching entry is moved to the front of
 *                   its set's entry list.
 * @return iterator to the matching entry, or the end() iterator of the
 *         set's entry list when nothing matches.
 */
GpuTLB::EntryList::iterator
GpuTLB::lookupIt(Addr va, bool update_lru)
{
    const int set_idx = (va >> TheISA::PageShift) & setMask;

    // a fully associative TLB must always map to set 0
    assert(!FA || !set_idx);

    auto &ways = entryList[set_idx];

    for (auto it = ways.begin(); it != ways.end(); ++it) {
        GpuTlbEntry *candidate = *it;
        const int page_size = candidate->size();
        const bool covers = candidate->vaddr <= va &&
                            candidate->vaddr + page_size > va;

        if (!covers)
            continue;

        DPRINTF(GPUTLB, "Matched vaddr %#x to entry starting at %#x "
                "with size %#x.\n", va, candidate->vaddr, page_size);

        if (!update_lru)
            return it;

        // re-insert the hit at the front and drop its old position
        ways.push_front(candidate);
        ways.erase(it);
        return ways.begin();
    }

    return ways.end();
}
/**
 * Pointer-returning wrapper around lookupIt().
 *
 * @param va virtual address to look up.
 * @param update_lru forwarded to lookupIt().
 * @return the matching entry, or nullptr when lookupIt() found none.
 */
GpuTlbEntry*
GpuTLB::lookup(Addr va, bool update_lru)
{
    const int set_idx = (va >> TheISA::PageShift) & setMask;
    auto hit = lookupIt(va, update_lru);

    return (hit != entryList[set_idx].end()) ? *hit : nullptr;
}
void
GpuTLB::invalidateAll()
{
DPRINTF(GPUTLB, "Invalidating all entries.\n");
for (int i = 0; i < numSets; ++i) {
while (!entryList[i].empty()) {
GpuTlbEntry *entry = entryList[i].front();
entryList[i].pop_front();
freeList[i].push_back(entry);
}
}
}
void
GpuTLB::setConfigAddress(uint32_t addr)
{
configAddress = addr;
}
void
GpuTLB::invalidateNonGlobal()
{
DPRINTF(GPUTLB, "Invalidating all non global entries.\n");
for (int i = 0; i < numSets; ++i) {
for (auto entryIt = entryList[i].begin();
entryIt != entryList[i].end();) {
if (!(*entryIt)->global) {
freeList[i].push_back(*entryIt);
entryList[i].erase(entryIt++);
} else {
++entryIt;
}
}
}
}
/**
 * Remove the entry that covers @p va, if there is one, and return its slot
 * to the free list of its set. The LRU order is not updated by the lookup.
 *
 * @param va virtual address whose mapping is dropped.
 * @param asn not used by this implementation.
 */
void
GpuTLB::demapPage(Addr va, uint64_t asn)
{
    const int set_idx = (va >> TheISA::PageShift) & setMask;
    auto hit = lookupIt(va, false);
    auto &resident = entryList[set_idx];

    if (hit == resident.end())
        return;

    freeList[set_idx].push_back(*hit);
    resident.erase(hit);
}
/**
 * Translate a request that targets the internal (non-memory) address
 * space.
 *
 * The prefix taken from the request's virtual address selects the space:
 *  - CPUID: not implemented, panics.
 *  - MSR: the request is flagged MMAPPED_IPR and its physical address is
 *    set to the matching misc register index times sizeof(MiscReg); an
 *    unknown MSR number yields a GeneralProtection fault.
 *  - IO: port 0xCF8 with a 4-byte access maps to the PCI config address
 *    misc register; ports 0xCFC-0xCFF go to PCI config space when bit 31
 *    of that register is set; everything else becomes an uncacheable
 *    access to the IO physical address range.
 * Any other prefix panics.
 *
 * @param req request being translated; its flags and paddr are updated.
 * @param tc thread context used to read MISCREG_PCI_CONFIG_ADDRESS.
 * @return NoFault on success, GeneralProtection for an unknown MSR.
 */
Fault
GpuTLB::translateInt(RequestPtr req, ThreadContext *tc)
{
    DPRINTF(GPUTLB, "Addresses references internal memory.\n");

    const Addr vaddr = req->getVaddr();
    const Addr prefix = (vaddr >> 3) & IntAddrPrefixMask;

    if (prefix == IntAddrPrefixCPUID) {
        panic("CPUID memory space not yet implemented!\n");
    }

    if (prefix == IntAddrPrefixMSR) {
        const Addr msr_num = (vaddr >> 3) & ~IntAddrPrefixMask;

        // the flag is set before the MSR number is validated
        req->setFlags(Request::MMAPPED_IPR);

        Addr regNum = 0;

        switch (msr_num) {
          case 0x10: regNum = MISCREG_TSC; break;
          case 0x1B: regNum = MISCREG_APIC_BASE; break;
          case 0xFE: regNum = MISCREG_MTRRCAP; break;
          case 0x174: regNum = MISCREG_SYSENTER_CS; break;
          case 0x175: regNum = MISCREG_SYSENTER_ESP; break;
          case 0x176: regNum = MISCREG_SYSENTER_EIP; break;
          case 0x179: regNum = MISCREG_MCG_CAP; break;
          case 0x17A: regNum = MISCREG_MCG_STATUS; break;
          case 0x17B: regNum = MISCREG_MCG_CTL; break;
          case 0x1D9: regNum = MISCREG_DEBUG_CTL_MSR; break;
          case 0x1DB: regNum = MISCREG_LAST_BRANCH_FROM_IP; break;
          case 0x1DC: regNum = MISCREG_LAST_BRANCH_TO_IP; break;
          case 0x1DD: regNum = MISCREG_LAST_EXCEPTION_FROM_IP; break;
          case 0x1DE: regNum = MISCREG_LAST_EXCEPTION_TO_IP; break;
          case 0x200: regNum = MISCREG_MTRR_PHYS_BASE_0; break;
          case 0x201: regNum = MISCREG_MTRR_PHYS_MASK_0; break;
          case 0x202: regNum = MISCREG_MTRR_PHYS_BASE_1; break;
          case 0x203: regNum = MISCREG_MTRR_PHYS_MASK_1; break;
          case 0x204: regNum = MISCREG_MTRR_PHYS_BASE_2; break;
          case 0x205: regNum = MISCREG_MTRR_PHYS_MASK_2; break;
          case 0x206: regNum = MISCREG_MTRR_PHYS_BASE_3; break;
          case 0x207: regNum = MISCREG_MTRR_PHYS_MASK_3; break;
          case 0x208: regNum = MISCREG_MTRR_PHYS_BASE_4; break;
          case 0x209: regNum = MISCREG_MTRR_PHYS_MASK_4; break;
          case 0x20A: regNum = MISCREG_MTRR_PHYS_BASE_5; break;
          case 0x20B: regNum = MISCREG_MTRR_PHYS_MASK_5; break;
          case 0x20C: regNum = MISCREG_MTRR_PHYS_BASE_6; break;
          case 0x20D: regNum = MISCREG_MTRR_PHYS_MASK_6; break;
          case 0x20E: regNum = MISCREG_MTRR_PHYS_BASE_7; break;
          case 0x20F: regNum = MISCREG_MTRR_PHYS_MASK_7; break;
          case 0x250: regNum = MISCREG_MTRR_FIX_64K_00000; break;
          case 0x258: regNum = MISCREG_MTRR_FIX_16K_80000; break;
          case 0x259: regNum = MISCREG_MTRR_FIX_16K_A0000; break;
          case 0x268: regNum = MISCREG_MTRR_FIX_4K_C0000; break;
          case 0x269: regNum = MISCREG_MTRR_FIX_4K_C8000; break;
          case 0x26A: regNum = MISCREG_MTRR_FIX_4K_D0000; break;
          case 0x26B: regNum = MISCREG_MTRR_FIX_4K_D8000; break;
          case 0x26C: regNum = MISCREG_MTRR_FIX_4K_E0000; break;
          case 0x26D: regNum = MISCREG_MTRR_FIX_4K_E8000; break;
          case 0x26E: regNum = MISCREG_MTRR_FIX_4K_F0000; break;
          case 0x26F: regNum = MISCREG_MTRR_FIX_4K_F8000; break;
          case 0x277: regNum = MISCREG_PAT; break;
          case 0x2FF: regNum = MISCREG_DEF_TYPE; break;
          case 0x400: regNum = MISCREG_MC0_CTL; break;
          case 0x404: regNum = MISCREG_MC1_CTL; break;
          case 0x408: regNum = MISCREG_MC2_CTL; break;
          case 0x40C: regNum = MISCREG_MC3_CTL; break;
          case 0x410: regNum = MISCREG_MC4_CTL; break;
          case 0x414: regNum = MISCREG_MC5_CTL; break;
          case 0x418: regNum = MISCREG_MC6_CTL; break;
          case 0x41C: regNum = MISCREG_MC7_CTL; break;
          case 0x401: regNum = MISCREG_MC0_STATUS; break;
          case 0x405: regNum = MISCREG_MC1_STATUS; break;
          case 0x409: regNum = MISCREG_MC2_STATUS; break;
          case 0x40D: regNum = MISCREG_MC3_STATUS; break;
          case 0x411: regNum = MISCREG_MC4_STATUS; break;
          case 0x415: regNum = MISCREG_MC5_STATUS; break;
          case 0x419: regNum = MISCREG_MC6_STATUS; break;
          case 0x41D: regNum = MISCREG_MC7_STATUS; break;
          case 0x402: regNum = MISCREG_MC0_ADDR; break;
          case 0x406: regNum = MISCREG_MC1_ADDR; break;
          case 0x40A: regNum = MISCREG_MC2_ADDR; break;
          case 0x40E: regNum = MISCREG_MC3_ADDR; break;
          case 0x412: regNum = MISCREG_MC4_ADDR; break;
          case 0x416: regNum = MISCREG_MC5_ADDR; break;
          case 0x41A: regNum = MISCREG_MC6_ADDR; break;
          case 0x41E: regNum = MISCREG_MC7_ADDR; break;
          case 0x403: regNum = MISCREG_MC0_MISC; break;
          case 0x407: regNum = MISCREG_MC1_MISC; break;
          case 0x40B: regNum = MISCREG_MC2_MISC; break;
          case 0x40F: regNum = MISCREG_MC3_MISC; break;
          case 0x413: regNum = MISCREG_MC4_MISC; break;
          case 0x417: regNum = MISCREG_MC5_MISC; break;
          case 0x41B: regNum = MISCREG_MC6_MISC; break;
          case 0x41F: regNum = MISCREG_MC7_MISC; break;
          case 0xC0000080: regNum = MISCREG_EFER; break;
          case 0xC0000081: regNum = MISCREG_STAR; break;
          case 0xC0000082: regNum = MISCREG_LSTAR; break;
          case 0xC0000083: regNum = MISCREG_CSTAR; break;
          case 0xC0000084: regNum = MISCREG_SF_MASK; break;
          case 0xC0000100: regNum = MISCREG_FS_BASE; break;
          case 0xC0000101: regNum = MISCREG_GS_BASE; break;
          case 0xC0000102: regNum = MISCREG_KERNEL_GS_BASE; break;
          case 0xC0000103: regNum = MISCREG_TSC_AUX; break;
          case 0xC0010000: regNum = MISCREG_PERF_EVT_SEL0; break;
          case 0xC0010001: regNum = MISCREG_PERF_EVT_SEL1; break;
          case 0xC0010002: regNum = MISCREG_PERF_EVT_SEL2; break;
          case 0xC0010003: regNum = MISCREG_PERF_EVT_SEL3; break;
          case 0xC0010004: regNum = MISCREG_PERF_EVT_CTR0; break;
          case 0xC0010005: regNum = MISCREG_PERF_EVT_CTR1; break;
          case 0xC0010006: regNum = MISCREG_PERF_EVT_CTR2; break;
          case 0xC0010007: regNum = MISCREG_PERF_EVT_CTR3; break;
          case 0xC0010010: regNum = MISCREG_SYSCFG; break;
          case 0xC0010016: regNum = MISCREG_IORR_BASE0; break;
          case 0xC0010017: regNum = MISCREG_IORR_BASE1; break;
          case 0xC0010018: regNum = MISCREG_IORR_MASK0; break;
          case 0xC0010019: regNum = MISCREG_IORR_MASK1; break;
          case 0xC001001A: regNum = MISCREG_TOP_MEM; break;
          case 0xC001001D: regNum = MISCREG_TOP_MEM2; break;
          case 0xC0010114: regNum = MISCREG_VM_CR; break;
          case 0xC0010115: regNum = MISCREG_IGNNE; break;
          case 0xC0010116: regNum = MISCREG_SMM_CTL; break;
          case 0xC0010117: regNum = MISCREG_VM_HSAVE_PA; break;
          default:
            return std::make_shared<GeneralProtection>(0);
        }

        // Scale the index by the size of a MiscReg so that memory
        // dependence calculations never see two registers as overlapping.
        req->setPaddr(regNum * sizeof(MiscReg));
        return NoFault;
    }

    if (prefix == IntAddrPrefixIO) {
        // TODO If CPL > IOPL or in virtual mode, check the I/O permission
        // bitmap in the TSS.

        const Addr io_port = vaddr & ~IntAddrPrefixMask;

        // The port number has to fit the expected 16 bit IO address space.
        assert(!(io_port & ~0xFFFF));

        if (io_port == 0xCF8 && req->getSize() == 4) {
            req->setFlags(Request::MMAPPED_IPR);
            req->setPaddr(MISCREG_PCI_CONFIG_ADDRESS * sizeof(MiscReg));
        } else if ((io_port & ~mask(2)) == 0xCFC) {
            req->setFlags(Request::UNCACHEABLE);

            const Addr pci_cfg_addr =
                tc->readMiscRegNoEffect(MISCREG_PCI_CONFIG_ADDRESS);

            // bit 31 of the config address register selects PCI config
            // space; otherwise the access is treated as plain IO
            if (bits(pci_cfg_addr, 31, 31)) {
                req->setPaddr(PhysAddrPrefixPciConfig |
                              mbits(pci_cfg_addr, 30, 2) |
                              (io_port & mask(2)));
            } else {
                req->setPaddr(PhysAddrPrefixIO | io_port);
            }
        } else {
            req->setFlags(Request::UNCACHEABLE);
            req->setPaddr(PhysAddrPrefixIO | io_port);
        }

        return NoFault;
    }

    panic("Access to unrecognized internal address space %#x.\n",
          prefix);
}
/**
 * Perform only the TLB lookup for a request.
 *
 * Mode checks that the regular translation path performs have been turned
 * into assertions here because those parts of the code are not really
 * used. The lookup is done with LRU update enabled, so a hit moves the
 * entry to the front of its set.
 *
 * @param req request whose virtual address is looked up.
 * @param tc thread context used to read MISCREG_M5_REG.
 * @param update_stats when false (functional access for memory or
 *                     instruction seeding) the local access/hit/miss
 *                     counters are left untouched.
 * @return true on a TLB hit; false on a miss, or when protected mode or
 *         paging is disabled.
 */
bool
GpuTLB::tlbLookup(RequestPtr req, ThreadContext *tc, bool update_stats)
{
#ifndef NDEBUG
    uint32_t flags = req->getFlags();
    int seg = flags & SegmentFlagMask;
#endif

    assert(seg != SEGMENT_REG_MS);

    const Addr vaddr = req->getVaddr();
    DPRINTF(GPUTLB, "TLB Lookup for vaddr %#x.\n", vaddr);

    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

    if (!m5Reg.prot)
        return false;

    DPRINTF(GPUTLB, "In protected mode.\n");

    // make sure we are in 64-bit mode
    assert(m5Reg.mode == LongMode);

    // Without paging there is nothing to look up.
    if (!m5Reg.paging)
        return false;

    DPRINTF(GPUTLB, "Paging enabled.\n");

    // update LRU stack on a hit
    const bool tlb_hit = (lookup(vaddr, true) != nullptr);

    if (update_stats) {
        localNumTLBAccesses++;

        if (tlb_hit) {
            localNumTLBHits++;
        } else {
            localNumTLBMisses++;
        }
    }

    return tlb_hit;
}
/**
 * Translate the virtual address of @p req and store the physical address
 * in the request.
 *
 * Requests to the internal address space (SEGMENT_REG_MS) are handed to
 * translateInt(). Otherwise, in protected non-long mode the segment
 * protection and limit checks are applied; with paging enabled the TLB is
 * consulted and, on a miss, the process page table is queried and the
 * result inserted into the TLB; finally the paging protection checks run.
 * With paging disabled, or in real mode, the virtual address is used as
 * the physical address.
 *
 * @param req request to translate; paddr and flags may be updated.
 * @param tc thread context supplying misc registers and the process.
 * @param translation not used by this function.
 * @param mode access type (read / write / execute).
 * @param delayedResponse out-parameter; set to false on every path.
 * @param timing when true, @p latency is updated with hit/miss latencies.
 * @param latency out-parameter; written only when @p timing is true and
 *                paging is enabled.
 * @return NoFault on success, otherwise a GeneralProtection or PageFault.
 */
Fault
GpuTLB::translate(RequestPtr req, ThreadContext *tc,
                  Translation *translation, Mode mode,
                  bool &delayedResponse, bool timing, int &latency)
{
    uint32_t flags = req->getFlags();
    int seg = flags & SegmentFlagMask;
    bool storeCheck = flags & (StoreCheck << FlagShift);

    // Set the out-parameter before any return path. It used to be set only
    // after the early return below, so callers that read it afterwards
    // (translateTiming) saw an indeterminate value for internal-space
    // requests.
    delayedResponse = false;

    // If this is true, we're dealing with a request
    // to a non-memory address space.
    if (seg == SEGMENT_REG_MS) {
        return translateInt(req, tc);
    }

    Addr vaddr = req->getVaddr();
    DPRINTF(GPUTLB, "Translating vaddr %#x.\n", vaddr);

    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

    // If protected mode has been enabled...
    if (m5Reg.prot) {
        DPRINTF(GPUTLB, "In protected mode.\n");
        // If we're not in 64-bit mode, do protection/limit checks
        if (m5Reg.mode != LongMode) {
            DPRINTF(GPUTLB, "Not in long mode. Checking segment "
                    "protection.\n");

            // Check for a null segment selector.
            if (!(seg == SEGMENT_REG_TSG || seg == SYS_SEGMENT_REG_IDTR ||
                seg == SEGMENT_REG_HS || seg == SEGMENT_REG_LS)
                && !tc->readMiscRegNoEffect(MISCREG_SEG_SEL(seg))) {
                return std::make_shared<GeneralProtection>(0);
            }

            bool expandDown = false;
            SegAttr attr = tc->readMiscRegNoEffect(MISCREG_SEG_ATTR(seg));

            if (seg >= SEGMENT_REG_ES && seg <= SEGMENT_REG_HS) {
                if (!attr.writable && (mode == BaseTLB::Write ||
                    storeCheck))
                    return std::make_shared<GeneralProtection>(0);

                if (!attr.readable && mode == BaseTLB::Read)
                    return std::make_shared<GeneralProtection>(0);

                expandDown = attr.expandDown;
            }

            Addr base = tc->readMiscRegNoEffect(MISCREG_SEG_BASE(seg));
            Addr limit = tc->readMiscRegNoEffect(MISCREG_SEG_LIMIT(seg));
            // This assumes we're not in 64 bit mode. If we were, the
            // default address size is 64 bits, overridable to 32.
            int size = 32;
            bool sizeOverride = (flags & (AddrSizeFlagBit << FlagShift));
            SegAttr csAttr = tc->readMiscRegNoEffect(MISCREG_CS_ATTR);

            if ((csAttr.defaultSize && sizeOverride) ||
                (!csAttr.defaultSize && !sizeOverride)) {
                size = 16;
            }

            // offsets of the first and last byte touched by the access
            Addr offset = bits(vaddr - base, size - 1, 0);
            Addr endOffset = offset + req->getSize() - 1;

            if (expandDown) {
                DPRINTF(GPUTLB, "Checking an expand down segment.\n");
                warn_once("Expand down segments are untested.\n");

                if (offset <= limit || endOffset <= limit)
                    return std::make_shared<GeneralProtection>(0);
            } else {
                if (offset > limit || endOffset > limit)
                    return std::make_shared<GeneralProtection>(0);
            }
        }

        // If paging is enabled, do the translation.
        if (m5Reg.paging) {
            DPRINTF(GPUTLB, "Paging enabled.\n");
            // The vaddr already has the segment base applied.
            GpuTlbEntry *entry = lookup(vaddr);
            localNumTLBAccesses++;

            if (!entry) {
                localNumTLBMisses++;
                if (timing) {
                    latency = missLatency1;
                }

                if (FullSystem) {
                    fatal("GpuTLB doesn't support full-system mode\n");
                } else {
                    DPRINTF(GPUTLB, "Handling a TLB miss for address %#x "
                            "at pc %#x.\n", vaddr, tc->instAddr());

                    Process *p = tc->getProcessPtr();
                    GpuTlbEntry newEntry;
                    bool success = p->pTable->lookup(vaddr, newEntry);

                    if (!success && mode != BaseTLB::Execute) {
                        // penalize a "page fault" more
                        if (timing) {
                            latency += missLatency2;
                        }

                        // retry the page table once the stack fixup
                        // reports success
                        if (p->fixupStackFault(vaddr))
                            success = p->pTable->lookup(vaddr, newEntry);
                    }

                    if (!success) {
                        return std::make_shared<PageFault>(vaddr, true,
                                                           mode, true,
                                                           false);
                    } else {
                        newEntry.valid = success;
                        Addr alignedVaddr = p->pTable->pageAlign(vaddr);

                        DPRINTF(GPUTLB, "Mapping %#x to %#x\n",
                                alignedVaddr, newEntry.pageStart());

                        entry = insert(alignedVaddr, newEntry);
                    }

                    DPRINTF(GPUTLB, "Miss was serviced.\n");
                }
            } else {
                localNumTLBHits++;

                if (timing) {
                    latency = hitLatency;
                }
            }

            // Do paging protection checks.
            bool inUser = (m5Reg.cpl == 3 &&
                           !(flags & (CPL0FlagBit << FlagShift)));

            CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);
            bool badWrite = (!entry->writable && (inUser || cr0.wp));

            if ((inUser && !entry->user) || (mode == BaseTLB::Write &&
                 badWrite)) {
                // The page must have been present to get into the TLB in
                // the first place. We'll assume the reserved bits are
                // fine even though we're not checking them.
                return std::make_shared<PageFault>(vaddr, true, mode,
                                                   inUser, false);
            }

            if (storeCheck && badWrite) {
                // This would fault if this were a write, so return a page
                // fault that reflects that happening.
                return std::make_shared<PageFault>(vaddr, true,
                                                   BaseTLB::Write,
                                                   inUser, false);
            }

            DPRINTF(GPUTLB, "Entry found with paddr %#x, doing protection "
                    "checks.\n", entry->paddr);

            // combine the entry's physical page with the in-page offset
            int page_size = entry->size();
            Addr paddr = entry->paddr | (vaddr & (page_size - 1));
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
            req->setPaddr(paddr);

            if (entry->uncacheable)
                req->setFlags(Request::UNCACHEABLE);
        } else {
            //Use the address which already has segmentation applied.
            DPRINTF(GPUTLB, "Paging disabled.\n");
            DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
            req->setPaddr(vaddr);
        }
    } else {
        // Real mode
        DPRINTF(GPUTLB, "In real mode.\n");
        DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, vaddr);
        req->setPaddr(vaddr);
    }

    // Check for an access to the local APIC
    if (FullSystem) {
        LocalApicBase localApicBase =
            tc->readMiscRegNoEffect(MISCREG_APIC_BASE);

        Addr baseAddr = localApicBase.base * PageBytes;
        Addr paddr = req->getPaddr();

        if (baseAddr <= paddr && baseAddr + PageBytes > paddr) {
            // Force the access to be uncacheable.
            req->setFlags(Request::UNCACHEABLE);
            req->setPaddr(x86LocalAPICAddress(tc->contextId(),
                                              paddr - baseAddr));
        }
    }

    return NoFault;
}
/**
 * Atomic-mode translation: calls translate() with timing disabled and no
 * Translation object.
 *
 * @param req request to translate.
 * @param tc thread context of the requester.
 * @param mode access type.
 * @param latency forwarded to translate().
 * @return the fault produced by translate().
 */
Fault
GpuTLB::translateAtomic(RequestPtr req, ThreadContext *tc, Mode mode,
                        int &latency)
{
    // translate() requires the out-parameter; its value is not used here
    bool ignored_delayed = false;

    Fault fault = GpuTLB::translate(req, tc, nullptr, mode,
                                    ignored_delayed, false, latency);
    return fault;
}
/**
 * Timing-mode translation: calls translate() with timing enabled and,
 * unless the response is delayed, completes @p translation immediately.
 *
 * @param req request to translate.
 * @param tc thread context of the requester.
 * @param translation object whose finish() receives the result; must not
 *                    be null.
 * @param mode access type.
 * @param latency forwarded to translate().
 */
void
GpuTLB::translateTiming(RequestPtr req, ThreadContext *tc,
                        Translation *translation, Mode mode, int &latency)
{
    // Must be initialized: translate() can return (for internal address
    // space requests) before writing it, and it is read right below.
    bool delayedResponse = false;
    assert(translation);

    Fault fault = GpuTLB::translate(req, tc, translation, mode,
                                    delayedResponse, true, latency);

    if (!delayedResponse)
        translation->finish(fault, req, tc, mode);
}
/**
 * Accessor for the walker member.
 *
 * NOTE(review): the constructor's walker hookup is compiled out
 * (#if 0), so callers presumably must not rely on a usable walker --
 * confirm before dereferencing the result.
 *
 * @return the stored walker pointer.
 */
Walker*
GpuTLB::getWalker()
{
    return this->walker;
}
/**
 * Checkpoint hook. The body is empty: nothing is written to @p cp.
 */
void
GpuTLB::serialize(CheckpointOut &cp) const
{
}
/**
 * Checkpoint-restore hook. The body is empty: nothing is read from @p cp.
 */
void
GpuTLB::unserialize(CheckpointIn &cp)
{
}
/**
 * Register the statistics of this TLB: name and describe each stat and
 * define the derived formulas (local/global miss rate, local latency).
 * Every stat name is prefixed with this object's name().
 */
void
GpuTLB::regStats()
{
    MemObject::regStats();

    // common prefix shared by every stat name below
    const std::string stat_owner = name();

    localNumTLBAccesses
        .name(stat_owner + ".local_TLB_accesses")
        .desc("Number of TLB accesses")
        ;

    localNumTLBHits
        .name(stat_owner + ".local_TLB_hits")
        .desc("Number of TLB hits")
        ;

    localNumTLBMisses
        .name(stat_owner + ".local_TLB_misses")
        .desc("Number of TLB misses")
        ;

    localTLBMissRate
        .name(stat_owner + ".local_TLB_miss_rate")
        .desc("TLB miss rate")
        ;

    accessCycles
        .name(stat_owner + ".access_cycles")
        .desc("Cycles spent accessing this TLB level")
        ;

    pageTableCycles
        .name(stat_owner + ".page_table_cycles")
        .desc("Cycles spent accessing the page table")
        ;

    // local miss rate, in percent
    localTLBMissRate = 100 * localNumTLBMisses / localNumTLBAccesses;

    numUniquePages
        .name(stat_owner + ".unique_pages")
        .desc("Number of unique pages touched")
        ;

    localCycles
        .name(stat_owner + ".local_cycles")
        .desc("Number of cycles spent in queue for all incoming reqs")
        ;

    localLatency
        .name(stat_owner + ".local_latency")
        .desc("Avg. latency over incoming coalesced reqs")
        ;

    localLatency = localCycles / localNumTLBAccesses;

    globalNumTLBAccesses
        .name(stat_owner + ".global_TLB_accesses")
        .desc("Number of TLB accesses")
        ;

    globalNumTLBHits
        .name(stat_owner + ".global_TLB_hits")
        .desc("Number of TLB hits")
        ;

    globalNumTLBMisses
        .name(stat_owner + ".global_TLB_misses")
        .desc("Number of TLB misses")
        ;

    globalTLBMissRate
        .name(stat_owner + ".global_TLB_miss_rate")
        .desc("TLB miss rate")
        ;

    // global miss rate, in percent
    globalTLBMissRate = 100 * globalNumTLBMisses / globalNumTLBAccesses;

    avgReuseDistance
        .name(stat_owner + ".avg_reuse_distance")
        .desc("avg. reuse distance over all pages (in ticks)")
        ;
}
/**
 * Perform the TLB lookup for a coalesced request and schedule a TLBEvent
 * hitLatency cycles later that acts on the outcome.
 *
 * On a hit a copy of the matching entry is attached to the packet's
 * sender state. Unless the request is a prefetch, the access, hit/miss
 * and cycle statistics are updated.
 *
 * @param pkt coalesced translation request; must carry a TranslationState
 *            as senderState.
 */
void
GpuTLB::issueTLBLookup(PacketPtr pkt)
{
    assert(pkt);
    assert(pkt->senderState);

    const Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                          TheISA::PageBytes);

    TranslationState *state =
        safe_cast<TranslationState*>(pkt->senderState);

    // prefetches do not contribute to the statistics
    const bool update_stats = !state->prefetch;
    ThreadContext *thread = state->tc;

    DPRINTF(GPUTLB, "Translation req. for virt. page addr %#x\n",
            virt_page_addr);

    const int req_cnt = state->reqCnt.back();

    if (update_stats) {
        // the matching "+= curTick()" happens when the event fires
        accessCycles -= (curTick() * req_cnt);
        localCycles -= curTick();
        updatePageFootprint(virt_page_addr);
        globalNumTLBAccesses += req_cnt;
    }

    RequestPtr request = pkt->req;

    // Access the TLB and figure out if it's a hit or a miss.
    const bool hit = tlbLookup(request, thread, update_stats);

    if (hit) {
        // Put a copy of the entry in SenderState
        GpuTlbEntry *entry = lookup(request->getVaddr(), false);
        assert(entry);

        state->tlbEntry =
            new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
    }

    if (update_stats) {
        if (hit) {
            // the reqCnt has an entry per level, so its size tells us
            // which level we are in
            state->hitLevel = state->reqCnt.size();
            globalNumTLBHits += req_cnt;
        } else {
            globalNumTLBMisses += req_cnt;
        }
    }

    /*
     * We now know the TLB lookup outcome (if it's a hit or a miss), as well
     * as the TLB access latency.
     *
     * We create and schedule a new TLBEvent which will help us take the
     * appropriate actions (e.g., update TLB on a hit, send request to lower
     * level TLB on a miss, or start a page walk if this was the last-level
     * TLB)
     */
    TLBEvent *tlb_event =
        new TLBEvent(this, virt_page_addr, hit ? TLB_HIT : TLB_MISS, pkt);

    if (translationReturnEvent.count(virt_page_addr)) {
        panic("Virtual Page Address %#x already has a return event\n",
              virt_page_addr);
    }

    translationReturnEvent[virt_page_addr] = tlb_event;
    assert(tlb_event);

    DPRINTF(GPUTLB, "schedule translationReturnEvent @ curTick %d\n",
            curTick() + this->ticks(hitLatency));

    schedule(tlb_event, curTick() + this->ticks(hitLatency));
}
GpuTLB::TLBEvent::TLBEvent(GpuTLB* _tlb, Addr _addr, tlbOutcome tlb_outcome,
PacketPtr _pkt)
: Event(CPU_Tick_Pri), tlb(_tlb), virtPageAddr(_addr),
outcome(tlb_outcome), pkt(_pkt)
{
}
/**
 * Run the paging protection checks for a translated request. A violation
 * is not reported as a fault here; it trips an assertion instead.
 *
 * @param tc thread context supplying MISCREG_M5_REG and MISCREG_CR0.
 * @param pkt packet whose request flags are inspected.
 * @param tlb_entry entry whose user/writable bits are checked.
 * @param mode access type of the request.
 */
void
GpuTLB::pagingProtectionChecks(ThreadContext *tc, PacketPtr pkt,
        GpuTlbEntry * tlb_entry, Mode mode)
{
    HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);
    const uint32_t req_flags = pkt->req->getFlags();
    const bool store_check = req_flags & (StoreCheck << FlagShift);

    // user-level access: CPL 3 and the request is not forced to CPL0
    const bool user_access =
        (m5Reg.cpl == 3 && !(req_flags & (CPL0FlagBit << FlagShift)));

    CR0 cr0 = tc->readMiscRegNoEffect(MISCREG_CR0);

    const bool write_denied =
        (!tlb_entry->writable && (user_access || cr0.wp));

    const bool user_denied = user_access && !tlb_entry->user;
    const bool write_fault = (mode == BaseTLB::Write) && write_denied;

    if (user_denied || write_fault) {
        // The page must have been present to get into the TLB in
        // the first place. We'll assume the reserved bits are
        // fine even though we're not checking them.
        assert(false);
    }

    if (store_check && write_denied) {
        // This would fault if this were a write; the regular translation
        // path returns a page fault for this case.
        assert(false);
    }
}
/**
 * Finish a translation and send the packet back through CPU-side port 0.
 *
 * Called with TLB_HIT on a hit; translationReturn() calls it with
 * TLB_MISS both when a miss returns from a lower level and after a page
 * walk. On a miss the entry carried in the sender state is inserted into
 * this TLB when allocationPolicy is set.
 *
 * @param virt_page_addr virtual page address of the translation.
 * @param tlb_outcome TLB_HIT, or anything else for the miss path.
 * @param pkt packet carrying the request and its TranslationState.
 */
void
GpuTLB::handleTranslationReturn(Addr virt_page_addr, tlbOutcome tlb_outcome,
        PacketPtr pkt)
{
    assert(pkt);
    const Addr vaddr = pkt->req->getVaddr();

    TranslationState *state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *thread = state->tc;
    const Mode access_mode = state->tlbMode;

    // the sender state carries the entry for hits and misses alike
    GpuTlbEntry *local_entry = state->tlbEntry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n", vaddr);
    } else {
        DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
                vaddr);

        // We are returning either from a page walk or from a hit at a lower
        // TLB level. The senderState should be "carrying" a pointer to the
        // correct TLBEntry.
        assert(local_entry);

        if (allocationPolicy) {
            DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                    virt_page_addr);

            local_entry = insert(virt_page_addr, *local_entry);
        }

        assert(local_entry);
    }

    /**
     * At this point the packet carries an up-to-date tlbEntry pointer
     * in its senderState.
     * Next step is to do the paging protection checks.
     */
    DPRINTF(GPUTLB, "Entry found with vaddr %#x,  doing protection checks "
            "while paddr was %#x.\n", local_entry->vaddr,
            local_entry->paddr);

    pagingProtectionChecks(thread, pkt, local_entry, access_mode);

    // physical page of the entry combined with the in-page offset
    const int page_size = local_entry->size();
    const Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    // Since this packet will be sent through the cpu side slave port,
    // it must be converted to a response pkt if it is not one already
    if (pkt->isRequest()) {
        pkt->makeTimingResponse();
    }

    pkt->req->setPaddr(paddr);

    if (local_entry->uncacheable) {
        pkt->req->setFlags(Request::UNCACHEABLE);
    }

    //send packet back to coalescer
    cpuSidePort[0]->sendTimingResp(pkt);

    //schedule cleanup event
    cleanupQueue.push(virt_page_addr);

    // schedule this only once per cycle.
    // The check is required because we might have multiple translations
    // returning the same cycle
    // this is a maximum priority event and must be on the same cycle
    // as the cleanup event in TLBCoalescer to avoid a race with
    // IssueProbeEvent caused by TLBCoalescer::MemSidePort::recvReqRetry
    if (!cleanupEvent.scheduled())
        schedule(cleanupEvent, curTick());
}
/**
 * Take the appropriate action for the result of a TLB lookup. Called from
 * TLBEvent::process().
 *
 *  - TLB_HIT: finish the translation.
 *  - TLB_MISS: forward the packet to the lower-level TLB when a
 *    memory-side port exists; otherwise re-schedule the event as a
 *    PAGE_WALK after missLatency2 cycles.
 *  - PAGE_WALK: query the process page table (retrying once after a
 *    successful stack fixup for non-execute accesses), attach the
 *    resulting entry to the sender state and finish the translation.
 *  - MISS_RETURN: a lower level answered; finish the translation.
 *
 * Cycle statistics are updated unless the request is a prefetch.
 *
 * @param virtPageAddr virtual page address of the translation.
 * @param outcome lookup outcome recorded in the event.
 * @param pkt packet carrying the request and its TranslationState.
 */
void
GpuTLB::translationReturn(Addr virtPageAddr, tlbOutcome outcome,
        PacketPtr pkt)
{
    DPRINTF(GPUTLB, "Triggered TLBEvent for addr %#x\n", virtPageAddr);

    assert(translationReturnEvent[virtPageAddr]);
    assert(pkt);

    // one cast serves all branches below
    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    int req_cnt = sender_state->reqCnt.back();
    bool update_stats = !sender_state->prefetch;

    if (outcome == TLB_HIT) {
        handleTranslationReturn(virtPageAddr, TLB_HIT, pkt);

        if (update_stats) {
            accessCycles += (req_cnt * curTick());
            localCycles += curTick();
        }

    } else if (outcome == TLB_MISS) {

        DPRINTF(GPUTLB, "This is a TLB miss\n");
        if (update_stats) {
            accessCycles += (req_cnt*curTick());
            localCycles += curTick();
        }

        if (hasMemSidePort) {
            // the one cycle added here represents the delay from when we
            // get the reply back till when we propagate it to the
            // coalescer above.
            if (update_stats) {
                accessCycles += (req_cnt * 1);
                localCycles += 1;
            }

            /**
             * There is a TLB below. Send the coalesced request.
             * We actually send the very first packet of all the
             * pending packets for this virtual page address.
             */
            if (!memSidePort[0]->sendTimingReq(pkt)) {
                DPRINTF(GPUTLB, "Failed sending translation request to "
                        "lower level TLB for addr %#x\n", virtPageAddr);

                // keep the packet so it can be retried later
                memSidePort[0]->retries.push_back(pkt);
            } else {
                DPRINTF(GPUTLB, "Sent translation request to lower level "
                        "TLB for addr %#x\n", virtPageAddr);
            }
        } else {
            //this is the last level TLB. Start a page walk
            DPRINTF(GPUTLB, "Last level TLB - start a page walk for "
                    "addr %#x\n", virtPageAddr);

            if (update_stats)
                pageTableCycles -= (req_cnt*curTick());

            // reuse the registered event, now marked as a page walk
            TLBEvent *tlb_event = translationReturnEvent[virtPageAddr];
            assert(tlb_event);
            tlb_event->updateOutcome(PAGE_WALK);
            schedule(tlb_event, curTick() + ticks(missLatency2));
        }
    } else if (outcome == PAGE_WALK) {
        if (update_stats)
            pageTableCycles += (req_cnt*curTick());

        // Need to access the page table and update the TLB
        DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                virtPageAddr);

        Process *p = sender_state->tc->getProcessPtr();
        TlbEntry newEntry;
        Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
        Addr alignedVaddr = p->pTable->pageAlign(vaddr);
        assert(alignedVaddr == virtPageAddr);
#endif
        bool success;
        success = p->pTable->lookup(vaddr, newEntry);
        if (!success && sender_state->tlbMode != BaseTLB::Execute) {
            if (p->fixupStackFault(vaddr)) {
                success = p->pTable->lookup(vaddr, newEntry);
            }
        }

        // Print virtPageAddr rather than alignedVaddr: the latter only
        // exists when NDEBUG is not defined, and the assertion above
        // checks that the two are equal.
        DPRINTF(GPUTLB, "Mapping %#x to %#x\n", virtPageAddr,
                newEntry.pageStart());

        // the last constructor argument records whether the lookup
        // succeeded
        sender_state->tlbEntry =
            new GpuTlbEntry(0, newEntry.vaddr, newEntry.paddr, success);

        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else if (outcome == MISS_RETURN) {
        /** we add an extra cycle in the return path of the translation
         * requests in between the various TLB levels.
         */
        handleTranslationReturn(virtPageAddr, TLB_MISS, pkt);
    } else {
        assert(false);
    }
}
void
GpuTLB::TLBEvent::process()
{
tlb->translationReturn(virtPageAddr, outcome, pkt);
}
/** @return a fixed, human-readable description of this event. */
const char*
GpuTLB::TLBEvent::description() const
{
    static const char event_description[] = "trigger translationDoneEvent";
    return event_description;
}
/**
 * Change the outcome that will be passed to translationReturn() the
 * next time this event is processed.
 *
 * @param _outcome new outcome to store
 */
void
GpuTLB::TLBEvent::updateOutcome(tlbOutcome _outcome)
{
    this->outcome = _outcome;
}
/** @return the virtual page address stored in this event. */
Addr
GpuTLB::TLBEvent::getTLBEventVaddr()
{
    return this->virtPageAddr;
}
/*
 * recvTimingReq receives a coalesced timing request from a TLBCoalescer
 * and passes it to issueTLBLookup().
 * The packet is rejected (false is returned) only when the number of
 * outstanding requests has reached maxCoalescedReqs.
 */
bool
GpuTLB::CpuSidePort::recvTimingReq(PacketPtr pkt)
{
    const bool has_capacity = tlb->outstandingReqs < tlb->maxCoalescedReqs;

    if (!has_capacity) {
        DPRINTF(GPUTLB, "Reached maxCoalescedReqs number %d\n",
                tlb->outstandingReqs);
        return false;
    }

    tlb->issueTLBLookup(pkt);
    // one more translation request is now in flight
    tlb->outstandingReqs++;
    return true;
}
/**
 * handleFuncTranslationReturn finishes a functional translation. It is
 * called on a TLB hit, when a TLB miss returns or when a page fault
 * returns. On a miss it inserts the entry carried by the sender state
 * into this TLB if the allocation policy says so, then it runs the
 * protection checks and writes the physical address into the request.
 * It does NOT create a new packet to update the packet's addr; this is
 * done in hsail-gpu code.
 *
 * @param pkt         packet whose senderState is a TranslationState
 *                    carrying the TLB entry to use
 * @param tlb_outcome TLB_HIT, or anything else for the miss path
 */
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
    TranslationState *state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *tc = state->tc;
    Mode mode = state->tlbMode;
    Addr vaddr = pkt->req->getVaddr();

    // In both cases the sender state carries the entry to start from.
    GpuTlbEntry *entry = state->tlbEntry;

    if (tlb_outcome == TLB_HIT) {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
                "%#x\n", vaddr);
    } else {
        DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
                "%#x\n", vaddr);

        // We are returning either from a page walk or from a hit at a
        // lower TLB level, so the carried entry must exist.
        assert(entry);

        if (allocationPolicy) {
            Addr virt_page_addr = roundDown(vaddr, TheISA::PageBytes);

            DPRINTF(GPUTLB, "allocating entry w/ addr %#x\n",
                    virt_page_addr);

            // From here on use the copy that lives in this TLB.
            entry = insert(virt_page_addr, *entry);
        }

        assert(entry);
    }

    DPRINTF(GPUTLB, "Entry found with vaddr %#x, doing protection checks "
            "while paddr was %#x.\n", entry->vaddr,
            entry->paddr);

    // Paging checks are done only for normal functional accesses. A
    // prefetch may target something that would not pass protection; we
    // do not want to fault because there is no demand access to deem it
    // a violation. The entry is simply used, and a later demand access
    // will fault if it really touches the page in violation.
    const bool is_demand = !state->prefetch;
    if (is_demand && state->tlbEntry->valid)
        pagingProtectionChecks(tc, pkt, entry, mode);

    // Combine the page frame with the offset inside the page.
    int page_size = entry->size();
    Addr paddr = entry->paddr | (vaddr & (page_size - 1));
    DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);

    pkt->req->setPaddr(paddr);

    if (entry->uncacheable)
        pkt->req->setFlags(Request::UNCACHEABLE);
}
/**
 * Functional translation entry point. This is used for atomic
 * translations, so everything (lookup, access to the TLB below or page
 * walk, and the final address update) happens inside this one call.
 *
 * Statistics and the page footprint are updated only for non-prefetch
 * requests. For a prefetch that cannot be translated the function
 * returns early without setting a physical address on the request.
 *
 * @param pkt packet whose senderState is a TranslationState
 */
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
    TranslationState *sender_state =
        safe_cast<TranslationState*>(pkt->senderState);

    ThreadContext *tc = sender_state->tc;
    bool update_stats = !sender_state->prefetch;

    Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
                                    TheISA::PageBytes);

    if (update_stats)
        tlb->updatePageFootprint(virt_page_addr);

    // do the TLB lookup; update_stats is forwarded so that prefetches
    // do not touch the stats
    bool success = tlb->tlbLookup(pkt->req, tc, update_stats);
    tlbOutcome tlb_outcome = success ? TLB_HIT : TLB_MISS;

    // functional mode means no coalescing
    // global metrics are the same as the local metrics
    if (update_stats) {
        tlb->globalNumTLBAccesses++;

        if (success) {
            sender_state->hitLevel = sender_state->reqCnt.size();
            tlb->globalNumTLBHits++;
        }
    }

    if (!success) {
        if (update_stats)
            tlb->globalNumTLBMisses++;

        if (tlb->hasMemSidePort) {
            // there is a TLB below -> propagate down the TLB hierarchy
            tlb->memSidePort[0]->sendFunctional(pkt);

            // If no valid translation from a prefetch, then just return
            if (sender_state->prefetch && !pkt->req->hasPaddr())
                return;
        } else {
            // Need to access the page table and update the TLB
            DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
                    virt_page_addr);

            Process *p = tc->getProcessPtr();
            TlbEntry newEntry;
            Addr vaddr = pkt->req->getVaddr();
#ifndef NDEBUG
            Addr alignedVaddr = p->pTable->pageAlign(vaddr);
            assert(alignedVaddr == virt_page_addr);
#endif
            // Named differently from the outer 'success' so the TLB
            // lookup result that feeds tlb_outcome is not shadowed.
            bool walk_success = p->pTable->lookup(vaddr, newEntry);

            if (!walk_success &&
                sender_state->tlbMode != BaseTLB::Execute) {
                if (p->fixupStackFault(vaddr))
                    walk_success = p->pTable->lookup(vaddr, newEntry);
            }

            // A failed prefetch sends an empty TLB entry back so that
            // it can be recognised as empty and handled accordingly.
            if (sender_state->prefetch && !walk_success) {
                DPRINTF(GPUPrefetch, "Prefetch failed %#x\n",
                        virt_page_addr);

                sender_state->tlbEntry = new GpuTlbEntry();

                return;
            }

            // For demand accesses no PageFaults are permitted after the
            // second page table lookup; a prefetch only gets here when
            // the walk succeeded.
            assert(walk_success);

            // virt_page_addr equals the aligned vaddr (asserted above);
            // using it keeps the traces independent of NDEBUG.
            DPRINTF(GPUTLB, "Mapping %#x to %#x\n", virt_page_addr,
                    newEntry.pageStart());

            sender_state->tlbEntry = new GpuTlbEntry(0, newEntry.vaddr,
                                                     newEntry.paddr,
                                                     walk_success);
        }
    } else {
        // Print the address itself. Passing the result of an extra
        // tlb->lookup() here would log the lookup's return value rather
        // than the vaddr and add a lookup call only when tracing is on.
        DPRINTF(GPUPrefetch, "Functional Hit for vaddr %#x\n",
                pkt->req->getVaddr());

        GpuTlbEntry *entry = tlb->lookup(pkt->req->getVaddr(),
                                         update_stats);

        assert(entry);

        sender_state->tlbEntry =
            new GpuTlbEntry(0, entry->vaddr, entry->paddr, entry->valid);
    }

    // This is the function that would populate pkt->req with the paddr of
    // the translation. But if no translation happens (i.e. a prefetch
    // fails) then the early returns in the above code will keep this
    // function from executing.
    tlb->handleFuncTranslationReturn(pkt, tlb_outcome);
}
/**
 * Retry notification on the CPU-side port. Reaching this function is
 * treated as a programming error.
 */
void
GpuTLB::CpuSidePort::recvReqRetry()
{
// The CPUSidePort never sends anything but replies. No retries
// expected, so abort (in builds with asserts enabled) if one arrives.
assert(false);
}
/**
 * @return an empty address range list; the ranges are currently not
 *         checked by the master.
 */
AddrRangeList
GpuTLB::CpuSidePort::getAddrRanges() const
{
    return AddrRangeList();
}
/**
 * MemSidePort receives the packet back from the level below.
 * The pending TLBEvent for the packet's page is switched to MISS_RETURN
 * and scheduled ticks(1) from now, which in turn calls
 * handleTranslationReturn and propagates the reply up the hierarchy.
 *
 * @param pkt response packet
 * @return always true
 */
bool
GpuTLB::MemSidePort::recvTimingResp(PacketPtr pkt)
{
    const Addr page_addr = roundDown(pkt->req->getVaddr(),
                                     TheISA::PageBytes);

    DPRINTF(GPUTLB, "MemSidePort recvTiming for virt_page_addr %#x\n",
            page_addr);

    // The event created when the request was issued is re-used here.
    TLBEvent *pending_event = tlb->translationReturnEvent[page_addr];
    assert(pending_event);
    assert(page_addr == pending_event->getTLBEventVaddr());

    pending_event->updateOutcome(MISS_RETURN);
    tlb->schedule(pending_event, curTick() + tlb->ticks(1));

    return true;
}
/**
 * Retry notification on the memory-side port. Reaching this function is
 * treated as a programming error.
 */
void
GpuTLB::MemSidePort::recvReqRetry()
{
// No retries should reach the TLB. The retries
// should only reach the TLBCoalescer.
assert(false);
}
void
GpuTLB::cleanup()
{
while (!cleanupQueue.empty()) {
Addr cleanup_addr = cleanupQueue.front();
cleanupQueue.pop();
// delete TLBEvent
TLBEvent * old_tlb_event = translationReturnEvent[cleanup_addr];
delete old_tlb_event;
translationReturnEvent.erase(cleanup_addr);
// update number of outstanding requests
outstandingReqs--;
}
/** the higher level coalescer should retry if it has
* any pending requests.
*/
for (int i = 0; i < cpuSidePort.size(); ++i) {
cpuSidePort[i]->sendRetryReq();
}
}
/**
 * Record one access to a page in the TLBFootprint table.
 *
 * The first access to a page creates its record and bumps
 * numUniquePages; later accesses add the ticks elapsed since the
 * previous access to the page's total reuse distance. When
 * accessDistance is set, the current value of localNumTLBAccesses is
 * also appended to the page's access list.
 *
 * @param virt_page_addr page-aligned virtual address being accessed
 */
void
GpuTLB::updatePageFootprint(Addr virt_page_addr)
{
    // Zeroed record, used only if the page has not been seen before.
    AccessInfo blank_info;
    blank_info.lastTimeAccessed = 0;
    blank_info.accessesPerPage = 0;
    blank_info.totalReuseDistance = 0;
    blank_info.sumDistance = 0;
    blank_info.meanDistance = 0;

    std::pair<AccessPatternTable::iterator, bool> inserted =
        TLBFootprint.insert(
            AccessPatternTable::value_type(virt_page_addr, blank_info));

    auto &page_info = inserted.first->second;

    if (inserted.second) {
        // first access to this page
        numUniquePages++;
    } else {
        int accessed_before = curTick() - page_info.lastTimeAccessed;
        page_info.totalReuseDistance += accessed_before;
    }

    page_info.accessesPerPage++;
    page_info.lastTimeAccessed = curTick();

    if (accessDistance)
        page_info.localTLBAccesses.push_back(localNumTLBAccesses.value());
}
/**
 * Compute the page-footprint statistics and clear the footprint table.
 *
 * For every page seen by this TLB the average reuse distance is folded
 * into avgReuseDistance. When accessDistance is set, the per-page max,
 * mean and standard deviation of the access distance are additionally
 * written, one line per page, to a .csv-style file created through
 * simout and named after this object.
 */
void
GpuTLB::exitCallback()
{
    std::ostream *page_stat_file = nullptr;

    if (accessDistance) {
        // print per page statistics to a separate file (.csv format)
        // simout is the gem5 output directory (default is m5out or the
        // one specified with -d)
        page_stat_file = simout.create(name().c_str())->stream();

        // print header
        *page_stat_file << "page,max_access_distance,mean_access_distance, "
                        << "stddev_distance" << std::endl;
    }

    // update avg. reuse distance footprint
    unsigned int sum_avg_reuse_distance_per_page = 0;

    // iterate through all pages seen by this TLB
    for (auto iter = TLBFootprint.begin(); iter != TLBFootprint.end();
         ++iter) {
        auto &info = iter->second;

        sum_avg_reuse_distance_per_page += info.totalReuseDistance /
                                           info.accessesPerPage;

        if (!accessDistance)
            continue;

        const size_t num_samples = info.localTLBAccesses.size();

        // Turn the recorded access counter values into distances: the
        // first sample becomes 0, every later one becomes the number of
        // accesses strictly between it and the previous sample.
        unsigned int tmp = info.localTLBAccesses[0];
        unsigned int prev = tmp;

        for (size_t i = 0; i < num_samples; ++i) {
            if (i) {
                tmp = prev + 1;
            }

            prev = info.localTLBAccesses[i];
            // update the localTLBAccesses value
            // with the actual difference
            info.localTLBAccesses[i] -= tmp;
            // compute the sum of AccessDistance per page
            // used later for mean
            info.sumDistance += info.localTLBAccesses[i];
        }

        info.meanDistance = info.sumDistance / info.accessesPerPage;

        // compute std_dev and max (we need a second round because we
        // need to know the mean value)
        unsigned int max_distance = 0;
        double squared_diff_sum = 0.0;

        for (size_t i = 0; i < num_samples; ++i) {
            const unsigned int distance = info.localTLBAccesses[i];

            if (distance > max_distance) {
                max_distance = distance;
            }

            // The deviation is negative for every sample below the
            // mean, so it must not be computed in unsigned arithmetic
            // (that would wrap to a huge value before squaring).
            const double diff = static_cast<double>(distance) -
                                static_cast<double>(info.meanDistance);
            squared_diff_sum += diff * diff;
        }

        const unsigned int stddev_distance = static_cast<unsigned int>(
            sqrt(squared_diff_sum / info.accessesPerPage));

        if (page_stat_file) {
            *page_stat_file << std::hex << iter->first << ",";
            *page_stat_file << std::dec << max_distance << ",";
            *page_stat_file << std::dec << info.meanDistance
                            << ",";
            *page_stat_file << std::dec << stddev_distance;
            *page_stat_file << std::endl;
        }

        // erase the localTLBAccesses array
        info.localTLBAccesses.clear();
    }

    if (!TLBFootprint.empty()) {
        avgReuseDistance =
            sum_avg_reuse_distance_per_page / TLBFootprint.size();
    }

    // clear the TLBFootprint map
    TLBFootprint.clear();
}
} // namespace X86ISA
/** Build a GpuTLB configured from this parameter object. */
X86ISA::GpuTLB*
X86GPUTLBParams::create()
{
    X86ISA::GpuTLB *gpu_tlb = new X86ISA::GpuTLB(this);
    return gpu_tlb;
}