X86: Make the X86 TLB take advantage of delayed translations, and get rid of the fake TLB miss faults.

This commit is contained in:
Gabe Black 2009-02-25 10:16:21 -08:00
parent 6ed47e9464
commit 40fdba2454
6 changed files with 187 additions and 192 deletions

View file

@ -163,56 +163,6 @@ namespace X86ISA
}
}
/**
 * Handle a fake instruction TLB miss (the variant that appears before the
 * "#else // !FULL_SYSTEM" branch): pass the recorded virtual address and
 * the write/execute flags of the access to the instruction TLB's walk().
 *
 * @param tc Thread context whose instruction TLB is asked to do the walk.
 */
void FakeITLBFault::invoke(ThreadContext * tc)
{
// Start the page table walker.
tc->getITBPtr()->walk(tc, vaddr, write, execute);
}
/**
 * Handle a fake data TLB miss (the variant that appears before the
 * "#else // !FULL_SYSTEM" branch): pass the recorded virtual address and
 * the write/execute flags of the access to the data TLB's walk().
 *
 * @param tc Thread context whose data TLB is asked to do the walk.
 */
void FakeDTLBFault::invoke(ThreadContext * tc)
{
// Start the page table walker.
tc->getDTBPtr()->walk(tc, vaddr, write, execute);
}
#else // !FULL_SYSTEM
/**
 * Service a fake instruction TLB miss when no page table walker is used:
 * look the faulting address up in the process page table and, if a mapping
 * exists, insert it into the instruction TLB under its page-aligned
 * virtual address. A missing mapping is fatal.
 *
 * @param tc Thread context that took the miss.
 */
void FakeITLBFault::invoke(ThreadContext * tc)
{
    DPRINTF(TLB, "Invoking an ITLB fault for address %#x at pc %#x.\n",
            vaddr, tc->readPC());

    Process *process = tc->getProcessPtr();
    TlbEntry mapping;
    if (process->pTable->lookup(vaddr, mapping)) {
        // The TLB is keyed by the page-aligned address, not the raw one.
        Addr pageVaddr = process->pTable->pageAlign(vaddr);
        DPRINTF(TLB, "Mapping %#x to %#x\n", pageVaddr,
                mapping.pageStart());
        tc->getITBPtr()->insert(pageVaddr, mapping);
    } else {
        // Instruction fetches never trigger on-demand allocation here.
        panic("Tried to execute unmapped address %#x.\n", vaddr);
    }
}
/**
 * Service a fake data TLB miss when no page table walker is used: look the
 * faulting address up in the process page table, retrying once after
 * Process::checkAndAllocNextPage() if the first lookup fails, and insert
 * the resulting mapping into the data TLB. A missing mapping after the
 * retry is fatal.
 *
 * @param tc Thread context that took the miss.
 */
void FakeDTLBFault::invoke(ThreadContext * tc)
{
    DPRINTF(TLB, "Invoking an DTLB fault for address %#x at pc %#x.\n",
            vaddr, tc->readPC());

    Process *process = tc->getProcessPtr();
    TlbEntry mapping;
    bool mapped = process->pTable->lookup(vaddr, mapping);
    if (!mapped) {
        // NOTE(review): presumably this maps the page on demand (e.g. for
        // stack growth) -- confirm against Process::checkAndAllocNextPage.
        process->checkAndAllocNextPage(vaddr);
        mapped = process->pTable->lookup(vaddr, mapping);
    }

    if (mapped) {
        // The TLB is keyed by the page-aligned address, not the raw one.
        Addr pageVaddr = process->pTable->pageAlign(vaddr);
        DPRINTF(TLB, "Mapping %#x to %#x\n", pageVaddr,
                mapping.pageStart());
        tc->getDTBPtr()->insert(pageVaddr, mapping);
    } else {
        panic("Tried to access unmapped address %#x.\n", vaddr);
    }
}
#endif
} // namespace X86ISA

View file

@ -422,38 +422,6 @@ namespace X86ISA
return true;
}
};
// These faults aren't part of the ISA definition. They trigger filling
// the tlb on a miss and are to take the place of a hardware table walker.
/**
 * Pseudo-fault used for an instruction TLB miss. It stores the virtual
 * address of the access and its write/execute flags so that invoke() can
 * use them when the fault is handled.
 */
class FakeITLBFault : public X86Fault
{
protected:
// Virtual address recorded when the fault was created.
Addr vaddr;
// Whether the access was a write.
bool write;
// Whether the access was an instruction fetch.
bool execute;
public:
// Records the address and access flags; the strings and the 0 are handed
// to the X86Fault base constructor unchanged.
FakeITLBFault(Addr _vaddr, bool _write, bool _execute) :
X86Fault("fake instruction tlb fault", "itlb", 0),
vaddr(_vaddr), write(_write), execute(_execute)
{}
// Handles the miss on behalf of the given thread context.
void invoke(ThreadContext * tc);
};
/**
 * Pseudo-fault used for a data TLB miss. It stores the virtual address of
 * the access and its write/execute flags so that invoke() can use them
 * when the fault is handled.
 */
class FakeDTLBFault : public X86Fault
{
protected:
// Virtual address recorded when the fault was created.
Addr vaddr;
// Whether the access was a write.
bool write;
// Whether the access was an instruction fetch.
bool execute;
public:
// Records the address and access flags; the strings and the 0 are handed
// to the X86Fault base constructor unchanged.
FakeDTLBFault(Addr _vaddr, bool _write, bool _execute) :
X86Fault("fake data tlb fault", "dtlb", 0),
vaddr(_vaddr), write(_write), execute(_execute)
{}
// Handles the miss on behalf of the given thread context.
void invoke(ThreadContext * tc);
};
};
#endif // __ARCH_X86_FAULTS_HH__

View file

@ -84,7 +84,7 @@ BitUnion64(PageTableEntry)
Bitfield<0> p;
EndBitUnion(PageTableEntry)
void
Fault
Walker::doNext(PacketPtr &read, PacketPtr &write)
{
assert(state != Ready && state != Waiting);
@ -106,11 +106,11 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
pte.a = 1;
entry.writable = pte.w;
entry.user = pte.u;
if (badNX)
panic("NX violation!\n");
if (badNX || !pte.p) {
stop();
return pageFault(pte.p);
}
entry.noExec = pte.nx;
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
nextState = LongPDP;
break;
case LongPDP:
@ -119,10 +119,10 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
pte.a = 1;
entry.writable = entry.writable && pte.w;
entry.user = entry.user && pte.u;
if (badNX)
panic("NX violation!\n");
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (badNX || !pte.p) {
stop();
return pageFault(pte.p);
}
nextState = LongPD;
break;
case LongPD:
@ -130,10 +130,10 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
pte.a = 1;
entry.writable = entry.writable && pte.w;
entry.user = entry.user && pte.u;
if (badNX)
panic("NX violation!\n");
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (badNX || !pte.p) {
stop();
return pageFault(pte.p);
}
if (!pte.ps) {
// 4 KB page
entry.size = 4 * (1 << 10);
@ -150,36 +150,32 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
entry.patBit = bits(pte, 12);
entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
tlb->insert(entry.vaddr, entry);
nextState = Ready;
delete read->req;
delete read;
read = NULL;
return;
stop();
return NoFault;
}
case LongPTE:
doWrite = !pte.a;
pte.a = 1;
entry.writable = entry.writable && pte.w;
entry.user = entry.user && pte.u;
if (badNX)
panic("NX violation!\n");
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (badNX || !pte.p) {
stop();
return pageFault(pte.p);
}
entry.paddr = (uint64_t)pte & (mask(40) << 12);
entry.uncacheable = uncacheable;
entry.global = pte.g;
entry.patBit = bits(pte, 12);
entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
tlb->insert(entry.vaddr, entry);
nextState = Ready;
delete read->req;
delete read;
read = NULL;
return;
stop();
return NoFault;
case PAEPDP:
nextRead = ((uint64_t)pte & (mask(40) << 12)) + vaddr.pael2 * size;
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (!pte.p) {
stop();
return pageFault(pte.p);
}
nextState = PAEPD;
break;
case PAEPD:
@ -187,10 +183,10 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
pte.a = 1;
entry.writable = pte.w;
entry.user = pte.u;
if (badNX)
panic("NX violation!\n");
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (badNX || !pte.p) {
stop();
return pageFault(pte.p);
}
if (!pte.ps) {
// 4 KB page
entry.size = 4 * (1 << 10);
@ -206,39 +202,35 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
entry.patBit = bits(pte, 12);
entry.vaddr = entry.vaddr & ~((2 * (1 << 20)) - 1);
tlb->insert(entry.vaddr, entry);
nextState = Ready;
delete read->req;
delete read;
read = NULL;
return;
stop();
return NoFault;
}
case PAEPTE:
doWrite = !pte.a;
pte.a = 1;
entry.writable = entry.writable && pte.w;
entry.user = entry.user && pte.u;
if (badNX)
panic("NX violation!\n");
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (badNX || !pte.p) {
stop();
return pageFault(pte.p);
}
entry.paddr = (uint64_t)pte & (mask(40) << 12);
entry.uncacheable = uncacheable;
entry.global = pte.g;
entry.patBit = bits(pte, 7);
entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
tlb->insert(entry.vaddr, entry);
nextState = Ready;
delete read->req;
delete read;
read = NULL;
return;
stop();
return NoFault;
case PSEPD:
doWrite = !pte.a;
pte.a = 1;
entry.writable = pte.w;
entry.user = pte.u;
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (!pte.p) {
stop();
return pageFault(pte.p);
}
if (!pte.ps) {
// 4 KB page
entry.size = 4 * (1 << 10);
@ -255,44 +247,40 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
entry.patBit = bits(pte, 12);
entry.vaddr = entry.vaddr & ~((4 * (1 << 20)) - 1);
tlb->insert(entry.vaddr, entry);
nextState = Ready;
delete read->req;
delete read;
read = NULL;
return;
stop();
return NoFault;
}
case PD:
doWrite = !pte.a;
pte.a = 1;
entry.writable = pte.w;
entry.user = pte.u;
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (!pte.p) {
stop();
return pageFault(pte.p);
}
// 4 KB page
entry.size = 4 * (1 << 10);
nextRead = ((uint64_t)pte & (mask(20) << 12)) + vaddr.norml2 * size;
nextState = PTE;
break;
nextState = PTE;
break;
case PTE:
doWrite = !pte.a;
pte.a = 1;
entry.writable = pte.w;
entry.user = pte.u;
if (!pte.p)
panic("Page at %#x not present!\n", entry.vaddr);
if (!pte.p) {
stop();
return pageFault(pte.p);
}
entry.paddr = (uint64_t)pte & (mask(20) << 12);
entry.uncacheable = uncacheable;
entry.global = pte.g;
entry.patBit = bits(pte, 7);
entry.vaddr = entry.vaddr & ~((4 * (1 << 10)) - 1);
tlb->insert(entry.vaddr, entry);
nextState = Ready;
delete read->req;
delete read;
read = NULL;
return;
stop();
return NoFault;
default:
panic("Unknown page table walker state %d!\n");
}
@ -316,16 +304,21 @@ Walker::doNext(PacketPtr &read, PacketPtr &write)
delete oldRead->req;
delete oldRead;
}
return NoFault;
}
void
Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute)
Fault
Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
RequestPtr _req, bool _write, bool _execute)
{
assert(state == Ready);
assert(!tc);
tc = _tc;
req = _req;
Addr vaddr = req->getVaddr();
execute = _execute;
write = _write;
translation = _translation;
VAddr addr = vaddr;
@ -339,6 +332,7 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute)
// Do long mode.
state = LongPML4;
top = (cr3.longPdtb << 12) + addr.longl4 * size;
enableNX = efer.nxe;
} else {
// We're in some flavor of legacy mode.
CR4 cr4 = tc->readMiscRegNoEffect(MISCREG_CR4);
@ -346,6 +340,7 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute)
// Do legacy PAE.
state = PAEPDP;
top = (cr3.paePdtb << 5) + addr.pael3 * size;
enableNX = efer.nxe;
} else {
size = 4;
top = (cr3.pdtb << 12) + addr.norml2 * size;
@ -356,14 +351,13 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute)
// Do legacy non PSE.
state = PD;
}
enableNX = false;
}
}
nextState = Ready;
entry.vaddr = vaddr;
enableNX = efer.nxe;
Request::Flags flags = Request::PHYSICAL;
if (cr3.pcd)
flags.set(Request::UNCACHEABLE);
@ -372,13 +366,15 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute)
read->allocate();
Enums::MemoryMode memMode = sys->getMemoryMode();
if (memMode == Enums::timing) {
tc->suspend();
timingFault = NoFault;
port.sendTiming(read);
} else if (memMode == Enums::atomic) {
Fault fault;
do {
port.sendAtomic(read);
PacketPtr write = NULL;
doNext(read, write);
fault = doNext(read, write);
assert(fault == NoFault || read == NULL);
state = nextState;
nextState = Ready;
if (write)
@ -387,9 +383,11 @@ Walker::start(ThreadContext * _tc, Addr vaddr, bool _write, bool _execute)
tc = NULL;
state = Ready;
nextState = Waiting;
return fault;
} else {
panic("Unrecognized memory system mode.\n");
}
return NoFault;
}
bool
@ -410,9 +408,10 @@ Walker::recvTiming(PacketPtr pkt)
state = nextState;
nextState = Ready;
PacketPtr write = NULL;
doNext(pkt, write);
timingFault = doNext(pkt, write);
state = Waiting;
read = pkt;
assert(timingFault == NoFault || read == NULL);
if (write) {
writes.push_back(write);
}
@ -421,10 +420,27 @@ Walker::recvTiming(PacketPtr pkt)
sendPackets();
}
if (inflight == 0 && read == NULL && writes.size() == 0) {
tc->activate(0);
tc = NULL;
state = Ready;
nextState = Waiting;
if (timingFault == NoFault) {
/*
* Finish the translation. Now that we know the right entry is
* in the TLB, this should work with no memory accesses.
* There could be new faults unrelated to the table walk like
* permissions violations, so we'll need the return value as
* well.
*/
bool delayedResponse;
Fault fault = tlb->translate(req, tc, NULL, write, execute,
delayedResponse, true);
assert(!delayedResponse);
// Let the CPU continue.
translation->finish(fault, req, tc, write);
} else {
// There was a fault during the walk. Let the CPU know.
translation->finish(timingFault, req, tc, write);
}
}
} else if (pkt->wasNacked()) {
pkt->reinitNacked();
@ -525,6 +541,14 @@ Walker::getPort(const std::string &if_name, int idx)
panic("No page table walker port named %s!\n", if_name);
}
/**
 * Build the PageFault object reported when the walk cannot complete.
 *
 * The fault is constructed from the virtual address stored in the walker's
 * entry, the walker's write/execute flags, the current privilege level
 * read from the M5 register, and whether NX checking is enabled.
 *
 * @param present Forwarded unchanged to the PageFault constructor; the
 *                callers in doNext() pass the PTE's present bit.
 * @return The newly allocated PageFault.
 */
Fault
Walker::pageFault(bool present)
{
    HandyM5Reg m5reg = tc->readMiscRegNoEffect(MISCREG_M5_REG);

    // CPL 3 is the least privileged (user) level on x86.
    const bool atCpl3 = (m5reg.cpl == 3);
    // Only report an instruction-fetch fault when NX is enabled.
    const bool nxFetch = execute && enableNX;

    return new PageFault(entry.vaddr, present, write,
                         atCpl3, false, nxFetch);
}
}
X86ISA::Walker *

View file

@ -91,11 +91,22 @@ namespace X86ISA
// if the machine is finished, or points to a packet to initiate
// the next read. If any write is required to update an "accessed"
// bit, write will point to a packet to do the write. Otherwise it
// will be NULL.
void doNext(PacketPtr &read, PacketPtr &write);
// will be NULL. The return value is whatever fault was incurred
// during this stage of the lookup.
Fault doNext(PacketPtr &read, PacketPtr &write);
// Kick off the state machine.
void start(ThreadContext * _tc, Addr vaddr, bool write, bool execute);
Fault start(ThreadContext * _tc, BaseTLB::Translation *translation,
RequestPtr req, bool write, bool execute);
// Clean up after the state machine: set the next state to Ready, free the
// `read` packet together with the request it points to, and null the
// pointer.
// NOTE(review): inside this inline method `read` is resolved in class
// scope, whereas doNext() also has a `read` reference parameter -- confirm
// that both name the same packet at every call site.
void
stop()
{
nextState = Ready;
delete read->req;
delete read;
read = NULL;
}
protected:
@ -110,6 +121,11 @@ namespace X86ISA
bool retrying;
/*
* The fault, if any, that's waiting to be delivered in timing mode.
*/
Fault timingFault;
/*
* Functions for dealing with packets.
*/
@ -156,16 +172,18 @@ namespace X86ISA
// The TLB we're supposed to load.
TLB * tlb;
System * sys;
BaseTLB::Translation * translation;
/*
* State machine state.
*/
ThreadContext * tc;
RequestPtr req;
State state;
State nextState;
int size;
bool enableNX;
bool write, execute;
bool write, execute, user;
TlbEntry entry;
Fault pageFault(bool present);

View file

@ -72,6 +72,9 @@
#if FULL_SYSTEM
#include "arch/x86/pagetable_walker.hh"
#else
#include "mem/page_table.hh"
#include "sim/process.hh"
#endif
namespace X86ISA {
@ -90,7 +93,7 @@ TLB::TLB(const Params *p) : BaseTLB(p), configAddress(0), size(p->size)
#endif
}
void
TlbEntry *
TLB::insert(Addr vpn, TlbEntry &entry)
{
//TODO Deal with conflicting entries
@ -106,6 +109,7 @@ TLB::insert(Addr vpn, TlbEntry &entry)
*newEntry = entry;
newEntry->vaddr = vpn;
entryList.push_front(newEntry);
return newEntry;
}
TLB::EntryList::iterator
@ -138,14 +142,6 @@ TLB::lookup(Addr va, bool update_lru)
return *entry;
}
#if FULL_SYSTEM
// Forward a table walk request for vaddr, along with the write/execute
// flags of the access, to this TLB's page table walker.
void
TLB::walk(ThreadContext * _tc, Addr vaddr, bool write, bool execute)
{
walker->start(_tc, vaddr, write, execute);
}
#endif
void
TLB::invalidateAll()
{
@ -188,11 +184,12 @@ TLB::demapPage(Addr va, uint64_t asn)
}
}
template<class TlbFault>
Fault
TLB::translateAtomic(RequestPtr req, ThreadContext *tc,
bool write, bool execute)
TLB::translate(RequestPtr req, ThreadContext *tc,
Translation *translation, bool write, bool execute,
bool &delayedResponse, bool timing)
{
delayedResponse = false;
Addr vaddr = req->getVaddr();
DPRINTF(TLB, "Translating vaddr %#x.\n", vaddr);
uint32_t flags = req->getFlags();
@ -617,14 +614,45 @@ TLB::translateAtomic(RequestPtr req, ThreadContext *tc,
// The vaddr already has the segment base applied.
TlbEntry *entry = lookup(vaddr);
if (!entry) {
return new TlbFault(vaddr, write, execute);
} else {
// Do paging protection checks.
DPRINTF(TLB, "Entry found with paddr %#x, doing protection checks.\n", entry->paddr);
Addr paddr = entry->paddr | (vaddr & (entry->size-1));
DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr);
req->setPaddr(paddr);
#if FULL_SYSTEM
Fault fault = walker->start(tc, translation, req,
write, execute);
if (timing || fault != NoFault) {
// This gets ignored in atomic mode.
delayedResponse = true;
return fault;
}
entry = lookup(vaddr);
assert(entry);
#else
DPRINTF(TLB, "Handling a TLB miss for "
"address %#x at pc %#x.\n",
vaddr, tc->readPC());
Process *p = tc->getProcessPtr();
TlbEntry newEntry;
bool success = p->pTable->lookup(vaddr, newEntry);
if(!success && !execute) {
p->checkAndAllocNextPage(vaddr);
success = p->pTable->lookup(vaddr, newEntry);
}
if(!success) {
panic("Tried to execute unmapped address %#x.\n", vaddr);
} else {
Addr alignedVaddr = p->pTable->pageAlign(vaddr);
DPRINTF(TLB, "Mapping %#x to %#x\n", alignedVaddr,
newEntry.pageStart());
entry = insert(alignedVaddr, newEntry);
}
DPRINTF(TLB, "Miss was serviced.\n");
#endif
}
// Do paging protection checks.
DPRINTF(TLB, "Entry found with paddr %#x, "
"doing protection checks.\n", entry->paddr);
Addr paddr = entry->paddr | (vaddr & (entry->size-1));
DPRINTF(TLB, "Translated %#x -> %#x.\n", vaddr, paddr);
req->setPaddr(paddr);
} else {
//Use the address which already has segmentation applied.
DPRINTF(TLB, "Paging disabled.\n");
@ -665,29 +693,41 @@ TLB::translateAtomic(RequestPtr req, ThreadContext *tc,
Fault
DTB::translateAtomic(RequestPtr req, ThreadContext *tc, bool write)
{
return TLB::translateAtomic<FakeDTLBFault>(req, tc, write, false);
bool delayedResponse;
return TLB::translate(req, tc, NULL, write,
false, delayedResponse, false);
}
void
DTB::translateTiming(RequestPtr req, ThreadContext *tc,
Translation *translation, bool write)
{
bool delayedResponse;
assert(translation);
translation->finish(translateAtomic(req, tc, write), req, tc, write);
Fault fault = TLB::translate(req, tc, translation,
write, false, delayedResponse, true);
if (!delayedResponse)
translation->finish(fault, req, tc, write);
}
Fault
ITB::translateAtomic(RequestPtr req, ThreadContext *tc)
{
return TLB::translateAtomic<FakeITLBFault>(req, tc, false, true);
bool delayedResponse;
return TLB::translate(req, tc, NULL, false,
true, delayedResponse, false);
}
void
ITB::translateTiming(RequestPtr req, ThreadContext *tc,
Translation *translation)
{
bool delayedResponse;
assert(translation);
translation->finish(translateAtomic(req, tc), req, tc, false);
Fault fault = TLB::translate(req, tc, translation,
false, true, delayedResponse, true);
if (!delayedResponse)
translation->finish(fault, req, tc, false);
}
#if FULL_SYSTEM

View file

@ -87,8 +87,7 @@ namespace X86ISA
class TLB : public BaseTLB
{
protected:
friend class FakeITLBFault;
friend class FakeDTLBFault;
friend class Walker;
typedef std::list<TlbEntry *> EntryList;
@ -118,8 +117,6 @@ namespace X86ISA
protected:
Walker * walker;
void walk(ThreadContext * _tc, Addr vaddr, bool write, bool execute);
#endif
public:
@ -137,15 +134,13 @@ namespace X86ISA
EntryList freeList;
EntryList entryList;
template<class TlbFault>
Fault translateAtomic(RequestPtr req, ThreadContext *tc,
bool write, bool execute);
void translateTiming(RequestPtr req, ThreadContext *tc,
Translation *translation, bool write, bool execute);
Fault translate(RequestPtr req, ThreadContext *tc,
Translation *translation, bool write, bool execute,
bool &delayedResponse, bool timing);
public:
void insert(Addr vpn, TlbEntry &entry);
TlbEntry * insert(Addr vpn, TlbEntry &entry);
// Checkpointing
virtual void serialize(std::ostream &os);