From f743bdcb6994a4e0e413e1a3499f7424b5ec2efa Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Sun, 23 Nov 2014 18:01:08 -0800 Subject: [PATCH] x86: Segment initialization to support KvmCPU in SE This patch sets up low and high privilege code and data segments and places them in the following order: cs low, ds low, ds, cs, in the GDT. Additionally, a syscall and page fault handler for KvmCPU in SE mode are defined. The order of the segment selectors in GDT is required in this manner for interrupt handling to work properly. Segment initialization is done for all the thread contexts. --- src/arch/x86/process.cc | 473 +++++++++++++++++++++++++++++++++----- src/arch/x86/regs/misc.hh | 58 +++++ src/arch/x86/system.cc | 54 ++--- src/arch/x86/system.hh | 8 + src/sim/Process.py | 1 + src/sim/process.cc | 1 + src/sim/process.hh | 3 + 7 files changed, 504 insertions(+), 94 deletions(-) diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc index fc5a88888..98e7ea150 100644 --- a/src/arch/x86/process.cc +++ b/src/arch/x86/process.cc @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014 Advanced Micro Devices, Inc. * Copyright (c) 2007 The Hewlett-Packard Development Company * All rights reserved. * @@ -45,6 +46,7 @@ #include "arch/x86/regs/segment.hh" #include "arch/x86/isa_traits.hh" #include "arch/x86/process.hh" +#include "arch/x86/system.hh" #include "arch/x86/types.hh" #include "base/loader/elf_object.hh" #include "base/loader/object_file.hh" @@ -52,6 +54,7 @@ #include "base/trace.hh" #include "cpu/thread_context.hh" #include "debug/Stack.hh" +#include "mem/multi_level_page_table.hh" #include "mem/page_table.hh" #include "sim/process_impl.hh" #include "sim/syscall_emul.hh" @@ -183,73 +186,418 @@ X86_64LiveProcess::initState() initVirtMem.writeBlob(vsyscallPage.base + vsyscallPage.vgettimeofdayOffset, vgettimeofdayBlob, sizeof(vgettimeofdayBlob)); - for (int i = 0; i < contextIds.size(); i++) { - ThreadContext * tc = system->getThreadContext(contextIds[i]); + if (kvmInSE) { + PortProxy physProxy = system->physProxy; - SegAttr dataAttr = 0; - dataAttr.dpl = 3; - dataAttr.unusable = 0; - dataAttr.defaultSize = 1; - dataAttr.longMode = 1; - dataAttr.avl = 0; - dataAttr.granularity = 1; - dataAttr.present = 1; - dataAttr.type = 3; - dataAttr.writable = 1; - dataAttr.readable = 1; - dataAttr.expandDown = 0; - dataAttr.system = 1; + /* + * Set up the gdt. + */ + uint8_t numGDTEntries = 0; + uint64_t nullDescriptor = 0; + physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, + (uint8_t *)(&nullDescriptor), 8); + numGDTEntries++; - //Initialize the segment registers. - for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) { - tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0); - tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0); - tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr); + SegDescriptor initDesc = 0; + initDesc.type.codeOrData = 0; // code or data type + initDesc.type.c = 0; // conforming + initDesc.type.r = 1; // readable + initDesc.dpl = 0; // privilege + initDesc.p = 1; // present + initDesc.l = 1; // longmode - 64 bit + initDesc.d = 0; // operand size + initDesc.g = 1; // granularity + initDesc.s = 1; // system segment + initDesc.limitHigh = 0xFFFF; + initDesc.limitLow = 0xF; + initDesc.baseHigh = 0x0; + initDesc.baseLow = 0x0; + + //64 bit code segment + SegDescriptor csLowPLDesc = initDesc; + csLowPLDesc.type.codeOrData = 1; + csLowPLDesc.dpl = 0; + uint64_t csLowPLDescVal = csLowPLDesc; + physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, + (uint8_t *)(&csLowPLDescVal), 8); + + numGDTEntries++; + + SegSelector csLowPL = 0; + csLowPL.si = numGDTEntries - 1; + csLowPL.rpl = 0; + + //64 bit data segment + SegDescriptor dsLowPLDesc = initDesc; + dsLowPLDesc.type.codeOrData = 0; + dsLowPLDesc.dpl = 0; + uint64_t dsLowPLDescVal = dsLowPLDesc; + physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, + (uint8_t *)(&dsLowPLDescVal), 8); + + numGDTEntries++; + + SegSelector dsLowPL = 0; + dsLowPL.si = numGDTEntries - 1; + dsLowPL.rpl = 0; + + //64 bit data segment + SegDescriptor dsDesc = initDesc; + dsDesc.type.codeOrData = 0; + dsDesc.dpl = 3; + uint64_t dsDescVal = dsDesc; + physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, + (uint8_t *)(&dsDescVal), 8); + + numGDTEntries++; + + SegSelector ds = 0; + ds.si = numGDTEntries - 1; + ds.rpl = 3; + + //64 bit code segment + SegDescriptor csDesc = initDesc; + csDesc.type.codeOrData = 1; + csDesc.dpl = 3; + uint64_t csDescVal = csDesc; + physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, + (uint8_t *)(&csDescVal), 8); + + numGDTEntries++; + + SegSelector cs = 0; + cs.si = numGDTEntries - 1; + cs.rpl = 3; + + SegSelector scall = 0; + scall.si = csLowPL.si; + scall.rpl = 0; + + SegSelector sret = 0; + sret.si = dsLowPL.si; + sret.rpl = 3; + + /* In long mode the TSS has been extended to 16 Bytes */ + TSSlow TSSDescLow = 0; + TSSDescLow.type = 0xB; + TSSDescLow.dpl = 0; // Privelege level 0 + TSSDescLow.p = 1; // Present + TSSDescLow.g = 1; // Page granularity + TSSDescLow.limitHigh = 0xF; + TSSDescLow.limitLow = 0xFFFF; + TSSDescLow.baseLow = (((uint32_t)TSSVirtAddr) << 8) >> 8; + TSSDescLow.baseHigh = (uint8_t)(((uint32_t)TSSVirtAddr) >> 24); + + TSShigh TSSDescHigh = 0; + TSSDescHigh.base = (uint32_t)(TSSVirtAddr >> 32); + + struct TSSDesc { + uint64_t low; + uint64_t high; + } tssDescVal = {TSSDescLow, TSSDescHigh}; + + physProxy.writeBlob(GDTPhysAddr + numGDTEntries * 8, + (uint8_t *)(&tssDescVal), sizeof(tssDescVal)); + + numGDTEntries++; + + SegSelector tssSel = 0; + tssSel.si = numGDTEntries - 1; + + uint64_t tss_base_addr = (TSSDescHigh.base << 32) | ((TSSDescLow.baseHigh << 24) | TSSDescLow.baseLow); + uint64_t tss_limit = TSSDescLow.limitLow | (TSSDescLow.limitHigh << 16); + + SegAttr tss_attr = 0; + + tss_attr.type = TSSDescLow.type; + tss_attr.dpl = TSSDescLow.dpl; + tss_attr.present = TSSDescLow.p; + tss_attr.granularity = TSSDescLow.g; + tss_attr.unusable = 0; + + for (int i = 0; i < contextIds.size(); i++) { + ThreadContext * tc = system->getThreadContext(contextIds[i]); + + tc->setMiscReg(MISCREG_CS, (MiscReg)cs); + tc->setMiscReg(MISCREG_DS, (MiscReg)ds); + tc->setMiscReg(MISCREG_ES, (MiscReg)ds); + tc->setMiscReg(MISCREG_FS, (MiscReg)ds); + tc->setMiscReg(MISCREG_GS, (MiscReg)ds); + tc->setMiscReg(MISCREG_SS, (MiscReg)ds); + + // LDT + tc->setMiscReg(MISCREG_TSL, 0); + SegAttr tslAttr = 0; + tslAttr.present = 1; + tslAttr.type = 2; + tc->setMiscReg(MISCREG_TSL_ATTR, tslAttr); + + tc->setMiscReg(MISCREG_TSG_BASE, GDTVirtAddr); + tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1); + + tc->setMiscReg(MISCREG_TR, (MiscReg)tssSel); + tc->setMiscReg(MISCREG_SEG_BASE(SYS_SEGMENT_REG_TR), tss_base_addr); + tc->setMiscReg(MISCREG_SEG_EFF_BASE(SYS_SEGMENT_REG_TR), 0); + tc->setMiscReg(MISCREG_SEG_LIMIT(SYS_SEGMENT_REG_TR), tss_limit); + tc->setMiscReg(MISCREG_SEG_ATTR(SYS_SEGMENT_REG_TR), (MiscReg)tss_attr); + + //Start using longmode segments. + installSegDesc(tc, SEGMENT_REG_CS, csDesc, true); + installSegDesc(tc, SEGMENT_REG_DS, dsDesc, true); + installSegDesc(tc, SEGMENT_REG_ES, dsDesc, true); + installSegDesc(tc, SEGMENT_REG_FS, dsDesc, true); + installSegDesc(tc, SEGMENT_REG_GS, dsDesc, true); + installSegDesc(tc, SEGMENT_REG_SS, dsDesc, true); + + Efer efer = 0; + efer.sce = 1; // Enable system call extensions. + efer.lme = 1; // Enable long mode. + efer.lma = 1; // Activate long mode. + efer.nxe = 0; // Enable nx support. + efer.svme = 1; // Enable svm support for now. + efer.ffxsr = 0; // Turn on fast fxsave and fxrstor. + tc->setMiscReg(MISCREG_EFER, efer); + + //Set up the registers that describe the operating mode. + CR0 cr0 = 0; + cr0.pg = 1; // Turn on paging. + cr0.cd = 0; // Don't disable caching. + cr0.nw = 0; // This is bit is defined to be ignored. + cr0.am = 1; // No alignment checking + cr0.wp = 1; // Supervisor mode can write read only pages + cr0.ne = 1; + cr0.et = 1; // This should always be 1 + cr0.ts = 0; // We don't do task switching, so causing fp exceptions + // would be pointless. + cr0.em = 0; // Allow x87 instructions to execute natively. + cr0.mp = 1; // This doesn't really matter, but the manual suggests + // setting it to one. + cr0.pe = 1; // We're definitely in protected mode. + tc->setMiscReg(MISCREG_CR0, cr0); + + CR0 cr2 = 0; + tc->setMiscReg(MISCREG_CR2, cr2); + + CR3 cr3 = pageTablePhysAddr; + tc->setMiscReg(MISCREG_CR3, cr3); + + CR4 cr4 = 0; + //Turn on pae. + cr4.osxsave = 1; // Enable XSAVE and Proc Extended States + cr4.osxmmexcpt = 1; // Operating System Unmasked Exception + cr4.osfxsr = 1; // Operating System FXSave/FSRSTOR Support + cr4.pce = 0; // Performance-Monitoring Counter Enable + cr4.pge = 0; // Page-Global Enable + cr4.mce = 0; // Machine Check Enable + cr4.pae = 1; // Physical-Address Extension + cr4.pse = 0; // Page Size Extensions + cr4.de = 0; // Debugging Extensions + cr4.tsd = 0; // Time Stamp Disable + cr4.pvi = 0; // Protected-Mode Virtual Interrupts + cr4.vme = 0; // Virtual-8086 Mode Extensions + + tc->setMiscReg(MISCREG_CR4, cr4); + + CR4 cr8 = 0; + tc->setMiscReg(MISCREG_CR8, cr8); + + const Addr PageMapLevel4 = pageTablePhysAddr; + //Point to the page tables. + tc->setMiscReg(MISCREG_CR3, PageMapLevel4); + + tc->setMiscReg(MISCREG_MXCSR, 0x1f80); + + tc->setMiscReg(MISCREG_APIC_BASE, 0xfee00900); + + tc->setMiscReg(MISCREG_SEG_BASE(MISCREG_TSG - MISCREG_SEG_SEL_BASE), GDTVirtAddr); + tc->setMiscReg(MISCREG_SEG_LIMIT(MISCREG_TSG - MISCREG_SEG_SEL_BASE), 0xffff); + + tc->setMiscReg(MISCREG_SEG_BASE(MISCREG_IDTR - MISCREG_SEG_SEL_BASE), IDTVirtAddr); + tc->setMiscReg(MISCREG_SEG_LIMIT(MISCREG_IDTR - MISCREG_SEG_SEL_BASE), 0xffff); + + /* enabling syscall and sysret */ + MiscReg star = ((MiscReg)sret << 48) | ((MiscReg)scall << 32); + tc->setMiscReg(MISCREG_STAR, star); + MiscReg lstar = (MiscReg) syscallCodeVirtAddr; + tc->setMiscReg(MISCREG_LSTAR, lstar); + MiscReg sfmask = (1<<8) | (1<<10); // TF | DF + tc->setMiscReg(MISCREG_SF_MASK, sfmask); } - SegAttr csAttr = 0; - csAttr.dpl = 3; - csAttr.unusable = 0; - csAttr.defaultSize = 0; - csAttr.longMode = 1; - csAttr.avl = 0; - csAttr.granularity = 1; - csAttr.present = 1; - csAttr.type = 10; - csAttr.writable = 0; - csAttr.readable = 1; - csAttr.expandDown = 0; - csAttr.system = 1; + /* + * Setting up the content of the TSS + * and writting it to physical memory + */ - tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr); + struct { + uint32_t reserved0; // +00h + uint32_t RSP0_low; // +04h + uint32_t RSP0_high; // +08h + uint32_t RSP1_low; // +0Ch + uint32_t RSP1_high; // +10h + uint32_t RSP2_low; // +14h + uint32_t RSP2_high; // +18h + uint32_t reserved1; // +1Ch + uint32_t reserved2; // +20h + uint32_t IST1_low; // +24h + uint32_t IST1_high; // +28h + uint32_t IST2_low; // +2Ch + uint32_t IST2_high; // +30h + uint32_t IST3_low; // +34h + uint32_t IST3_high; // +38h + uint32_t IST4_low; // +3Ch + uint32_t IST4_high; // +40h + uint32_t IST5_low; // +44h + uint32_t IST5_high; // +48h + uint32_t IST6_low; // +4Ch + uint32_t IST6_high; // +50h + uint32_t IST7_low; // +54h + uint32_t IST7_high; // +58h + uint32_t reserved3; // +5Ch + uint32_t reserved4; // +60h + uint16_t reserved5; // +64h + uint16_t IO_MapBase; // +66h + } tss; - Efer efer = 0; - efer.sce = 1; // Enable system call extensions. - efer.lme = 1; // Enable long mode. - efer.lma = 1; // Activate long mode. - efer.nxe = 1; // Enable nx support. - efer.svme = 0; // Disable svm support for now. It isn't implemented. - efer.ffxsr = 1; // Turn on fast fxsave and fxrstor. - tc->setMiscReg(MISCREG_EFER, efer); + /** setting Interrupt Stack Table */ + uint64_t IST_start = ISTVirtAddr + PageBytes; + tss.IST1_low = (uint32_t)IST_start; + tss.IST1_high = (uint32_t)(IST_start >> 32); + tss.RSP0_low = tss.IST1_low; + tss.RSP0_high = tss.IST1_high; + tss.RSP1_low = tss.IST1_low; + tss.RSP1_high = tss.IST1_high; + tss.RSP2_low = tss.IST1_low; + tss.RSP2_high = tss.IST1_high; + physProxy.writeBlob(TSSPhysAddr, (uint8_t *)(&tss), sizeof(tss)); - //Set up the registers that describe the operating mode. - CR0 cr0 = 0; - cr0.pg = 1; // Turn on paging. - cr0.cd = 0; // Don't disable caching. - cr0.nw = 0; // This is bit is defined to be ignored. - cr0.am = 0; // No alignment checking - cr0.wp = 0; // Supervisor mode can write read only pages - cr0.ne = 1; - cr0.et = 1; // This should always be 1 - cr0.ts = 0; // We don't do task switching, so causing fp exceptions - // would be pointless. - cr0.em = 0; // Allow x87 instructions to execute natively. - cr0.mp = 1; // This doesn't really matter, but the manual suggests - // setting it to one. - cr0.pe = 1; // We're definitely in protected mode. - tc->setMiscReg(MISCREG_CR0, cr0); + /* Setting IDT gates */ + GateDescriptorLow PFGateLow = 0; + PFGateLow.offsetHigh = (uint16_t)((uint32_t)PFHandlerVirtAddr >> 16); + PFGateLow.offsetLow = (uint16_t)PFHandlerVirtAddr; + PFGateLow.selector = (MiscReg)csLowPL; + PFGateLow.p = 1; + PFGateLow.dpl = 0; + PFGateLow.type = 0xe; // gate interrupt type + PFGateLow.IST = 0; // setting IST to 0 and using RSP0 - tc->setMiscReg(MISCREG_MXCSR, 0x1f80); + GateDescriptorHigh PFGateHigh = 0; + PFGateHigh.offset = (uint32_t)(PFHandlerVirtAddr >> 32); + + struct { + uint64_t low; + uint64_t high; + } PFGate = {PFGateLow, PFGateHigh}; + + physProxy.writeBlob(IDTPhysAddr + 0xE0, + (uint8_t *)(&PFGate), sizeof(PFGate)); + + /** System call handler */ + uint8_t syscallBlob[] = { + 0x48,0xa3,0x00,0x60,0x00, + 0x00,0x00,0xc9,0xff,0xff, // mov %rax, (0xffffc90000005600) + 0x48,0x0f,0x07, // sysret + }; + + physProxy.writeBlob(syscallCodePhysAddr, + syscallBlob, sizeof(syscallBlob)); + + /** Page fault handler */ + uint8_t faultBlob[] = { + 0x48,0xa3,0x00,0x61,0x00, + 0x00,0x00,0xc9,0xff,0xff, // mov %rax, (0xffffc90000005700) + 0x48,0x83,0xc4,0x08, // add $0x8,%rsp # skip error + 0x48,0xcf, // iretq + }; + + physProxy.writeBlob(PFHandlerPhysAddr, faultBlob, sizeof(faultBlob)); + + MultiLevelPageTable *pt = + dynamic_cast *>(pTable); + + /* Syscall handler */ + pt->map(syscallCodeVirtAddr, syscallCodePhysAddr, PageBytes, false); + /* GDT */ + pt->map(GDTVirtAddr, GDTPhysAddr, PageBytes, false); + /* IDT */ + pt->map(IDTVirtAddr, IDTPhysAddr, PageBytes, false); + /* TSS */ + pt->map(TSSVirtAddr, TSSPhysAddr, PageBytes, false); + /* IST */ + pt->map(ISTVirtAddr, ISTPhysAddr, PageBytes, false); + /* PF handler */ + pt->map(PFHandlerVirtAddr, PFHandlerPhysAddr, PageBytes, false); + /* MMIO region for m5ops */ + pt->map(MMIORegionVirtAddr, MMIORegionPhysAddr, 16*PageBytes, false); + } else { + for (int i = 0; i < contextIds.size(); i++) { + ThreadContext * tc = system->getThreadContext(contextIds[i]); + + SegAttr dataAttr = 0; + dataAttr.dpl = 3; + dataAttr.unusable = 0; + dataAttr.defaultSize = 1; + dataAttr.longMode = 1; + dataAttr.avl = 0; + dataAttr.granularity = 1; + dataAttr.present = 1; + dataAttr.type = 3; + dataAttr.writable = 1; + dataAttr.readable = 1; + dataAttr.expandDown = 0; + dataAttr.system = 1; + + //Initialize the segment registers. + for(int seg = 0; seg < NUM_SEGMENTREGS; seg++) { + tc->setMiscRegNoEffect(MISCREG_SEG_BASE(seg), 0); + tc->setMiscRegNoEffect(MISCREG_SEG_EFF_BASE(seg), 0); + tc->setMiscRegNoEffect(MISCREG_SEG_ATTR(seg), dataAttr); + } + + SegAttr csAttr = 0; + csAttr.dpl = 3; + csAttr.unusable = 0; + csAttr.defaultSize = 0; + csAttr.longMode = 1; + csAttr.avl = 0; + csAttr.granularity = 1; + csAttr.present = 1; + csAttr.type = 10; + csAttr.writable = 0; + csAttr.readable = 1; + csAttr.expandDown = 0; + csAttr.system = 1; + + tc->setMiscRegNoEffect(MISCREG_CS_ATTR, csAttr); + + Efer efer = 0; + efer.sce = 1; // Enable system call extensions. + efer.lme = 1; // Enable long mode. + efer.lma = 1; // Activate long mode. + efer.nxe = 1; // Enable nx support. + efer.svme = 0; // Disable svm support for now. It isn't implemented. + efer.ffxsr = 1; // Turn on fast fxsave and fxrstor. + tc->setMiscReg(MISCREG_EFER, efer); + + //Set up the registers that describe the operating mode. + CR0 cr0 = 0; + cr0.pg = 1; // Turn on paging. + cr0.cd = 0; // Don't disable caching. + cr0.nw = 0; // This is bit is defined to be ignored. + cr0.am = 0; // No alignment checking + cr0.wp = 0; // Supervisor mode can write read only pages + cr0.ne = 1; + cr0.et = 1; // This should always be 1 + cr0.ts = 0; // We don't do task switching, so causing fp exceptions + // would be pointless. + cr0.em = 0; // Allow x87 instructions to execute natively. + cr0.mp = 1; // This doesn't really matter, but the manual suggests + // setting it to one. + cr0.pe = 1; // We're definitely in protected mode. + tc->setMiscReg(MISCREG_CR0, cr0); + + tc->setMiscReg(MISCREG_MXCSR, 0x1f80); + } } } @@ -573,10 +921,13 @@ X86LiveProcess::argsInit(int pageSize, stack_min = stack_base - space_needed; stack_min = roundDown(stack_min, align); - stack_size = stack_base - stack_min; + stack_size = roundUp(stack_base - stack_min, pageSize); // map memory - allocateMem(roundDown(stack_min, pageSize), roundUp(stack_size, pageSize)); + Addr stack_end = roundDown(stack_base - stack_size, pageSize); + + DPRINTF(Stack, "Mapping the stack: 0x%x %dB\n", stack_end, stack_size); + allocateMem(stack_end, stack_size); // map out initial stack contents IntType sentry_base = stack_base - sentry_size; diff --git a/src/arch/x86/regs/misc.hh b/src/arch/x86/regs/misc.hh index 5887e7486..77f4e1bcd 100644 --- a/src/arch/x86/regs/misc.hh +++ b/src/arch/x86/regs/misc.hh @@ -613,6 +613,9 @@ namespace X86ISA EndBitUnion(CR3) BitUnion64(CR4) + Bitfield<18> osxsave; // Enable XSAVE and Proc Extended States + Bitfield<16> fsgsbase; // Enable RDFSBASE, RDGSBASE, WRFSBASE, + // WRGSBASE instructions Bitfield<10> osxmmexcpt; // Operating System Unmasked // Exception Support Bitfield<9> osfxsr; // Operating System FXSave/FSRSTOR Support @@ -885,6 +888,44 @@ namespace X86ISA EndSubBitUnion(type) EndBitUnion(SegDescriptor) + /** + * TSS Descriptor (long mode - 128 bits) + * the lower 64 bits + */ + BitUnion64(TSSlow) + Bitfield<63, 56> baseHigh; + Bitfield<39, 16> baseLow; + Bitfield<55> g; // Granularity + Bitfield<52> avl; // Available To Software + Bitfield<51, 48> limitHigh; + Bitfield<15, 0> limitLow; + Bitfield<47> p; // Present + Bitfield<46, 45> dpl; // Descriptor Privilege-Level + SubBitUnion(type, 43, 40) + // Specifies whether this descriptor is for code or data. + Bitfield<43> codeOrData; + + // These bit fields are for code segments + Bitfield<42> c; // Conforming + Bitfield<41> r; // Readable + + // These bit fields are for data segments + Bitfield<42> e; // Expand-Down + Bitfield<41> w; // Writable + + // This is used for both code and data segments. + Bitfield<40> a; // Accessed + EndSubBitUnion(type) + EndBitUnion(TSSlow) + + /** + * TSS Descriptor (long mode - 128 bits) + * the upper 64 bits + */ + BitUnion64(TSShigh) + Bitfield<31, 0> base; + EndBitUnion(TSShigh) + BitUnion64(SegAttr) Bitfield<1, 0> dpl; Bitfield<2> unusable; @@ -910,6 +951,23 @@ namespace X86ISA Bitfield<36, 32> count; // Parameter Count EndBitUnion(GateDescriptor) + /** + * Long Mode Gate Descriptor + */ + BitUnion64(GateDescriptorLow) + Bitfield<63, 48> offsetHigh; // Target Code-Segment Offset + Bitfield<47> p; // Present + Bitfield<46, 45> dpl; // Descriptor Privilege-Level + Bitfield<43, 40> type; + Bitfield<35, 32> IST; // IST pointer to TSS -- new stack for exception handling + Bitfield<31, 16> selector; // Target Code-Segment Selector + Bitfield<15, 0> offsetLow; // Target Code-Segment Offset + EndBitUnion(GateDescriptorLow) + + BitUnion64(GateDescriptorHigh) + Bitfield<31, 0> offset; // Target Code-Segment Offset + EndBitUnion(GateDescriptorHigh) + /** * Descriptor-Table Registers */ diff --git a/src/arch/x86/system.cc b/src/arch/x86/system.cc index 5d214cb37..d8ea274a6 100644 --- a/src/arch/x86/system.cc +++ b/src/arch/x86/system.cc @@ -63,8 +63,8 @@ X86System::X86System(Params *p) : { } -static void -installSegDesc(ThreadContext *tc, SegmentRegIndex seg, +void +X86ISA::installSegDesc(ThreadContext *tc, SegmentRegIndex seg, SegDescriptor desc, bool longmode) { uint64_t base = desc.baseLow + (desc.baseHigh << 24); @@ -151,19 +151,25 @@ X86System::initState() (uint8_t *)(&nullDescriptor), 8); numGDTEntries++; + SegDescriptor initDesc = 0; + initDesc.type.codeOrData = 0; // code or data type + initDesc.type.c = 0; // conforming + initDesc.type.r = 1; // readable + initDesc.dpl = 0; // privilege + initDesc.p = 1; // present + initDesc.l = 1; // longmode - 64 bit + initDesc.d = 0; // operand size + initDesc.g = 1; // granularity + initDesc.s = 1; // system segment + initDesc.limitHigh = 0xFFFF; + initDesc.limitLow = 0xF; + initDesc.baseHigh = 0x0; + initDesc.baseLow = 0x0; + //64 bit code segment - SegDescriptor csDesc = 0; + SegDescriptor csDesc = initDesc; csDesc.type.codeOrData = 1; - csDesc.type.c = 0; // Not conforming - csDesc.type.r = 1; // Readable - csDesc.dpl = 0; // Privelege level 0 - csDesc.p = 1; // Present - csDesc.l = 1; // 64 bit - csDesc.d = 0; // default operand size - csDesc.g = 1; // Page granularity - csDesc.s = 1; // Not a system segment - csDesc.limitHigh = 0xF; - csDesc.limitLow = 0xFFFF; + csDesc.dpl = 0; //Because we're dealing with a pointer and I don't think it's //guaranteed that there isn't anything in a nonvirtual class between //it's beginning in memory and it's actual data, we'll use an @@ -180,17 +186,7 @@ X86System::initState() tc->setMiscReg(MISCREG_CS, (MiscReg)cs); //32 bit data segment - SegDescriptor dsDesc = 0; - dsDesc.type.codeOrData = 0; - dsDesc.type.e = 0; // Not expand down - dsDesc.type.w = 1; // Writable - dsDesc.dpl = 0; // Privelege level 0 - dsDesc.p = 1; // Present - dsDesc.d = 1; // default operand size - dsDesc.g = 1; // Page granularity - dsDesc.s = 1; // Not a system segment - dsDesc.limitHigh = 0xF; - dsDesc.limitLow = 0xFFFF; + SegDescriptor dsDesc = initDesc; uint64_t dsDescVal = dsDesc; physProxy.writeBlob(GDTBase + numGDTEntries * 8, (uint8_t *)(&dsDescVal), 8); @@ -210,15 +206,7 @@ X86System::initState() tc->setMiscReg(MISCREG_TSG_BASE, GDTBase); tc->setMiscReg(MISCREG_TSG_LIMIT, 8 * numGDTEntries - 1); - SegDescriptor tssDesc = 0; - tssDesc.type = 0xB; - tssDesc.dpl = 0; // Privelege level 0 - tssDesc.p = 1; // Present - tssDesc.d = 1; // default operand size - tssDesc.g = 1; // Page granularity - tssDesc.s = 0; - tssDesc.limitHigh = 0xF; - tssDesc.limitLow = 0xFFFF; + SegDescriptor tssDesc = initDesc; uint64_t tssDescVal = tssDesc; physProxy.writeBlob(GDTBase + numGDTEntries * 8, (uint8_t *)(&tssDescVal), 8); diff --git a/src/arch/x86/system.hh b/src/arch/x86/system.hh index e8dd2f8b3..578110a7b 100644 --- a/src/arch/x86/system.hh +++ b/src/arch/x86/system.hh @@ -43,6 +43,7 @@ #include #include +#include "arch/x86/regs/misc.hh" #include "base/loader/symtab.hh" #include "cpu/pc_event.hh" #include "kern/system_events.hh" @@ -62,6 +63,9 @@ namespace X86ISA class ConfigTable; } + void installSegDesc(ThreadContext *tc, SegmentRegIndex seg, + SegDescriptor desc, bool longmode); + /* memory mappings for KVMCpu in SE mode */ const uint64_t syscallCodeVirtAddr = 0xffff800000000000; const uint64_t syscallCodePhysAddr = 0x60000; @@ -73,6 +77,10 @@ namespace X86ISA const uint64_t TSSPhysAddr = 0x63000; const uint64_t ISTVirtAddr = 0xffff800000004000; const uint64_t ISTPhysAddr = 0x64000; + const uint64_t PFHandlerVirtAddr = 0xffff800000005000; + const uint64_t PFHandlerPhysAddr = 0x65000; + const uint64_t MMIORegionVirtAddr = 0xffffc90000000000; + const uint64_t MMIORegionPhysAddr = 0xffff0000; const uint64_t pageTablePhysAddr = 0x70000; } diff --git a/src/sim/Process.py b/src/sim/Process.py index 7e5f75363..f64ab0883 100644 --- a/src/sim/Process.py +++ b/src/sim/Process.py @@ -40,6 +40,7 @@ class Process(SimObject): system = Param.System(Parent.any, "system process will run on") useArchPT = Param.Bool('false', 'maintain an in-memory version of the page\ table in an architecture-specific format') + kvmInSE = Param.Bool('false', 'initialize the process for KvmCPU in SE') max_stack_size = Param.MemorySize('64MB', 'maximum size of the stack') @classmethod diff --git a/src/sim/process.cc b/src/sim/process.cc index d1fb22029..f53c6b850 100644 --- a/src/sim/process.cc +++ b/src/sim/process.cc @@ -107,6 +107,7 @@ Process::Process(ProcessParams * params) max_stack_size(params->max_stack_size), M5_pid(system->allocatePID()), useArchPT(params->useArchPT), + kvmInSE(params->kvmInSE), pTable(useArchPT ? static_cast(new ArchPageTable(name(), M5_pid, system)) : static_cast(new FuncPageTable(name(), M5_pid)) ), diff --git a/src/sim/process.hh b/src/sim/process.hh index 6e26bb8bd..d0865f990 100644 --- a/src/sim/process.hh +++ b/src/sim/process.hh @@ -128,6 +128,9 @@ class Process : public SimObject // flag for using architecture specific page table bool useArchPT; + // running KvmCPU in SE mode requires special initialization + bool kvmInSE; + PageTableBase* pTable; class FdMap