gem5/src/arch/alpha/isa/decoder.isa

// -*- mode:c++ -*-
// Copyright (c) 2003-2006 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Steve Reinhardt
////////////////////////////////////////////////////////////////////
//
// The actual decoder specification
//
decode OPCODE default Unknown::unknown() {
format LoadAddress {
0x08: lda({{ Ra = Rb + disp; }});
0x09: ldah({{ Ra = Rb + (disp << 16); }});
}
format LoadOrNop {
0x0a: ldbu({{ Ra.uq = Mem.ub; }});
0x0c: ldwu({{ Ra.uq = Mem.uw; }});
0x0b: ldq_u({{ Ra = Mem.uq; }}, ea_code = {{ EA = (Rb + disp) & ~7; }});
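// ldq_u/stq_u mask off the low three address bits (EA & ~7), so they
// always access the aligned quadword containing the target byte; the
// extract/insert/mask instructions further below are then used to piece
// unaligned data together from such accesses.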
0x23: ldt({{ Fa = Mem.df; }});
0x2a: ldl_l({{ Ra.sl = Mem.sl; }}, mem_flags = LLSC);
0x2b: ldq_l({{ Ra.uq = Mem.uq; }}, mem_flags = LLSC);
}
format LoadOrPrefetch {
0x28: ldl({{ Ra.sl = Mem.sl; }});
0x29: ldq({{ Ra.uq = Mem.uq; }}, pf_flags = EVICT_NEXT);
// The IsFloating flag on lds gets the prefetch to disassemble
// using f31 instead of r31... functionally it's unnecessary.
0x22: lds({{ Fa.uq = s_to_t(Mem.ul); }},
pf_flags = PF_EXCLUSIVE, inst_flags = IsFloating);
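// s_to_t()/t_to_s() convert between the 32-bit S-format memory image
// and the 64-bit T-format register representation that single-precision
// values occupy in the FP register file (sts below applies the inverse).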
}
format Store {
0x0e: stb({{ Mem.ub = Ra<7:0>; }});
0x0d: stw({{ Mem.uw = Ra<15:0>; }});
0x2c: stl({{ Mem.ul = Ra<31:0>; }});
0x2d: stq({{ Mem.uq = Ra.uq; }});
0x0f: stq_u({{ Mem.uq = Ra.uq; }}, {{ EA = (Rb + disp) & ~7; }});
0x26: sts({{ Mem.ul = t_to_s(Fa.uq); }});
0x27: stt({{ Mem.df = Fa; }});
}
format StoreCond {
0x2e: stl_c({{ Mem.ul = Ra<31:0>; }},
{{
uint64_t tmp = write_result;
// see stq_c
Ra = (tmp == 0 || tmp == 1) ? tmp : Ra;
if (tmp == 1) {
xc->setStCondFailures(0);
}
}}, mem_flags = LLSC, inst_flags = IsStoreConditional);
0x2f: stq_c({{ Mem.uq = Ra; }},
{{
uint64_t tmp = write_result;
// If the write operation returns 0 or 1, then
// this was a conventional store conditional,
// and the value indicates the success/failure
// of the operation. If another value is
// returned, then this was a Turbolaser
// mailbox access, and we don't update the
// result register at all.
Ra = (tmp == 0 || tmp == 1) ? tmp : Ra;
if (tmp == 1) {
// clear failure counter... this is
// non-architectural and for debugging
// only.
xc->setStCondFailures(0);
}
}}, mem_flags = LLSC, inst_flags = IsStoreConditional);
}
format IntegerOperate {
0x10: decode INTFUNC { // integer arithmetic operations
0x00: addl({{ Rc.sl = Ra.sl + Rb_or_imm.sl; }});
0x40: addlv({{
int32_t tmp = Ra.sl + Rb_or_imm.sl;
// signed overflow occurs when operands have same sign
// and sign of result does not match.
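// e.g., 0x7fffffff + 1: both operands have sign bit 0 but the result's
// sign bit is 1, so the add overflows.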
if (Ra.sl<31:> == Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>)
fault = new IntegerOverflowFault;
Rc.sl = tmp;
}});
0x02: s4addl({{ Rc.sl = (Ra.sl << 2) + Rb_or_imm.sl; }});
0x12: s8addl({{ Rc.sl = (Ra.sl << 3) + Rb_or_imm.sl; }});
0x20: addq({{ Rc = Ra + Rb_or_imm; }});
0x60: addqv({{
uint64_t tmp = Ra + Rb_or_imm;
// signed overflow occurs when operands have same sign
// and sign of result does not match.
if (Ra<63:> == Rb_or_imm<63:> && tmp<63:> != Ra<63:>)
fault = new IntegerOverflowFault;
Rc = tmp;
}});
0x22: s4addq({{ Rc = (Ra << 2) + Rb_or_imm; }});
0x32: s8addq({{ Rc = (Ra << 3) + Rb_or_imm; }});
0x09: subl({{ Rc.sl = Ra.sl - Rb_or_imm.sl; }});
0x49: sublv({{
int32_t tmp = Ra.sl - Rb_or_imm.sl;
// signed overflow detection is same as for add,
// except we need to look at the *complemented*
// sign bit of the subtrahend (Rb), i.e., if the initial
// signs are the *same* then no overflow can occur
if (Ra.sl<31:> != Rb_or_imm.sl<31:> && tmp<31:> != Ra.sl<31:>)
fault = new IntegerOverflowFault;
Rc.sl = tmp;
}});
0x0b: s4subl({{ Rc.sl = (Ra.sl << 2) - Rb_or_imm.sl; }});
0x1b: s8subl({{ Rc.sl = (Ra.sl << 3) - Rb_or_imm.sl; }});
0x29: subq({{ Rc = Ra - Rb_or_imm; }});
0x69: subqv({{
uint64_t tmp = Ra - Rb_or_imm;
// signed overflow detection is same as for add,
// except we need to look at the *complemented*
// sign bit of the subtrahend (Rb), i.e., if the initial
// signs are the *same* then no overflow can occur
if (Ra<63:> != Rb_or_imm<63:> && tmp<63:> != Ra<63:>)
fault = new IntegerOverflowFault;
Rc = tmp;
}});
0x2b: s4subq({{ Rc = (Ra << 2) - Rb_or_imm; }});
0x3b: s8subq({{ Rc = (Ra << 3) - Rb_or_imm; }});
0x2d: cmpeq({{ Rc = (Ra == Rb_or_imm); }});
0x6d: cmple({{ Rc = (Ra.sq <= Rb_or_imm.sq); }});
0x4d: cmplt({{ Rc = (Ra.sq < Rb_or_imm.sq); }});
0x3d: cmpule({{ Rc = (Ra.uq <= Rb_or_imm.uq); }});
0x1d: cmpult({{ Rc = (Ra.uq < Rb_or_imm.uq); }});
0x0f: cmpbge({{
int hi = 7;
int lo = 0;
uint64_t tmp = 0;
for (int i = 0; i < 8; ++i) {
tmp |= (Ra.uq<hi:lo> >= Rb_or_imm.uq<hi:lo>) << i;
hi += 8;
lo += 8;
}
Rc = tmp;
}});
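// Illustration: cmpbge produces a byte-wise unsigned >= mask, e.g.
// Ra = 0x00ff00ff00ff00ff, Rb = 0x8080808080808080 yields Rc = 0x55.
// With Ra = 0 (r31) the result is a mask of the zero bytes in Rb,
// the usual string-scanning idiom.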
}
0x11: decode INTFUNC { // integer logical operations
0x00: and({{ Rc = Ra & Rb_or_imm; }});
0x08: bic({{ Rc = Ra & ~Rb_or_imm; }});
0x20: bis({{ Rc = Ra | Rb_or_imm; }});
0x28: ornot({{ Rc = Ra | ~Rb_or_imm; }});
0x40: xor({{ Rc = Ra ^ Rb_or_imm; }});
0x48: eqv({{ Rc = Ra ^ ~Rb_or_imm; }});
// conditional moves
0x14: cmovlbs({{ Rc = ((Ra & 1) == 1) ? Rb_or_imm : Rc; }});
0x16: cmovlbc({{ Rc = ((Ra & 1) == 0) ? Rb_or_imm : Rc; }});
0x24: cmoveq({{ Rc = (Ra == 0) ? Rb_or_imm : Rc; }});
0x26: cmovne({{ Rc = (Ra != 0) ? Rb_or_imm : Rc; }});
0x44: cmovlt({{ Rc = (Ra.sq < 0) ? Rb_or_imm : Rc; }});
0x46: cmovge({{ Rc = (Ra.sq >= 0) ? Rb_or_imm : Rc; }});
0x64: cmovle({{ Rc = (Ra.sq <= 0) ? Rb_or_imm : Rc; }});
0x66: cmovgt({{ Rc = (Ra.sq > 0) ? Rb_or_imm : Rc; }});
// For AMASK, RA must be R31.
0x61: decode RA {
31: amask({{ Rc = Rb_or_imm & ~ULL(0x17); }});
}
// For IMPLVER, RA must be R31 and the B operand
// must be the immediate value 1.
0x6c: decode RA {
31: decode IMM {
1: decode INTIMM {
// return EV5 for FULL_SYSTEM and EV6 otherwise
1: implver({{
#if FULL_SYSTEM
Rc = 1;
#else
Rc = 2;
#endif
}});
}
}
}
#if FULL_SYSTEM
// The mysterious 11.25...
0x25: WarnUnimpl::eleven25();
#endif
}
0x12: decode INTFUNC {
0x39: sll({{ Rc = Ra << Rb_or_imm<5:0>; }});
0x34: srl({{ Rc = Ra.uq >> Rb_or_imm<5:0>; }});
0x3c: sra({{ Rc = Ra.sq >> Rb_or_imm<5:0>; }});
0x02: mskbl({{ Rc = Ra & ~(mask( 8) << (Rb_or_imm<2:0> * 8)); }});
0x12: mskwl({{ Rc = Ra & ~(mask(16) << (Rb_or_imm<2:0> * 8)); }});
0x22: mskll({{ Rc = Ra & ~(mask(32) << (Rb_or_imm<2:0> * 8)); }});
0x32: mskql({{ Rc = Ra & ~(mask(64) << (Rb_or_imm<2:0> * 8)); }});
0x52: mskwh({{
int bv = Rb_or_imm<2:0>;
Rc = bv ? (Ra & ~(mask(16) >> (64 - 8 * bv))) : Ra;
}});
0x62: msklh({{
int bv = Rb_or_imm<2:0>;
Rc = bv ? (Ra & ~(mask(32) >> (64 - 8 * bv))) : Ra;
}});
0x72: mskqh({{
int bv = Rb_or_imm<2:0>;
Rc = bv ? (Ra & ~(mask(64) >> (64 - 8 * bv))) : Ra;
}});
0x06: extbl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))< 7:0>; }});
0x16: extwl({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<15:0>; }});
0x26: extll({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8))<31:0>; }});
0x36: extql({{ Rc = (Ra.uq >> (Rb_or_imm<2:0> * 8)); }});
0x5a: extwh({{
Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<15:0>; }});
0x6a: extlh({{
Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>)<31:0>; }});
0x7a: extqh({{
Rc = (Ra << (64 - (Rb_or_imm<2:0> * 8))<5:0>); }});
0x0b: insbl({{ Rc = Ra< 7:0> << (Rb_or_imm<2:0> * 8); }});
0x1b: inswl({{ Rc = Ra<15:0> << (Rb_or_imm<2:0> * 8); }});
0x2b: insll({{ Rc = Ra<31:0> << (Rb_or_imm<2:0> * 8); }});
0x3b: insql({{ Rc = Ra << (Rb_or_imm<2:0> * 8); }});
0x57: inswh({{
int bv = Rb_or_imm<2:0>;
Rc = bv ? (Ra.uq<15:0> >> (64 - 8 * bv)) : 0;
}});
0x67: inslh({{
int bv = Rb_or_imm<2:0>;
Rc = bv ? (Ra.uq<31:0> >> (64 - 8 * bv)) : 0;
}});
0x77: insqh({{
int bv = Rb_or_imm<2:0>;
Rc = bv ? (Ra.uq >> (64 - 8 * bv)) : 0;
}});
0x30: zap({{
uint64_t zapmask = 0;
for (int i = 0; i < 8; ++i) {
if (Rb_or_imm<i:>)
zapmask |= (mask(8) << (i * 8));
}
Rc = Ra & ~zapmask;
}});
0x31: zapnot({{
uint64_t zapmask = 0;
for (int i = 0; i < 8; ++i) {
if (!Rb_or_imm<i:>)
zapmask |= (mask(8) << (i * 8));
}
Rc = Ra & ~zapmask;
}});
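// e.g., "zapnot Ra, 0x0f, Rc" keeps only bytes 0-3 of Ra (zero-extending
// the low longword), while zap clears exactly the selected bytes.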
}
0x13: decode INTFUNC { // integer multiplies
0x00: mull({{ Rc.sl = Ra.sl * Rb_or_imm.sl; }}, IntMultOp);
0x20: mulq({{ Rc = Ra * Rb_or_imm; }}, IntMultOp);
0x30: umulh({{
uint64_t hi, lo;
mul128(Ra, Rb_or_imm, hi, lo);
Rc = hi;
}}, IntMultOp);
0x40: mullv({{
// 32-bit multiply with trap on overflow
int64_t Rax = Ra.sl; // sign extended version of Ra.sl
int64_t Rbx = Rb_or_imm.sl;
int64_t tmp = Rax * Rbx;
// To avoid overflow, all the upper 32 bits must match
// the sign bit of the lower 32. We code this as
// checking the upper 33 bits for all 0s or all 1s.
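// (e.g., tmp = 0x0000000100000000 has upper-33 bits == 0x2 and traps,
// while tmp = 0xffffffff80000000 has them all set and does not.)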
uint64_t sign_bits = tmp<63:31>;
if (sign_bits != 0 && sign_bits != mask(33))
fault = new IntegerOverflowFault;
Rc.sl = tmp<31:0>;
}}, IntMultOp);
0x60: mulqv({{
// 64-bit multiply with trap on overflow
uint64_t hi, lo;
mul128(Ra, Rb_or_imm, hi, lo);
// all the upper 64 bits must match the sign bit of
// the lower 64
if (!((hi == 0 && lo<63:> == 0) ||
(hi == mask(64) && lo<63:> == 1)))
fault = new IntegerOverflowFault;
Rc = lo;
}}, IntMultOp);
}
0x1c: decode INTFUNC {
0x00: decode RA { 31: sextb({{ Rc.sb = Rb_or_imm< 7:0>; }}); }
0x01: decode RA { 31: sextw({{ Rc.sw = Rb_or_imm<15:0>; }}); }
0x30: ctpop({{
uint64_t count = 0;
for (int i = 0; Rb<63:i>; ++i) {
if (Rb<i:i> == 0x1)
++count;
}
Rc = count;
}}, IntAluOp);
0x31: perr({{
uint64_t temp = 0;
int hi = 7;
int lo = 0;
for (int i = 0; i < 8; ++i) {
uint8_t ra_ub = Ra.uq<hi:lo>;
uint8_t rb_ub = Rb.uq<hi:lo>;
temp += (ra_ub >= rb_ub) ?
(ra_ub - rb_ub) : (rb_ub - ra_ub);
hi += 8;
lo += 8;
}
Rc = temp;
}});
0x32: ctlz({{
uint64_t count = 0;
uint64_t temp = Rb;
if (temp<63:32>) temp >>= 32; else count += 32;
if (temp<31:16>) temp >>= 16; else count += 16;
if (temp<15:8>) temp >>= 8; else count += 8;
if (temp<7:4>) temp >>= 4; else count += 4;
if (temp<3:2>) temp >>= 2; else count += 2;
if (temp<1:1>) temp >>= 1; else count += 1;
if ((temp<0:0>) != 0x1) count += 1;
Rc = count;
}}, IntAluOp);
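// ctlz is a binary search from the top: each step either shifts the
// surviving upper half down or adds its width to the count, so
// ctlz(1) == 63 and ctlz(0) == 64.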
0x33: cttz({{
uint64_t count = 0;
uint64_t temp = Rb;
if (!(temp<31:0>)) { temp >>= 32; count += 32; }
if (!(temp<15:0>)) { temp >>= 16; count += 16; }
if (!(temp<7:0>)) { temp >>= 8; count += 8; }
if (!(temp<3:0>)) { temp >>= 4; count += 4; }
if (!(temp<1:0>)) { temp >>= 2; count += 2; }
if (!(temp<0:0> & ULL(0x1))) {
temp >>= 1; count += 1;
}
if (!(temp<0:0> & ULL(0x1))) count += 1;
Rc = count;
}}, IntAluOp);
0x34: unpkbw({{
Rc = (Rb.uq<7:0>
| (Rb.uq<15:8> << 16)
| (Rb.uq<23:16> << 32)
| (Rb.uq<31:24> << 48));
}}, IntAluOp);
0x35: unpkbl({{
Rc = (Rb.uq<7:0> | (Rb.uq<15:8> << 32));
}}, IntAluOp);
0x36: pkwb({{
Rc = (Rb.uq<7:0>
| (Rb.uq<23:16> << 8)
| (Rb.uq<39:32> << 16)
| (Rb.uq<55:48> << 24));
}}, IntAluOp);
0x37: pklb({{
Rc = (Rb.uq<7:0> | (Rb.uq<39:32> << 8));
}}, IntAluOp);
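// pkwb/pklb are the inverses of unpkbw/unpkbl: they gather the low
// byte of each word/longword lane back into adjacent low-order bytes.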
0x38: minsb8({{
uint64_t temp = 0;
int hi = 63;
int lo = 56;
for (int i = 7; i >= 0; --i) {
int8_t ra_sb = Ra.uq<hi:lo>;
int8_t rb_sb = Rb.uq<hi:lo>;
temp = ((temp << 8)
| ((ra_sb < rb_sb) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 8;
lo -= 8;
}
Rc = temp;
}});
0x39: minsw4({{
uint64_t temp = 0;
int hi = 63;
int lo = 48;
for (int i = 3; i >= 0; --i) {
int16_t ra_sw = Ra.uq<hi:lo>;
int16_t rb_sw = Rb.uq<hi:lo>;
temp = ((temp << 16)
| ((ra_sw < rb_sw) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 16;
lo -= 16;
}
Rc = temp;
}});
0x3a: minub8({{
uint64_t temp = 0;
int hi = 63;
int lo = 56;
for (int i = 7; i >= 0; --i) {
uint8_t ra_ub = Ra.uq<hi:lo>;
uint8_t rb_ub = Rb.uq<hi:lo>;
temp = ((temp << 8)
| ((ra_ub < rb_ub) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 8;
lo -= 8;
}
Rc = temp;
}});
0x3b: minuw4({{
uint64_t temp = 0;
int hi = 63;
int lo = 48;
for (int i = 3; i >= 0; --i) {
uint16_t ra_sw = Ra.uq<hi:lo>;
uint16_t rb_sw = Rb.uq<hi:lo>;
temp = ((temp << 16)
| ((ra_sw < rb_sw) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 16;
lo -= 16;
}
Rc = temp;
}});
0x3c: maxub8({{
uint64_t temp = 0;
int hi = 63;
int lo = 56;
for (int i = 7; i >= 0; --i) {
uint8_t ra_ub = Ra.uq<hi:lo>;
uint8_t rb_ub = Rb.uq<hi:lo>;
temp = ((temp << 8)
| ((ra_ub > rb_ub) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 8;
lo -= 8;
}
Rc = temp;
}});
0x3d: maxuw4({{
uint64_t temp = 0;
int hi = 63;
int lo = 48;
for (int i = 3; i >= 0; --i) {
uint16_t ra_uw = Ra.uq<hi:lo>;
uint16_t rb_uw = Rb.uq<hi:lo>;
temp = ((temp << 16)
| ((ra_uw > rb_uw) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 16;
lo -= 16;
}
Rc = temp;
}});
0x3e: maxsb8({{
uint64_t temp = 0;
int hi = 63;
int lo = 56;
for (int i = 7; i >= 0; --i) {
int8_t ra_sb = Ra.uq<hi:lo>;
int8_t rb_sb = Rb.uq<hi:lo>;
temp = ((temp << 8)
| ((ra_sb > rb_sb) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 8;
lo -= 8;
}
Rc = temp;
}});
0x3f: maxsw4({{
uint64_t temp = 0;
int hi = 63;
int lo = 48;
for (int i = 3; i >= 0; --i) {
int16_t ra_sw = Ra.uq<hi:lo>;
int16_t rb_sw = Rb.uq<hi:lo>;
temp = ((temp << 16)
| ((ra_sw > rb_sw) ? Ra.uq<hi:lo>
: Rb.uq<hi:lo>));
hi -= 16;
lo -= 16;
}
Rc = temp;
}});
format BasicOperateWithNopCheck {
0x70: decode RB {
31: ftoit({{ Rc = Fa.uq; }}, FloatCvtOp);
}
0x78: decode RB {
31: ftois({{ Rc.sl = t_to_s(Fa.uq); }},
FloatCvtOp);
}
}
}
}
// Conditional branches.
format CondBranch {
0x39: beq({{ cond = (Ra == 0); }});
0x3d: bne({{ cond = (Ra != 0); }});
0x3e: bge({{ cond = (Ra.sq >= 0); }});
0x3f: bgt({{ cond = (Ra.sq > 0); }});
0x3b: ble({{ cond = (Ra.sq <= 0); }});
0x3a: blt({{ cond = (Ra.sq < 0); }});
0x38: blbc({{ cond = ((Ra & 1) == 0); }});
0x3c: blbs({{ cond = ((Ra & 1) == 1); }});
0x31: fbeq({{ cond = (Fa == 0); }});
0x35: fbne({{ cond = (Fa != 0); }});
0x36: fbge({{ cond = (Fa >= 0); }});
0x37: fbgt({{ cond = (Fa > 0); }});
0x33: fble({{ cond = (Fa <= 0); }});
0x32: fblt({{ cond = (Fa < 0); }});
}
// unconditional branches
format UncondBranch {
0x30: br();
0x34: bsr(IsCall);
}
// indirect branches
0x1a: decode JMPFUNC {
format Jump {
0: jmp();
1: jsr(IsCall);
2: ret(IsReturn);
3: jsr_coroutine(IsCall, IsReturn);
}
}
// Square root and integer-to-FP moves
0x14: decode FP_SHORTFUNC {
// Integer to FP register moves must have RB == 31
0x4: decode RB {
31: decode FP_FULLFUNC {
format BasicOperateWithNopCheck {
0x004: itofs({{ Fc.uq = s_to_t(Ra.ul); }}, FloatCvtOp);
0x024: itoft({{ Fc.uq = Ra.uq; }}, FloatCvtOp);
0x014: FailUnimpl::itoff(); // VAX-format conversion
}
}
}
// Square root instructions must have FA == 31
0xb: decode FA {
31: decode FP_TYPEFUNC {
format FloatingPointOperate {
#if SS_COMPATIBLE_FP
0x0b: sqrts({{
if (Fb < 0.0)
fault = new ArithmeticFault;
Fc = sqrt(Fb);
}}, FloatSqrtOp);
#else
0x0b: sqrts({{
if (Fb.sf < 0.0)
fault = new ArithmeticFault;
Fc.sf = sqrt(Fb.sf);
}}, FloatSqrtOp);
#endif
0x2b: sqrtt({{
if (Fb < 0.0)
fault = new ArithmeticFault;
Fc = sqrt(Fb);
}}, FloatSqrtOp);
}
}
}
// VAX-format sqrtf and sqrtg are not implemented
0xa: FailUnimpl::sqrtfg();
}
// IEEE floating point
0x16: decode FP_SHORTFUNC_TOP2 {
// The top two bits of the short function code break this
// space into four groups: binary ops, compares, reserved, and
// conversions. See Table 4-12 of AHB. There are different
// special cases in these different groups, so we decode on
// these top two bits first just to select a decode strategy.
// Most of these instructions may have various trapping and
// rounding mode flags set; these are decoded in the
// FloatingPointDecode template used by the
// FloatingPointOperate format.
// add/sub/mul/div: just decode on the short function code
// and source type. All valid trapping and rounding modes apply.
0: decode FP_TRAPMODE {
// check for valid trapping modes here
0,1,5,7: decode FP_TYPEFUNC {
format FloatingPointOperate {
#if SS_COMPATIBLE_FP
0x00: adds({{ Fc = Fa + Fb; }});
0x01: subs({{ Fc = Fa - Fb; }});
0x02: muls({{ Fc = Fa * Fb; }}, FloatMultOp);
0x03: divs({{ Fc = Fa / Fb; }}, FloatDivOp);
#else
0x00: adds({{ Fc.sf = Fa.sf + Fb.sf; }});
0x01: subs({{ Fc.sf = Fa.sf - Fb.sf; }});
0x02: muls({{ Fc.sf = Fa.sf * Fb.sf; }}, FloatMultOp);
0x03: divs({{ Fc.sf = Fa.sf / Fb.sf; }}, FloatDivOp);
#endif
0x20: addt({{ Fc = Fa + Fb; }});
0x21: subt({{ Fc = Fa - Fb; }});
0x22: mult({{ Fc = Fa * Fb; }}, FloatMultOp);
0x23: divt({{ Fc = Fa / Fb; }}, FloatDivOp);
}
}
}
// Floating-point compare instructions must have the default
// rounding mode, and may use the default trapping mode or
// /SU. Both trapping modes are treated the same by M5; the
// only difference on the real hardware (as far as I can tell)
// is that without /SU you'd get an imprecise trap if you
// tried to compare a NaN with something else (instead of an
// "unordered" result).
1: decode FP_FULLFUNC {
format BasicOperateWithNopCheck {
0x0a5, 0x5a5: cmpteq({{ Fc = (Fa == Fb) ? 2.0 : 0.0; }},
FloatCmpOp);
0x0a7, 0x5a7: cmptle({{ Fc = (Fa <= Fb) ? 2.0 : 0.0; }},
FloatCmpOp);
0x0a6, 0x5a6: cmptlt({{ Fc = (Fa < Fb) ? 2.0 : 0.0; }},
FloatCmpOp);
0x0a4, 0x5a4: cmptun({{ // unordered
Fc = (!(Fa < Fb) && !(Fa == Fb) && !(Fa > Fb)) ? 2.0 : 0.0;
}}, FloatCmpOp);
}
}
// The FP-to-integer and integer-to-FP conversion insts
// require that FA be 31.
3: decode FA {
31: decode FP_TYPEFUNC {
format FloatingPointOperate {
0x2f: decode FP_ROUNDMODE {
format FPFixedRounding {
// "chopped" i.e. round toward zero
0: cvttq({{ Fc.sq = (int64_t)trunc(Fb); }},
Chopped);
// round to minus infinity
1: cvttq({{ Fc.sq = (int64_t)floor(Fb); }},
MinusInfinity);
}
default: cvttq({{ Fc.sq = (int64_t)nearbyint(Fb); }});
}
// The cvtts opcode is overloaded to be cvtst if the trap
// mode is 2 or 6 (which are not valid otherwise)
0x2c: decode FP_FULLFUNC {
format BasicOperateWithNopCheck {
// trap on denorm version "cvtst/s" is
// simulated same as cvtst
0x2ac, 0x6ac: cvtst({{ Fc = Fb.sf; }});
}
default: cvtts({{ Fc.sf = Fb; }});
}
// The trapping mode for integer-to-FP conversions
// must be /SUI or nothing; /U and /SU are not
// allowed. The full set of rounding modes are
// supported though.
0x3c: decode FP_TRAPMODE {
0,7: cvtqs({{ Fc.sf = Fb.sq; }});
}
0x3e: decode FP_TRAPMODE {
0,7: cvtqt({{ Fc = Fb.sq; }});
}
}
}
}
}
// misc FP operate
0x17: decode FP_FULLFUNC {
format BasicOperateWithNopCheck {
0x010: cvtlq({{
Fc.sl = (Fb.uq<63:62> << 30) | Fb.uq<58:29>;
}});
0x030: cvtql({{
Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29);
}});
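// The bit shuffles above reflect the longword-in-FP-register layout:
// the two high bits of the 32-bit value live in <63:62> and the low
// 30 bits in <58:29>; the remaining register bits are ignored by cvtlq.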
// We treat the precise & imprecise trapping versions of
// cvtql identically.
0x130, 0x530: cvtqlv({{
// To avoid overflow, all the upper 32 bits must match
// the sign bit of the lower 32. We code this as
// checking the upper 33 bits for all 0s or all 1s.
uint64_t sign_bits = Fb.uq<63:31>;
if (sign_bits != 0 && sign_bits != mask(33))
fault = new IntegerOverflowFault;
Fc.uq = (Fb.uq<31:30> << 62) | (Fb.uq<29:0> << 29);
}});
0x020: cpys({{ // copy sign
Fc.uq = (Fa.uq<63:> << 63) | Fb.uq<62:0>;
}});
0x021: cpysn({{ // copy sign negated
Fc.uq = (~Fa.uq<63:> << 63) | Fb.uq<62:0>;
}});
0x022: cpyse({{ // copy sign and exponent
Fc.uq = (Fa.uq<63:52> << 52) | Fb.uq<51:0>;
}});
0x02a: fcmoveq({{ Fc = (Fa == 0) ? Fb : Fc; }});
0x02b: fcmovne({{ Fc = (Fa != 0) ? Fb : Fc; }});
0x02c: fcmovlt({{ Fc = (Fa < 0) ? Fb : Fc; }});
0x02d: fcmovge({{ Fc = (Fa >= 0) ? Fb : Fc; }});
0x02e: fcmovle({{ Fc = (Fa <= 0) ? Fb : Fc; }});
0x02f: fcmovgt({{ Fc = (Fa > 0) ? Fb : Fc; }});
0x024: mt_fpcr({{ FPCR = Fa.uq; }}, IsIprAccess);
0x025: mf_fpcr({{ Fa.uq = FPCR; }}, IsIprAccess);
}
}
// miscellaneous mem-format ops
0x18: decode MEMFUNC {
format WarnUnimpl {
0x8000: fetch();
0xa000: fetch_m();
0xe800: ecb();
}
format MiscPrefetch {
0xf800: wh64({{ EA = Rb & ~ULL(63); }},
{{ ; }},
mem_flags = PREFETCH);
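// wh64 ("write hint - 64 bytes") only hints that the aligned 64-byte
// block will be entirely overwritten, so modeling it as a data-less
// prefetch (empty access code above) is adequate here.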
}
format BasicOperate {
0xc000: rpcc({{
#if FULL_SYSTEM
/* Rb is a fake dependency so here is a fun way to get
* the parser to understand that.
*/
Ra = xc->readMiscReg(IPR_CC) + (Rb & 0);
#else
Ra = curTick;
#endif
}}, IsUnverifiable);
// All of the barrier instructions below do nothing in
// their execute() methods (hence the empty code blocks).
// All of their functionality is hard-coded in the
// pipeline based on the flags IsSerializing,
// IsMemBarrier, and IsWriteBarrier. In the current
// detailed CPU model, the execute() function only gets
// called at fetch, so there's no way to generate pipeline
// behavior at any other stage. Once we go to an
// exec-in-exec CPU model we should be able to get rid of
// these flags and implement this behavior via the
// execute() methods.
// trapb is just a barrier on integer traps, whereas excb is
// a barrier on integer and FP traps. "EXCB is thus a
// superset of TRAPB." (Alpha ARM, Sec 4.11.4) We treat
// them the same though.
0x0000: trapb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass);
0x0400: excb({{ }}, IsSerializing, IsSerializeBefore, No_OpClass);
0x4000: mb({{ }}, IsMemBarrier, MemReadOp);
0x4400: wmb({{ }}, IsWriteBarrier, MemWriteOp);
}
#if FULL_SYSTEM
format BasicOperate {
0xe000: rc({{
Ra = IntrFlag;
IntrFlag = 0;
}}, IsNonSpeculative, IsUnverifiable);
0xf000: rs({{
Ra = IntrFlag;
IntrFlag = 1;
}}, IsNonSpeculative, IsUnverifiable);
}
#else
format FailUnimpl {
0xe000: rc();
0xf000: rs();
}
#endif
}
#if FULL_SYSTEM
0x00: CallPal::call_pal({{
if (!palValid ||
(palPriv
&& xc->readMiscReg(IPR_ICM) != mode_kernel)) {
// invalid pal function code, or attempt to do privileged
// PAL call in non-kernel mode
fault = new UnimplementedOpcodeFault;
} else {
// check to see if simulator wants to do something special
// on this PAL call (including maybe suppress it)
bool dopal = xc->simPalCheck(palFunc);
if (dopal) {
PCState pc = PCS;
xc->setMiscReg(IPR_EXC_ADDR, pc.npc());
pc.npc(xc->readMiscReg(IPR_PAL_BASE) + palOffset);
PCS = pc;
}
}
}}, IsNonSpeculative);
#else
0x00: decode PALFUNC {
format EmulatedCallPal {
0x00: halt ({{
exitSimLoop("halt instruction encountered");
}}, IsNonSpeculative);
0x83: callsys({{
xc->syscall(R0);
}}, IsSerializeAfter, IsNonSpeculative, IsSyscall);
// Read uniq reg into ABI return value register (r0)
0x9e: rduniq({{ R0 = Runiq; }}, IsIprAccess);
// Write uniq reg with value from ABI arg register (r16)
0x9f: wruniq({{ Runiq = R16; }}, IsIprAccess);
}
}
#endif
#if FULL_SYSTEM
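    // PALcode-only hardware instructions (hw_ld, hw_st, hw_mfpr,
    // hw_mtpr, hw_rei). Outside PAL mode each of these decodes to an
    // OPCDEC fault; the HW_LDST_QUAD field selects longword vs.
    // quadword accesses for the loads and stores.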
0x1b: decode PALMODE {
        0: OpcdecFault::hw_ld();
1: decode HW_LDST_QUAD {
format HwLoad {
0: hw_ld({{ EA = (Rb + disp) & ~3; }}, {{ Ra = Mem.ul; }},
L, IsSerializing, IsSerializeBefore);
1: hw_ld({{ EA = (Rb + disp) & ~7; }}, {{ Ra = Mem.uq; }},
Q, IsSerializing, IsSerializeBefore);
}
}
}
0x1f: decode PALMODE {
        0: OpcdecFault::hw_st_quad();
format HwStore {
1: decode HW_LDST_COND {
0: decode HW_LDST_QUAD {
0: hw_st({{ EA = (Rb + disp) & ~3; }},
{{ Mem.ul = Ra<31:0>; }}, L, IsSerializing, IsSerializeBefore);
1: hw_st({{ EA = (Rb + disp) & ~7; }},
{{ Mem.uq = Ra.uq; }}, Q, IsSerializing, IsSerializeBefore);
}
1: FailUnimpl::hw_st_cond();
}
}
}
0x19: decode PALMODE {
0: OpcdecFault::hw_mfpr();
format HwMoveIPR {
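            // Translate the architectural IPR number into the internal
            // misc register index; IPRs that don't map to a readable
            // internal register raise an UnimplementedOpcodeFault.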
1: hw_mfpr({{
int miscRegIndex = (ipr_index < MaxInternalProcRegs) ?
IprToMiscRegIndex[ipr_index] : -1;
                if (miscRegIndex < 0 || !IprIsReadable(miscRegIndex) ||
miscRegIndex >= NumInternalProcRegs)
fault = new UnimplementedOpcodeFault;
else
Ra = xc->readMiscReg(miscRegIndex);
}}, IsIprAccess);
}
}
0x1d: decode PALMODE {
0: OpcdecFault::hw_mtpr();
format HwMoveIPR {
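            // Same index translation as hw_mfpr above, but the target
            // IPR must be writable or an UnimplementedOpcodeFault is
            // raised.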
1: hw_mtpr({{
int miscRegIndex = (ipr_index < MaxInternalProcRegs) ?
IprToMiscRegIndex[ipr_index] : -1;
                if (miscRegIndex < 0 || !IprIsWritable(miscRegIndex) ||
miscRegIndex >= NumInternalProcRegs)
fault = new UnimplementedOpcodeFault;
else
xc->setMiscReg(miscRegIndex, Ra);
if (traceData) { traceData->setData(Ra); }
}}, IsIprAccess);
}
}
0x1e: decode PALMODE {
0: OpcdecFault::hw_rei();
format BasicOperate {
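            // Return from PALcode: hwrei() resumes execution at the
            // address saved in the EXC_ADDR IPR.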
1: hw_rei({{ xc->hwrei(); }}, IsSerializing, IsSerializeBefore);
}
}
#endif
format BasicOperate {
// M5 special opcodes use the reserved 0x01 opcode space
0x01: decode M5FUNC {
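            // The M5FUNC field selects the pseudo instruction. These ops
            // take their arguments in the integer argument registers
            // (R16-R18) and return results in R0, mirroring the normal
            // Alpha C calling convention.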
#if FULL_SYSTEM
0x00: arm({{
PseudoInst::arm(xc->tcBase());
}}, IsNonSpeculative);
0x01: quiesce({{
PseudoInst::quiesce(xc->tcBase());
}}, IsNonSpeculative, IsQuiesce);
0x02: quiesceNs({{
PseudoInst::quiesceNs(xc->tcBase(), R16);
}}, IsNonSpeculative, IsQuiesce);
0x03: quiesceCycles({{
PseudoInst::quiesceCycles(xc->tcBase(), R16);
}}, IsNonSpeculative, IsQuiesce, IsUnverifiable);
0x04: quiesceTime({{
R0 = PseudoInst::quiesceTime(xc->tcBase());
}}, IsNonSpeculative, IsUnverifiable);
#endif
0x07: rpns({{
R0 = PseudoInst::rpns(xc->tcBase());
}}, IsNonSpeculative, IsUnverifiable);
0x09: wakeCPU({{
PseudoInst::wakeCPU(xc->tcBase(), R16);
}}, IsNonSpeculative, IsUnverifiable);
0x10: deprecated_ivlb({{
warn_once("Obsolete M5 ivlb instruction encountered.\n");
}});
0x11: deprecated_ivle({{
warn_once("Obsolete M5 ivlb instruction encountered.\n");
}});
0x20: deprecated_exit ({{
warn_once("deprecated M5 exit instruction encountered.\n");
PseudoInst::m5exit(xc->tcBase(), 0);
}}, No_OpClass, IsNonSpeculative);
0x21: m5exit({{
PseudoInst::m5exit(xc->tcBase(), R16);
}}, No_OpClass, IsNonSpeculative);
#if FULL_SYSTEM
0x31: loadsymbol({{
PseudoInst::loadsymbol(xc->tcBase());
}}, No_OpClass, IsNonSpeculative);
0x30: initparam({{
Ra = xc->tcBase()->getCpuPtr()->system->init_param;
}});
#endif
0x40: resetstats({{
PseudoInst::resetstats(xc->tcBase(), R16, R17);
}}, IsNonSpeculative);
0x41: dumpstats({{
PseudoInst::dumpstats(xc->tcBase(), R16, R17);
}}, IsNonSpeculative);
0x42: dumpresetstats({{
PseudoInst::dumpresetstats(xc->tcBase(), R16, R17);
}}, IsNonSpeculative);
0x43: m5checkpoint({{
PseudoInst::m5checkpoint(xc->tcBase(), R16, R17);
}}, IsNonSpeculative);
#if FULL_SYSTEM
0x50: m5readfile({{
R0 = PseudoInst::readfile(xc->tcBase(), R16, R17, R18);
}}, IsNonSpeculative);
#endif
0x51: m5break({{
PseudoInst::debugbreak(xc->tcBase());
}}, IsNonSpeculative);
0x52: m5switchcpu({{
PseudoInst::switchcpu(xc->tcBase());
}}, IsNonSpeculative);
#if FULL_SYSTEM
0x53: m5addsymbol({{
PseudoInst::addsymbol(xc->tcBase(), R16, R17);
}}, IsNonSpeculative);
#endif
0x54: m5panic({{
panic("M5 panic instruction called at pc=%#x.",
xc->pcState().pc());
}}, IsNonSpeculative);
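            // CPANN(lbl) calls the method 'lbl' on the global CPA
            // annotation object with the current thread context; the
            // macro is used by the annotate ops below and #undef'd
            // after them.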
#define CPANN(lbl) CPA::cpa()->lbl(xc->tcBase())
0x55: decode RA {
0x00: m5a_old({{
panic("Deprecated M5 annotate instruction executed at pc=%#x\n",
xc->pcState().pc());
}}, IsNonSpeculative);
0x01: m5a_bsm({{
CPANN(swSmBegin);
}}, IsNonSpeculative);
0x02: m5a_esm({{
CPANN(swSmEnd);
}}, IsNonSpeculative);
0x03: m5a_begin({{
CPANN(swExplictBegin);
}}, IsNonSpeculative);
0x04: m5a_end({{
CPANN(swEnd);
}}, IsNonSpeculative);
0x06: m5a_q({{
CPANN(swQ);
}}, IsNonSpeculative);
0x07: m5a_dq({{
CPANN(swDq);
}}, IsNonSpeculative);
0x08: m5a_wf({{
CPANN(swWf);
}}, IsNonSpeculative);
0x09: m5a_we({{
CPANN(swWe);
}}, IsNonSpeculative);
0x0C: m5a_sq({{
CPANN(swSq);
}}, IsNonSpeculative);
0x0D: m5a_aq({{
CPANN(swAq);
}}, IsNonSpeculative);
0x0E: m5a_pq({{
CPANN(swPq);
}}, IsNonSpeculative);
0x0F: m5a_l({{
CPANN(swLink);
}}, IsNonSpeculative);
0x10: m5a_identify({{
CPANN(swIdentify);
}}, IsNonSpeculative);
0x11: m5a_getid({{
R0 = CPANN(swGetId);
}}, IsNonSpeculative);
0x13: m5a_scl({{
CPANN(swSyscallLink);
}}, IsNonSpeculative);
0x14: m5a_rq({{
CPANN(swRq);
}}, IsNonSpeculative);
} // M5 Annotate Operations
#undef CPANN
0x56: m5reserved2({{
warn("M5 reserved opcode ignored");
}}, IsNonSpeculative);
0x57: m5reserved3({{
warn("M5 reserved opcode ignored");
}}, IsNonSpeculative);
0x58: m5reserved4({{
warn("M5 reserved opcode ignored");
}}, IsNonSpeculative);
0x59: m5reserved5({{
warn("M5 reserved opcode ignored");
}}, IsNonSpeculative);
}
}
}