From 3bb5fd8c44bbac6cbcfd18aed5fd46c112d19d7f Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 8 Apr 2007 01:42:42 +0000 Subject: [PATCH 1/5] Get the "hard" SPARC instructions working in o3. I don't like that the IsStoreConditional flag needs to be set for them because they aren't store conditional instructions, and I should fix the format code which is not handling the opt_flags correctly. --HG-- extra : convert_revision : cfd32808592832d7b6fbdaace5ae7b17c8a246e9 --- src/arch/sparc/isa/formats/mem/swap.isa | 10 +++++----- src/cpu/base_dyn_inst.hh | 5 +++++ src/cpu/o3/lsq_unit_impl.hh | 5 +++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/arch/sparc/isa/formats/mem/swap.isa b/src/arch/sparc/isa/formats/mem/swap.isa index 818597a84..b71542a2b 100644 --- a/src/arch/sparc/isa/formats/mem/swap.isa +++ b/src/arch/sparc/isa/formats/mem/swap.isa @@ -137,7 +137,7 @@ def format Swap(code, postacc_code, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doMemFormat(code, SwapFuncs, '', name, Name, flags, - opt_flags, postacc_code) + ["IsStoreConditional"], postacc_code) }}; def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ @@ -148,7 +148,7 @@ def format SwapAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doMemFormat(code, SwapFuncs, AlternateASIPrivFaultCheck, - name, Name, flags, opt_flags, postacc_code) + name, Name, flags, ["IsStoreConditional"], postacc_code) }}; @@ -163,8 +163,8 @@ let {{ decode_block = BasicDecode.subst(iop) microParams = {"code": code, "postacc_code" : postacc_code, "ea_code" : addrCalcReg, "fault_check" : faultCode} - exec_output = doSplitExecute(execute, name, Name, asi, opt_flags, - microParams); + exec_output = doSplitExecute(execute, name, Name, asi, + ["IsStoreConditional"], microParams); return (header_output, decoder_output, exec_output, decode_block) }}; @@ -177,7 +177,7 @@ def format CasAlt(code, postacc_code, asi, mem_flags, *opt_flags) {{ decoder_output, exec_output, decode_block) = doCasFormat(code, SwapFuncs, AlternateASIPrivFaultCheck, - name, Name, flags, opt_flags, postacc_code) + name, Name, flags, ["IsStoreConditional"], postacc_code) }}; diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 6c6d90076..eed05c2f1 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -877,6 +877,11 @@ BaseDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) effAddrValid = true; physEffAddr = req->getPaddr(); memReqFlags = req->getFlags(); + + if (req->isCondSwap()) { + assert(res); + req->setExtraData(*res); + } #if 0 if (cpu->system->memctrl->badaddr(physEffAddr)) { fault = TheISA::genMachineCheckFault(); diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index d558e2dfa..44e2cea76 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -647,7 +647,8 @@ LSQUnit::writebackStores() memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); - PacketPtr data_pkt = new Packet(req, MemCmd::WriteReq, + MemCmd command = req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq; + PacketPtr data_pkt = new Packet(req, command, Packet::Broadcast); data_pkt->dataStatic(inst->memData); @@ -664,7 +665,7 @@ LSQUnit::writebackStores() inst->seqNum); // @todo: Remove this SC hack once the memory system handles it. - if (req->isLocked()) { + if (inst->isStoreConditional()) { // Disable recording the result temporarily. Writing to // misc regs normally updates the result, but this is not // the desired behavior when handling store conditionals. From c7bb10688643b5e164d8ebb991f0504613e441d9 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Sun, 8 Apr 2007 23:31:11 +0000 Subject: [PATCH 2/5] Take into account that the flattened integer register space is a different size than the architected one. Also fixed some asserts. --HG-- extra : convert_revision : 26e7863919d1b976ba8cad747af475a6f18e9440 --- src/cpu/o3/regfile.hh | 14 +++++++------- src/cpu/o3/rename_impl.hh | 10 ++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index bbc69fc96..b5b1cd021 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -174,7 +174,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -189,7 +189,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -204,7 +204,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -217,7 +217,7 @@ class PhysRegFile // Remove the base Float reg dependency. reg_idx = reg_idx - numPhysicalIntRegs; - assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs); + assert(reg_idx < numPhysicalFloatRegs); DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n", int(reg_idx), (uint64_t)val); @@ -232,11 +232,11 @@ class PhysRegFile MiscReg readMiscReg(int misc_reg, unsigned thread_id) { - return miscRegs[thread_id].readReg(misc_reg, - cpu->tcBase(thread_id)); + return miscRegs[thread_id].readReg(misc_reg, cpu->tcBase(thread_id)); } - void setMiscRegNoEffect(int misc_reg, const MiscReg &val, unsigned thread_id) + void setMiscRegNoEffect(int misc_reg, + const MiscReg &val, unsigned thread_id) { miscRegs[thread_id].setRegNoEffect(misc_reg, val); } diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index c0d34116d..6b7fb1e08 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -996,7 +996,12 @@ DefaultRename::renameSrcRegs(DynInstPtr &inst,unsigned tid) if (src_reg < TheISA::FP_Base_DepTag) { flat_src_reg = TheISA::flattenIntIndex(inst->tcBase(), src_reg); DPRINTF(Rename, "Flattening index %d to %d.\n", (int)src_reg, (int)flat_src_reg); + } else { + // Floating point and Miscellaneous registers need their indexes + // adjusted to account for the expanded number of flattened int regs. + flat_src_reg = src_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; } + inst->flattenSrcReg(src_idx, flat_src_reg); // Look up the source registers to get the phys. register they've @@ -1033,8 +1038,13 @@ DefaultRename::renameDestRegs(DynInstPtr &inst,unsigned tid) RegIndex dest_reg = inst->destRegIdx(dest_idx); RegIndex flat_dest_reg = dest_reg; if (dest_reg < TheISA::FP_Base_DepTag) { + // Integer registers are flattened. flat_dest_reg = TheISA::flattenIntIndex(inst->tcBase(), dest_reg); DPRINTF(Rename, "Flattening index %d to %d.\n", (int)dest_reg, (int)flat_dest_reg); + } else { + // Floating point and Miscellaneous registers need their indexes + // adjusted to account for the expanded number of flattened int regs. + flat_dest_reg = dest_reg - TheISA::FP_Base_DepTag + TheISA::NumIntRegs; } inst->flattenDestReg(dest_idx, flat_dest_reg); From d9da10199eaa878fe0f3071a5aec51a916773bc8 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 9 Apr 2007 03:40:21 -0400 Subject: [PATCH 3/5] Make SPARC build o3 by default. --HG-- extra : convert_revision : 7be49f9d395094d849d8da59b0fd4efe962fb201 --- build_opts/SPARC_SE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_opts/SPARC_SE b/build_opts/SPARC_SE index 62b6841ad..b288d3908 100644 --- a/build_opts/SPARC_SE +++ b/build_opts/SPARC_SE @@ -1,3 +1,3 @@ TARGET_ISA = 'sparc' -CPU_MODELS = 'AtomicSimpleCPU,TimingSimpleCPU' +CPU_MODELS = 'AtomicSimpleCPU,TimingSimpleCPU,O3CPU' FULL_SYSTEM = 0 From 7e1807acfb91d44352e81e6434202256090f3108 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 9 Apr 2007 03:52:15 -0400 Subject: [PATCH 4/5] Added SPARC simple timing regression for insttest --HG-- extra : convert_revision : cb490fd6f1be73b57dceb93edf83d7edc4a67beb --- .../ref/sparc/linux/simple-timing/config.ini | 187 +++++++++++++++ .../ref/sparc/linux/simple-timing/config.out | 178 +++++++++++++++ .../ref/sparc/linux/simple-timing/m5stats.txt | 215 ++++++++++++++++++ .../ref/sparc/linux/simple-timing/stderr | 4 + .../ref/sparc/linux/simple-timing/stdout | 24 ++ 5 files changed, 608 insertions(+) create mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini create mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out create mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt create mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr create mode 100644 tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini new file mode 100644 index 000000000..85d14933a --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.ini @@ -0,0 +1,187 @@ +[root] +type=Root +children=system +dummy=0 + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=TimingSimpleCPU +children=dcache icache l2cache toL2Bus workload +clock=1 +cpu_id=0 +defer_registration=false +function_trace=false +function_trace_start=0 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +phase=0 +progress_interval=0 +system=system +workload=system.cpu.workload +dcache_port=system.cpu.dcache.cpu_side +icache_port=system.cpu.icache.cpu_side + +[system.cpu.dcache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=262144 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.dcache_port +mem_side=system.cpu.toL2Bus.port[1] + +[system.cpu.icache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=131072 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.icache_port +mem_side=system.cpu.toL2Bus.port[0] + +[system.cpu.l2cache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=2097152 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.toL2Bus.port[2] +mem_side=system.membus.port[1] + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +cwd= +egid=100 +env= +euid=100 +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.physmem.port system.cpu.l2cache.mem_side + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +zero=false +port=system.membus.port[0] + diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out new file mode 100644 index 000000000..ec2d1886a --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/config.out @@ -0,0 +1,178 @@ +[root] +type=Root +dummy=0 + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 +zero=false + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +input=cin +output=cout +env= +cwd= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu] +type=TimingSimpleCPU +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +system=system +cpu_id=0 +workload=system.cpu.workload +clock=1 +phase=0 +defer_registration=false +// width not specified +function_trace=false +function_trace_start=0 +// simulate_stalls not specified + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt new file mode 100644 index 000000000..a4396b3da --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/m5stats.txt @@ -0,0 +1,215 @@ + +---------- Begin Simulation Statistics ---------- +host_inst_rate 39129 # Simulator instruction rate (inst/s) +host_mem_usage 153232 # Number of bytes of host memory used +host_seconds 0.28 # Real time elapsed on the host +host_tick_rate 6030675 # Simulator tick rate (ticks/s) +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 11001 # Number of instructions simulated +sim_seconds 0.000002 # Number of seconds simulated +sim_ticks 1698003 # Number of ticks simulated +system.cpu.dcache.ReadReq_accesses 1462 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 3977.759259 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 2977.759259 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 1408 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 214799 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.036936 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 54 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_miss_latency 160799 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.036936 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 54 # number of ReadReq MSHR misses +system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses) +system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits +system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 3963.647727 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 2963.647727 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 1204 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 348801 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.068111 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 88 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_miss_latency 260801 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.068111 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 88 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 18.436620 # Average number of references to valid blocks. +system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 2754 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 3969.014085 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 2969.014085 # average overall mshr miss latency +system.cpu.dcache.demand_hits 2612 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 563600 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.051561 # miss rate for demand accesses +system.cpu.dcache.demand_misses 142 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 421600 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.051561 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 142 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 2754 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 3969.014085 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 2969.014085 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 2612 # number of overall hits +system.cpu.dcache.overall_miss_latency 563600 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.051561 # miss rate for overall accesses +system.cpu.dcache.overall_misses 142 # number of overall misses +system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 421600 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.051561 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 142 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 0 # number of replacements +system.cpu.dcache.sampled_refs 142 # Sample count of references to valid blocks. +system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 86.872921 # Cycle average of tags in use +system.cpu.dcache.total_refs 2618 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.icache.ReadReq_accesses 11002 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 3961.367491 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 2961.367491 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 10719 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 1121067 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.025723 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 283 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_miss_latency 838067 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.025723 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 283 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.icache.avg_refs 37.876325 # Average number of references to valid blocks. +system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 11002 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 3961.367491 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 2961.367491 # average overall mshr miss latency +system.cpu.icache.demand_hits 10719 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 1121067 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.025723 # miss rate for demand accesses +system.cpu.icache.demand_misses 283 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 838067 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.025723 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 283 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 11002 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 3961.367491 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 2961.367491 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 10719 # number of overall hits +system.cpu.icache.overall_miss_latency 1121067 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.025723 # miss rate for overall accesses +system.cpu.icache.overall_misses 283 # number of overall misses +system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 838067 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.025723 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 283 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 0 # number of replacements +system.cpu.icache.sampled_refs 283 # Sample count of references to valid blocks. +system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 125.297191 # Cycle average of tags in use +system.cpu.icache.total_refs 10719 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.idle_fraction 0 # Percentage of idle cycles +system.cpu.l2cache.ReadReq_accesses 423 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 2968.515366 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 1967.515366 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 1255682 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 423 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 832259 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 423 # number of ReadReq MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. +system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 423 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 2968.515366 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 1967.515366 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 1255682 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 423 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 832259 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 423 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 423 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 2968.515366 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 1967.515366 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 0 # number of overall hits +system.cpu.l2cache.overall_miss_latency 1255682 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 423 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 832259 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 423 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.sampled_refs 423 # Sample count of references to valid blocks. +system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.l2cache.tagsinuse 211.742547 # Cycle average of tags in use +system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.l2cache.writebacks 0 # number of writebacks +system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles +system.cpu.numCycles 1698003 # number of cpu cycles simulated +system.cpu.num_insts 11001 # Number of instructions executed +system.cpu.num_refs 2760 # Number of memory references +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr new file mode 100644 index 000000000..fce46c90e --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stderr @@ -0,0 +1,4 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +0: system.remote_gdb.listener: listening for remote gdb on port 7000 +warn: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout new file mode 100644 index 000000000..100a1ebce --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/simple-timing/stdout @@ -0,0 +1,24 @@ +Begining test of difficult SPARC instructions... +LDSTUB: Passed +SWAP: Passed +CAS FAIL: Passed +CAS WORK: Passed +CASX FAIL: Passed +CASX WORK: Passed +LDTX: Passed +LDTW: Passed +STTW: Passed +Done +M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Apr 8 2007 05:25:15 +M5 started Sun Apr 8 22:54:12 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/simple-timing tests/run.py quick/02.insttest/sparc/linux/simple-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1698003 because target called exit() From 97e8111b465c96e6e103fc6549f8921ca0f74da3 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 9 Apr 2007 03:59:56 -0400 Subject: [PATCH 5/5] Added SPARC o3 insttest regression. --HG-- extra : convert_revision : b008b4bda60ced329df9b700ff9b0a978e2d3b19 --- .../ref/sparc/linux/o3-timing/config.ini | 379 ++++++++++++++++ .../ref/sparc/linux/o3-timing/config.out | 367 ++++++++++++++++ .../ref/sparc/linux/o3-timing/m5stats.txt | 410 ++++++++++++++++++ .../ref/sparc/linux/o3-timing/stderr | 4 + .../ref/sparc/linux/o3-timing/stdout | 24 + 5 files changed, 1184 insertions(+) create mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini create mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out create mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt create mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr create mode 100644 tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini new file mode 100644 index 000000000..f804a40fe --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.ini @@ -0,0 +1,379 @@ +[root] +type=Root +children=system +dummy=0 + +[system] +type=System +children=cpu membus physmem +mem_mode=atomic +physmem=system.physmem + +[system.cpu] +type=DerivO3CPU +children=dcache fuPool icache l2cache toL2Bus workload +BTBEntries=4096 +BTBTagSize=16 +LFSTSize=1024 +LQEntries=32 +RASSize=16 +SQEntries=32 +SSITSize=1024 +activity=0 +backComSize=5 +choiceCtrBits=2 +choicePredictorSize=8192 +clock=1 +commitToDecodeDelay=1 +commitToFetchDelay=1 +commitToIEWDelay=1 +commitToRenameDelay=1 +commitWidth=8 +cpu_id=0 +decodeToFetchDelay=1 +decodeToRenameDelay=1 +decodeWidth=8 +defer_registration=false +dispatchWidth=8 +fetchToDecodeDelay=1 +fetchTrapLatency=1 +fetchWidth=8 +forwardComSize=5 +fuPool=system.cpu.fuPool +function_trace=false +function_trace_start=0 +globalCtrBits=2 +globalHistoryBits=13 +globalPredictorSize=8192 +iewToCommitDelay=1 +iewToDecodeDelay=1 +iewToFetchDelay=1 +iewToRenameDelay=1 +instShiftAmt=2 +issueToExecuteDelay=1 +issueWidth=8 +localCtrBits=2 +localHistoryBits=11 +localHistoryTableSize=2048 +localPredictorSize=2048 +max_insts_all_threads=0 +max_insts_any_thread=0 +max_loads_all_threads=0 +max_loads_any_thread=0 +numIQEntries=64 +numPhysFloatRegs=256 +numPhysIntRegs=256 +numROBEntries=192 +numRobs=1 +numThreads=1 +phase=0 +predType=tournament +progress_interval=0 +renameToDecodeDelay=1 +renameToFetchDelay=1 +renameToIEWDelay=2 +renameToROBDelay=1 +renameWidth=8 +squashWidth=8 +system=system +trapLatency=13 +wbDepth=1 +wbWidth=8 +workload=system.cpu.workload +dcache_port=system.cpu.dcache.cpu_side +icache_port=system.cpu.icache.cpu_side + +[system.cpu.dcache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=262144 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=20 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.dcache_port +mem_side=system.cpu.toL2Bus.port[1] + +[system.cpu.fuPool] +type=FUPool +children=FUList0 FUList1 FUList2 FUList3 FUList4 FUList5 FUList6 FUList7 +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu.fuPool.FUList0] +type=FUDesc +children=opList0 +count=6 +opList=system.cpu.fuPool.FUList0.opList0 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +issueLat=1 +opClass=IntAlu +opLat=1 + +[system.cpu.fuPool.FUList1] +type=FUDesc +children=opList0 opList1 +count=2 +opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +issueLat=1 +opClass=IntMult +opLat=3 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +issueLat=19 +opClass=IntDiv +opLat=20 + +[system.cpu.fuPool.FUList2] +type=FUDesc +children=opList0 opList1 opList2 +count=4 +opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +issueLat=1 +opClass=FloatAdd +opLat=2 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +issueLat=1 +opClass=FloatCmp +opLat=2 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +issueLat=1 +opClass=FloatCvt +opLat=2 + +[system.cpu.fuPool.FUList3] +type=FUDesc +children=opList0 opList1 opList2 +count=2 +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +issueLat=1 +opClass=FloatMult +opLat=4 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +issueLat=12 +opClass=FloatDiv +opLat=12 + +[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +issueLat=24 +opClass=FloatSqrt +opLat=24 + +[system.cpu.fuPool.FUList4] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu.fuPool.FUList4.opList0 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +children=opList0 +count=0 +opList=system.cpu.fuPool.FUList5.opList0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +children=opList0 opList1 +count=4 +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +issueLat=1 +opClass=MemRead +opLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +issueLat=1 +opClass=MemWrite +opLat=1 + +[system.cpu.fuPool.FUList7] +type=FUDesc +children=opList0 +count=1 +opList=system.cpu.fuPool.FUList7.opList0 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +issueLat=3 +opClass=IprAccess +opLat=3 + +[system.cpu.icache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=131072 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=20 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.icache_port +mem_side=system.cpu.toL2Bus.port[0] + +[system.cpu.l2cache] +type=BaseCache +adaptive_compression=false +assoc=2 +block_size=64 +compressed_bus=false +compression_latency=0 +hash_delay=1 +hit_latency=1 +latency=1 +lifo=false +max_miss_count=0 +mshrs=10 +prefetch_access=false +prefetch_cache_check_push=true +prefetch_data_accesses_only=false +prefetch_degree=1 +prefetch_latency=10 +prefetch_miss=false +prefetch_past_page=false +prefetch_policy=none +prefetch_serial_squash=false +prefetch_use_cpu_id=true +prefetcher_size=100 +prioritizeRequests=false +protocol=Null +repl=Null +size=2097152 +split=false +split_size=0 +store_compressed=false +subblock_size=0 +tgts_per_mshr=5 +trace_addr=0 +two_queue=false +write_buffers=8 +cpu_side=system.cpu.toL2Bus.port[2] +mem_side=system.membus.port[1] + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.cpu.icache.mem_side system.cpu.dcache.mem_side system.cpu.l2cache.cpu_side + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +cwd= +egid=100 +env= +euid=100 +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +gid=100 +input=cin +output=cout +pid=100 +ppid=99 +system=system +uid=100 + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +responder_set=false +width=64 +port=system.physmem.port system.cpu.l2cache.mem_side + +[system.physmem] +type=PhysicalMemory +file= +latency=1 +range=0:134217727 +zero=false +port=system.membus.port[0] + diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out new file mode 100644 index 000000000..d248f77bf --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/config.out @@ -0,0 +1,367 @@ +[root] +type=Root +dummy=0 + +[system.physmem] +type=PhysicalMemory +file= +range=[0,134217727] +latency=1 +zero=false + +[system] +type=System +physmem=system.physmem +mem_mode=atomic + +[system.membus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + +[system.cpu.workload] +type=LiveProcess +cmd=insttest +executable=tests/test-progs/insttest/bin/sparc/linux/insttest +input=cin +output=cout +env= +cwd= +system=system +uid=100 +euid=100 +gid=100 +egid=100 +pid=100 +ppid=99 + +[system.cpu.fuPool.FUList0.opList0] +type=OpDesc +opClass=IntAlu +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList0] +type=FUDesc +opList=system.cpu.fuPool.FUList0.opList0 +count=6 + +[system.cpu.fuPool.FUList1.opList0] +type=OpDesc +opClass=IntMult +opLat=3 +issueLat=1 + +[system.cpu.fuPool.FUList1.opList1] +type=OpDesc +opClass=IntDiv +opLat=20 +issueLat=19 + +[system.cpu.fuPool.FUList1] +type=FUDesc +opList=system.cpu.fuPool.FUList1.opList0 system.cpu.fuPool.FUList1.opList1 +count=2 + +[system.cpu.fuPool.FUList2.opList0] +type=OpDesc +opClass=FloatAdd +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList1] +type=OpDesc +opClass=FloatCmp +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2.opList2] +type=OpDesc +opClass=FloatCvt +opLat=2 +issueLat=1 + +[system.cpu.fuPool.FUList2] +type=FUDesc +opList=system.cpu.fuPool.FUList2.opList0 system.cpu.fuPool.FUList2.opList1 system.cpu.fuPool.FUList2.opList2 +count=4 + +[system.cpu.fuPool.FUList3.opList0] +type=OpDesc +opClass=FloatMult +opLat=4 +issueLat=1 + +[system.cpu.fuPool.FUList3.opList1] +type=OpDesc +opClass=FloatDiv +opLat=12 +issueLat=12 + +[system.cpu.fuPool.FUList3.opList2] +type=OpDesc +opClass=FloatSqrt +opLat=24 +issueLat=24 + +[system.cpu.fuPool.FUList3] +type=FUDesc +opList=system.cpu.fuPool.FUList3.opList0 system.cpu.fuPool.FUList3.opList1 system.cpu.fuPool.FUList3.opList2 +count=2 + +[system.cpu.fuPool.FUList4.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList4] +type=FUDesc +opList=system.cpu.fuPool.FUList4.opList0 +count=0 + +[system.cpu.fuPool.FUList5.opList0] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList5] +type=FUDesc +opList=system.cpu.fuPool.FUList5.opList0 +count=0 + +[system.cpu.fuPool.FUList6.opList0] +type=OpDesc +opClass=MemRead +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6.opList1] +type=OpDesc +opClass=MemWrite +opLat=1 +issueLat=1 + +[system.cpu.fuPool.FUList6] +type=FUDesc +opList=system.cpu.fuPool.FUList6.opList0 system.cpu.fuPool.FUList6.opList1 +count=4 + +[system.cpu.fuPool.FUList7.opList0] +type=OpDesc +opClass=IprAccess +opLat=3 +issueLat=3 + +[system.cpu.fuPool.FUList7] +type=FUDesc +opList=system.cpu.fuPool.FUList7.opList0 +count=1 + +[system.cpu.fuPool] +type=FUPool +FUList=system.cpu.fuPool.FUList0 system.cpu.fuPool.FUList1 system.cpu.fuPool.FUList2 system.cpu.fuPool.FUList3 system.cpu.fuPool.FUList4 system.cpu.fuPool.FUList5 system.cpu.fuPool.FUList6 system.cpu.fuPool.FUList7 + +[system.cpu] +type=DerivO3CPU +clock=1 +phase=0 +numThreads=1 +cpu_id=0 +activity=0 +workload=system.cpu.workload +checker=null +max_insts_any_thread=0 +max_insts_all_threads=0 +max_loads_any_thread=0 +max_loads_all_threads=0 +progress_interval=0 +cachePorts=200 +decodeToFetchDelay=1 +renameToFetchDelay=1 +iewToFetchDelay=1 +commitToFetchDelay=1 +fetchWidth=8 +renameToDecodeDelay=1 +iewToDecodeDelay=1 +commitToDecodeDelay=1 +fetchToDecodeDelay=1 +decodeWidth=8 +iewToRenameDelay=1 +commitToRenameDelay=1 +decodeToRenameDelay=1 +renameWidth=8 +commitToIEWDelay=1 +renameToIEWDelay=2 +issueToExecuteDelay=1 +dispatchWidth=8 +issueWidth=8 +wbWidth=8 +wbDepth=1 +fuPool=system.cpu.fuPool +iewToCommitDelay=1 +renameToROBDelay=1 +commitWidth=8 +squashWidth=8 +trapLatency=13 +backComSize=5 +forwardComSize=5 +predType=tournament +localPredictorSize=2048 +localCtrBits=2 +localHistoryTableSize=2048 +localHistoryBits=11 +globalPredictorSize=8192 +globalCtrBits=2 +globalHistoryBits=13 +choicePredictorSize=8192 +choiceCtrBits=2 +BTBEntries=4096 +BTBTagSize=16 +RASSize=16 +LQEntries=32 +SQEntries=32 +LFSTSize=1024 +SSITSize=1024 +numPhysIntRegs=256 +numPhysFloatRegs=256 +numIQEntries=64 +numROBEntries=192 +smtNumFetchingThreads=1 +smtFetchPolicy=SingleThread +smtLSQPolicy=Partitioned +smtLSQThreshold=100 +smtIQPolicy=Partitioned +smtIQThreshold=100 +smtROBPolicy=Partitioned +smtROBThreshold=100 +smtCommitPolicy=RoundRobin +instShiftAmt=2 +defer_registration=false +function_trace=false +function_trace_start=0 + +[system.cpu.icache] +type=BaseCache +size=131072 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=20 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.dcache] +type=BaseCache +size=262144 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=20 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.l2cache] +type=BaseCache +size=2097152 +assoc=2 +block_size=64 +latency=1 +mshrs=10 +tgts_per_mshr=5 +write_buffers=8 +prioritizeRequests=false +protocol=null +trace_addr=0 +hash_delay=1 +repl=null +compressed_bus=false +store_compressed=false +adaptive_compression=false +compression_latency=0 +block_size=64 +max_miss_count=0 +addr_range=[0,18446744073709551615] +split=false +split_size=0 +lifo=false +two_queue=false +prefetch_miss=false +prefetch_access=false +prefetcher_size=100 +prefetch_past_page=false +prefetch_serial_squash=false +prefetch_latency=10 +prefetch_degree=1 +prefetch_policy=none +prefetch_cache_check_push=true +prefetch_use_cpu_id=true +prefetch_data_accesses_only=false +hit_latency=1 + +[system.cpu.toL2Bus] +type=Bus +bus_id=0 +clock=1000 +width=64 +responder_set=false + diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt new file mode 100644 index 000000000..7c0d31494 --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/m5stats.txt @@ -0,0 +1,410 @@ + +---------- Begin Simulation Statistics ---------- +global.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly. +global.BPredUnit.BTBHits 2990 # Number of BTB hits +global.BPredUnit.BTBLookups 7055 # Number of BTB lookups +global.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions. +global.BPredUnit.condIncorrect 2077 # Number of conditional branches incorrect +global.BPredUnit.condPredicted 7846 # Number of conditional branches predicted +global.BPredUnit.lookups 7846 # Number of BP lookups +global.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target. +host_inst_rate 15119 # Simulator instruction rate (inst/s) +host_mem_usage 154868 # Number of bytes of host memory used +host_seconds 0.73 # Real time elapsed on the host +host_tick_rate 1956796 # Simulator tick rate (ticks/s) +memdepunit.memDep.conflictingLoads 12 # Number of conflicting loads. +memdepunit.memDep.conflictingStores 0 # Number of conflicting stores. +memdepunit.memDep.insertedLoads 3250 # Number of loads inserted to the mem dependence unit. +memdepunit.memDep.insertedStores 2817 # Number of stores inserted to the mem dependence unit. +sim_freq 1000000000000 # Frequency of simulated ticks +sim_insts 10976 # Number of instructions simulated +sim_seconds 0.000001 # Number of seconds simulated +sim_ticks 1421211 # Number of ticks simulated +system.cpu.commit.COM:branches 2152 # Number of branches committed +system.cpu.commit.COM:bw_lim_events 172 # number cycles where commit BW limit reached +system.cpu.commit.COM:bw_limited 0 # number of insts not committed due to BW limits +system.cpu.commit.COM:committed_per_cycle.start_dist # Number of insts commited each cycle +system.cpu.commit.COM:committed_per_cycle.samples 221349 +system.cpu.commit.COM:committed_per_cycle.min_value 0 + 0 215844 9751.30% + 1 2970 134.18% + 2 1290 58.28% + 3 631 28.51% + 4 208 9.40% + 5 90 4.07% + 6 133 6.01% + 7 11 0.50% + 8 172 7.77% +system.cpu.commit.COM:committed_per_cycle.max_value 8 +system.cpu.commit.COM:committed_per_cycle.end_dist + +system.cpu.commit.COM:count 10976 # Number of instructions committed +system.cpu.commit.COM:loads 1462 # Number of loads committed +system.cpu.commit.COM:membars 0 # Number of memory barriers committed +system.cpu.commit.COM:refs 2760 # Number of memory references committed +system.cpu.commit.COM:swp_count 0 # Number of s/w prefetches committed +system.cpu.commit.branchMispredicts 2077 # The number of times a branch was mispredicted +system.cpu.commit.commitCommittedInsts 10976 # The number of committed instructions +system.cpu.commit.commitNonSpecStalls 327 # The number of times commit has been forced to stall to communicate backwards +system.cpu.commit.commitSquashedInsts 14263 # The number of squashed insts skipped by commit +system.cpu.committedInsts 10976 # Number of Instructions Simulated +system.cpu.committedInsts_total 10976 # Number of Instructions Simulated +system.cpu.cpi 129.483509 # CPI: Cycles Per Instruction +system.cpu.cpi_total 129.483509 # CPI: Total CPI of All Threads +system.cpu.dcache.ReadReq_accesses 2737 # number of ReadReq accesses(hits+misses) +system.cpu.dcache.ReadReq_avg_miss_latency 6585.044776 # average ReadReq miss latency +system.cpu.dcache.ReadReq_avg_mshr_miss_latency 6511.939394 # average ReadReq mshr miss latency +system.cpu.dcache.ReadReq_hits 2603 # number of ReadReq hits +system.cpu.dcache.ReadReq_miss_latency 882396 # number of ReadReq miss cycles +system.cpu.dcache.ReadReq_miss_rate 0.048959 # miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_misses 134 # number of ReadReq misses +system.cpu.dcache.ReadReq_mshr_hits 68 # number of ReadReq MSHR hits +system.cpu.dcache.ReadReq_mshr_miss_latency 429788 # number of ReadReq MSHR miss cycles +system.cpu.dcache.ReadReq_mshr_miss_rate 0.024114 # mshr miss rate for ReadReq accesses +system.cpu.dcache.ReadReq_mshr_misses 66 # number of ReadReq MSHR misses +system.cpu.dcache.SwapReq_accesses 6 # number of SwapReq accesses(hits+misses) +system.cpu.dcache.SwapReq_hits 6 # number of SwapReq hits +system.cpu.dcache.WriteReq_accesses 1292 # number of WriteReq accesses(hits+misses) +system.cpu.dcache.WriteReq_avg_miss_latency 7960.583924 # average WriteReq miss latency +system.cpu.dcache.WriteReq_avg_mshr_miss_latency 7136.918605 # average WriteReq mshr miss latency +system.cpu.dcache.WriteReq_hits 869 # number of WriteReq hits +system.cpu.dcache.WriteReq_miss_latency 3367327 # number of WriteReq miss cycles +system.cpu.dcache.WriteReq_miss_rate 0.327399 # miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_misses 423 # number of WriteReq misses +system.cpu.dcache.WriteReq_mshr_hits 337 # number of WriteReq MSHR hits +system.cpu.dcache.WriteReq_mshr_miss_latency 613775 # number of WriteReq MSHR miss cycles +system.cpu.dcache.WriteReq_mshr_miss_rate 0.066563 # mshr miss rate for WriteReq accesses +system.cpu.dcache.WriteReq_mshr_misses 86 # number of WriteReq MSHR misses +system.cpu.dcache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.dcache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.dcache.avg_refs 22.881579 # Average number of references to valid blocks. +system.cpu.dcache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.dcache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.dcache.cache_copies 0 # number of cache copies performed +system.cpu.dcache.demand_accesses 4029 # number of demand (read+write) accesses +system.cpu.dcache.demand_avg_miss_latency 7629.664273 # average overall miss latency +system.cpu.dcache.demand_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency +system.cpu.dcache.demand_hits 3472 # number of demand (read+write) hits +system.cpu.dcache.demand_miss_latency 4249723 # number of demand (read+write) miss cycles +system.cpu.dcache.demand_miss_rate 0.138248 # miss rate for demand accesses +system.cpu.dcache.demand_misses 557 # number of demand (read+write) misses +system.cpu.dcache.demand_mshr_hits 405 # number of demand (read+write) MSHR hits +system.cpu.dcache.demand_mshr_miss_latency 1043563 # number of demand (read+write) MSHR miss cycles +system.cpu.dcache.demand_mshr_miss_rate 0.037726 # mshr miss rate for demand accesses +system.cpu.dcache.demand_mshr_misses 152 # number of demand (read+write) MSHR misses +system.cpu.dcache.fast_writes 0 # number of fast writes performed +system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.dcache.overall_accesses 4029 # number of overall (read+write) accesses +system.cpu.dcache.overall_avg_miss_latency 7629.664273 # average overall miss latency +system.cpu.dcache.overall_avg_mshr_miss_latency 6865.546053 # average overall mshr miss latency +system.cpu.dcache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.dcache.overall_hits 3472 # number of overall hits +system.cpu.dcache.overall_miss_latency 4249723 # number of overall miss cycles +system.cpu.dcache.overall_miss_rate 0.138248 # miss rate for overall accesses +system.cpu.dcache.overall_misses 557 # number of overall misses +system.cpu.dcache.overall_mshr_hits 405 # number of overall MSHR hits +system.cpu.dcache.overall_mshr_miss_latency 1043563 # number of overall MSHR miss cycles +system.cpu.dcache.overall_mshr_miss_rate 0.037726 # mshr miss rate for overall accesses +system.cpu.dcache.overall_mshr_misses 152 # number of overall MSHR misses +system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.dcache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.dcache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.dcache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.dcache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.dcache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.dcache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.dcache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.dcache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.dcache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.dcache.replacements 0 # number of replacements +system.cpu.dcache.sampled_refs 152 # Sample count of references to valid blocks. +system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.dcache.tagsinuse 90.938737 # Cycle average of tags in use +system.cpu.dcache.total_refs 3478 # Total number of references to valid blocks. +system.cpu.dcache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.dcache.writebacks 0 # number of writebacks +system.cpu.decode.DECODE:BlockedCycles 192719 # Number of cycles decode is blocked +system.cpu.decode.DECODE:DecodedInsts 39774 # Number of instructions handled by decode +system.cpu.decode.DECODE:IdleCycles 20128 # Number of cycles decode is idle +system.cpu.decode.DECODE:RunCycles 8238 # Number of cycles decode is running +system.cpu.decode.DECODE:SquashCycles 3162 # Number of cycles decode is squashing +system.cpu.decode.DECODE:UnblockCycles 264 # Number of cycles decode is unblocking +system.cpu.fetch.Branches 7846 # Number of branches that fetch encountered +system.cpu.fetch.CacheLines 5085 # Number of cache lines fetched +system.cpu.fetch.Cycles 14399 # Number of cycles fetch has run and was not squashing or blocked +system.cpu.fetch.IcacheSquashes 745 # Number of outstanding Icache misses that were squashed +system.cpu.fetch.Insts 43304 # Number of instructions fetch has processed +system.cpu.fetch.SquashCycles 2134 # Number of cycles fetch has spent squashing +system.cpu.fetch.branchRate 0.034947 # Number of branch fetches per cycle +system.cpu.fetch.icacheStallCycles 5085 # Number of cycles fetch is stalled on an Icache miss +system.cpu.fetch.predictedBranches 2990 # Number of branches that fetch has predicted taken +system.cpu.fetch.rate 0.192881 # Number of inst fetches per cycle +system.cpu.fetch.rateDist.start_dist # Number of instructions fetched each cycle (Total) +system.cpu.fetch.rateDist.samples 224511 +system.cpu.fetch.rateDist.min_value 0 + 0 215198 9585.19% + 1 2258 100.57% + 2 627 27.93% + 3 958 42.67% + 4 553 24.63% + 5 816 36.35% + 6 951 42.36% + 7 280 12.47% + 8 2870 127.83% +system.cpu.fetch.rateDist.max_value 8 +system.cpu.fetch.rateDist.end_dist + +system.cpu.icache.ReadReq_accesses 5085 # number of ReadReq accesses(hits+misses) +system.cpu.icache.ReadReq_avg_miss_latency 5148.266776 # average ReadReq miss latency +system.cpu.icache.ReadReq_avg_mshr_miss_latency 4502.972752 # average ReadReq mshr miss latency +system.cpu.icache.ReadReq_hits 4474 # number of ReadReq hits +system.cpu.icache.ReadReq_miss_latency 3145591 # number of ReadReq miss cycles +system.cpu.icache.ReadReq_miss_rate 0.120157 # miss rate for ReadReq accesses +system.cpu.icache.ReadReq_misses 611 # number of ReadReq misses +system.cpu.icache.ReadReq_mshr_hits 244 # number of ReadReq MSHR hits +system.cpu.icache.ReadReq_mshr_miss_latency 1652591 # number of ReadReq MSHR miss cycles +system.cpu.icache.ReadReq_mshr_miss_rate 0.072173 # mshr miss rate for ReadReq accesses +system.cpu.icache.ReadReq_mshr_misses 367 # number of ReadReq MSHR misses +system.cpu.icache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.icache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.icache.avg_refs 12.325069 # Average number of references to valid blocks. +system.cpu.icache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.icache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.icache.cache_copies 0 # number of cache copies performed +system.cpu.icache.demand_accesses 5085 # number of demand (read+write) accesses +system.cpu.icache.demand_avg_miss_latency 5148.266776 # average overall miss latency +system.cpu.icache.demand_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency +system.cpu.icache.demand_hits 4474 # number of demand (read+write) hits +system.cpu.icache.demand_miss_latency 3145591 # number of demand (read+write) miss cycles +system.cpu.icache.demand_miss_rate 0.120157 # miss rate for demand accesses +system.cpu.icache.demand_misses 611 # number of demand (read+write) misses +system.cpu.icache.demand_mshr_hits 244 # number of demand (read+write) MSHR hits +system.cpu.icache.demand_mshr_miss_latency 1652591 # number of demand (read+write) MSHR miss cycles +system.cpu.icache.demand_mshr_miss_rate 0.072173 # mshr miss rate for demand accesses +system.cpu.icache.demand_mshr_misses 367 # number of demand (read+write) MSHR misses +system.cpu.icache.fast_writes 0 # number of fast writes performed +system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.icache.overall_accesses 5085 # number of overall (read+write) accesses +system.cpu.icache.overall_avg_miss_latency 5148.266776 # average overall miss latency +system.cpu.icache.overall_avg_mshr_miss_latency 4502.972752 # average overall mshr miss latency +system.cpu.icache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.icache.overall_hits 4474 # number of overall hits +system.cpu.icache.overall_miss_latency 3145591 # number of overall miss cycles +system.cpu.icache.overall_miss_rate 0.120157 # miss rate for overall accesses +system.cpu.icache.overall_misses 611 # number of overall misses +system.cpu.icache.overall_mshr_hits 244 # number of overall MSHR hits +system.cpu.icache.overall_mshr_miss_latency 1652591 # number of overall MSHR miss cycles +system.cpu.icache.overall_mshr_miss_rate 0.072173 # mshr miss rate for overall accesses +system.cpu.icache.overall_mshr_misses 367 # number of overall MSHR misses +system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.icache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.icache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.icache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.icache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.icache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.icache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.icache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.icache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.icache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.icache.replacements 1 # number of replacements +system.cpu.icache.sampled_refs 363 # Sample count of references to valid blocks. +system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.icache.tagsinuse 172.869174 # Cycle average of tags in use +system.cpu.icache.total_refs 4474 # Total number of references to valid blocks. +system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.icache.writebacks 0 # number of writebacks +system.cpu.idleCycles 1196701 # Total number of cycles that the CPU has spent unscheduled due to idling +system.cpu.iew.EXEC:branches 3576 # Number of branches executed +system.cpu.iew.EXEC:nop 0 # number of nop insts executed +system.cpu.iew.EXEC:rate 0.092548 # Inst execution rate +system.cpu.iew.EXEC:refs 5257 # number of memory reference insts executed +system.cpu.iew.EXEC:stores 2386 # Number of stores executed +system.cpu.iew.EXEC:swp 0 # number of swp insts executed +system.cpu.iew.WB:consumers 9737 # num instructions consuming a value +system.cpu.iew.WB:count 19769 # cumulative count of insts written-back +system.cpu.iew.WB:fanout 0.790901 # average fanout of values written-back +system.cpu.iew.WB:penalized 0 # number of instrctions required to write to 'other' IQ +system.cpu.iew.WB:penalized_rate 0 # fraction of instructions written-back that wrote to 'other' IQ +system.cpu.iew.WB:producers 7701 # num instructions producing a value +system.cpu.iew.WB:rate 0.088054 # insts written-back per cycle +system.cpu.iew.WB:sent 20061 # cumulative count of insts sent to commit +system.cpu.iew.branchMispredicts 2593 # Number of branch mispredicts detected at execute +system.cpu.iew.iewBlockCycles 476 # Number of cycles IEW is blocking +system.cpu.iew.iewDispLoadInsts 3250 # Number of dispatched load instructions +system.cpu.iew.iewDispNonSpecInsts 617 # Number of dispatched non-speculative instructions +system.cpu.iew.iewDispSquashedInsts 2705 # Number of squashed instructions skipped by dispatch +system.cpu.iew.iewDispStoreInsts 2817 # Number of dispatched store instructions +system.cpu.iew.iewDispatchedInsts 25240 # Number of instructions dispatched to IQ +system.cpu.iew.iewExecLoadInsts 2871 # Number of load instructions executed +system.cpu.iew.iewExecSquashedInsts 1780 # Number of squashed instructions skipped in execute +system.cpu.iew.iewExecutedInsts 20778 # Number of executed instructions +system.cpu.iew.iewIQFullEvents 7 # Number of times the IQ has become full, causing a stall +system.cpu.iew.iewIdleCycles 0 # Number of cycles IEW is idle +system.cpu.iew.iewLSQFullEvents 0 # Number of times the LSQ has become full, causing a stall +system.cpu.iew.iewSquashCycles 3162 # Number of cycles IEW is squashing +system.cpu.iew.iewUnblockCycles 35 # Number of cycles IEW is unblocking +system.cpu.iew.lsq.thread.0.blockedLoads 0 # Number of blocked loads due to partial load-store forwarding +system.cpu.iew.lsq.thread.0.cacheBlocked 0 # Number of times an access to memory failed due to the cache being blocked +system.cpu.iew.lsq.thread.0.forwLoads 39 # Number of loads that had data forwarded from stores +system.cpu.iew.lsq.thread.0.ignoredResponses 5 # Number of memory responses ignored because the instruction is squashed +system.cpu.iew.lsq.thread.0.invAddrLoads 0 # Number of loads ignored due to an invalid address +system.cpu.iew.lsq.thread.0.invAddrSwpfs 0 # Number of software prefetches ignored due to an invalid address +system.cpu.iew.lsq.thread.0.memOrderViolation 54 # Number of memory ordering violations +system.cpu.iew.lsq.thread.0.rescheduledLoads 0 # Number of loads that were rescheduled +system.cpu.iew.lsq.thread.0.squashedLoads 1788 # Number of loads squashed +system.cpu.iew.lsq.thread.0.squashedStores 1519 # Number of stores squashed +system.cpu.iew.memOrderViolationEvents 54 # Number of memory order violations +system.cpu.iew.predictedNotTakenIncorrect 962 # Number of branches that were predicted not taken incorrectly +system.cpu.iew.predictedTakenIncorrect 1631 # Number of branches that were predicted taken incorrectly +system.cpu.ipc 0.007723 # IPC: Instructions Per Cycle +system.cpu.ipc_total 0.007723 # IPC: Total IPC of All Threads +system.cpu.iq.ISSUE:FU_type_0 22558 # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.start_dist + (null) 1831 8.12% # Type of FU issued + IntAlu 15054 66.73% # Type of FU issued + IntMult 0 0.00% # Type of FU issued + IntDiv 0 0.00% # Type of FU issued + FloatAdd 0 0.00% # Type of FU issued + FloatCmp 0 0.00% # Type of FU issued + FloatCvt 0 0.00% # Type of FU issued + FloatMult 0 0.00% # Type of FU issued + FloatDiv 0 0.00% # Type of FU issued + FloatSqrt 0 0.00% # Type of FU issued + MemRead 3091 13.70% # Type of FU issued + MemWrite 2582 11.45% # Type of FU issued + IprAccess 0 0.00% # Type of FU issued + InstPrefetch 0 0.00% # Type of FU issued +system.cpu.iq.ISSUE:FU_type_0.end_dist +system.cpu.iq.ISSUE:fu_busy_cnt 162 # FU busy when requested +system.cpu.iq.ISSUE:fu_busy_rate 0.007181 # FU busy rate (busy events/executed inst) +system.cpu.iq.ISSUE:fu_full.start_dist + (null) 0 0.00% # attempts to use FU when none available + IntAlu 42 25.93% # attempts to use FU when none available + IntMult 0 0.00% # attempts to use FU when none available + IntDiv 0 0.00% # attempts to use FU when none available + FloatAdd 0 0.00% # attempts to use FU when none available + FloatCmp 0 0.00% # attempts to use FU when none available + FloatCvt 0 0.00% # attempts to use FU when none available + FloatMult 0 0.00% # attempts to use FU when none available + FloatDiv 0 0.00% # attempts to use FU when none available + FloatSqrt 0 0.00% # attempts to use FU when none available + MemRead 14 8.64% # attempts to use FU when none available + MemWrite 106 65.43% # attempts to use FU when none available + IprAccess 0 0.00% # attempts to use FU when none available + InstPrefetch 0 0.00% # attempts to use FU when none available +system.cpu.iq.ISSUE:fu_full.end_dist +system.cpu.iq.ISSUE:issued_per_cycle.start_dist # Number of insts issued each cycle +system.cpu.iq.ISSUE:issued_per_cycle.samples 224511 +system.cpu.iq.ISSUE:issued_per_cycle.min_value 0 + 0 215315 9590.40% + 1 4124 183.69% + 2 1297 57.77% + 3 1306 58.17% + 4 1190 53.00% + 5 707 31.49% + 6 433 19.29% + 7 83 3.70% + 8 56 2.49% +system.cpu.iq.ISSUE:issued_per_cycle.max_value 8 +system.cpu.iq.ISSUE:issued_per_cycle.end_dist + +system.cpu.iq.ISSUE:rate 0.100476 # Inst issue rate +system.cpu.iq.iqInstsAdded 24623 # Number of instructions added to the IQ (excludes non-spec) +system.cpu.iq.iqInstsIssued 22558 # Number of instructions issued +system.cpu.iq.iqNonSpecInstsAdded 617 # Number of non-speculative instructions added to the IQ +system.cpu.iq.iqSquashedInstsExamined 11469 # Number of squashed instructions iterated over during squash; mainly for profiling +system.cpu.iq.iqSquashedInstsIssued 174 # Number of squashed instructions issued +system.cpu.iq.iqSquashedNonSpecRemoved 290 # Number of squashed non-spec instructions that were removed +system.cpu.iq.iqSquashedOperandsExamined 5834 # Number of squashed operands that are examined and possibly removed from graph +system.cpu.l2cache.ReadReq_accesses 513 # number of ReadReq accesses(hits+misses) +system.cpu.l2cache.ReadReq_avg_miss_latency 4754.779727 # average ReadReq miss latency +system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 2343.506823 # average ReadReq mshr miss latency +system.cpu.l2cache.ReadReq_miss_latency 2439202 # number of ReadReq miss cycles +system.cpu.l2cache.ReadReq_miss_rate 1 # miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_misses 513 # number of ReadReq misses +system.cpu.l2cache.ReadReq_mshr_miss_latency 1202219 # number of ReadReq MSHR miss cycles +system.cpu.l2cache.ReadReq_mshr_miss_rate 1 # mshr miss rate for ReadReq accesses +system.cpu.l2cache.ReadReq_mshr_misses 513 # number of ReadReq MSHR misses +system.cpu.l2cache.avg_blocked_cycles_no_mshrs # average number of cycles each access was blocked +system.cpu.l2cache.avg_blocked_cycles_no_targets # average number of cycles each access was blocked +system.cpu.l2cache.avg_refs 0 # Average number of references to valid blocks. +system.cpu.l2cache.blocked_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_mshrs 0 # number of cycles access was blocked +system.cpu.l2cache.blocked_cycles_no_targets 0 # number of cycles access was blocked +system.cpu.l2cache.cache_copies 0 # number of cache copies performed +system.cpu.l2cache.demand_accesses 513 # number of demand (read+write) accesses +system.cpu.l2cache.demand_avg_miss_latency 4754.779727 # average overall miss latency +system.cpu.l2cache.demand_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency +system.cpu.l2cache.demand_hits 0 # number of demand (read+write) hits +system.cpu.l2cache.demand_miss_latency 2439202 # number of demand (read+write) miss cycles +system.cpu.l2cache.demand_miss_rate 1 # miss rate for demand accesses +system.cpu.l2cache.demand_misses 513 # number of demand (read+write) misses +system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits +system.cpu.l2cache.demand_mshr_miss_latency 1202219 # number of demand (read+write) MSHR miss cycles +system.cpu.l2cache.demand_mshr_miss_rate 1 # mshr miss rate for demand accesses +system.cpu.l2cache.demand_mshr_misses 513 # number of demand (read+write) MSHR misses +system.cpu.l2cache.fast_writes 0 # number of fast writes performed +system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated +system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate +system.cpu.l2cache.overall_accesses 513 # number of overall (read+write) accesses +system.cpu.l2cache.overall_avg_miss_latency 4754.779727 # average overall miss latency +system.cpu.l2cache.overall_avg_mshr_miss_latency 2343.506823 # average overall mshr miss latency +system.cpu.l2cache.overall_avg_mshr_uncacheable_latency # average overall mshr uncacheable latency +system.cpu.l2cache.overall_hits 0 # number of overall hits +system.cpu.l2cache.overall_miss_latency 2439202 # number of overall miss cycles +system.cpu.l2cache.overall_miss_rate 1 # miss rate for overall accesses +system.cpu.l2cache.overall_misses 513 # number of overall misses +system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits +system.cpu.l2cache.overall_mshr_miss_latency 1202219 # number of overall MSHR miss cycles +system.cpu.l2cache.overall_mshr_miss_rate 1 # mshr miss rate for overall accesses +system.cpu.l2cache.overall_mshr_misses 513 # number of overall MSHR misses +system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles +system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses +system.cpu.l2cache.prefetcher.num_hwpf_already_in_cache 0 # number of hwpf that were already in the cache +system.cpu.l2cache.prefetcher.num_hwpf_already_in_mshr 0 # number of hwpf that were already in mshr +system.cpu.l2cache.prefetcher.num_hwpf_already_in_prefetcher 0 # number of hwpf that were already in the prefetch queue +system.cpu.l2cache.prefetcher.num_hwpf_evicted 0 # number of hwpf removed due to no buffer left +system.cpu.l2cache.prefetcher.num_hwpf_identified 0 # number of hwpf identified +system.cpu.l2cache.prefetcher.num_hwpf_issued 0 # number of hwpf issued +system.cpu.l2cache.prefetcher.num_hwpf_removed_MSHR_hit 0 # number of hwpf removed because MSHR allocated +system.cpu.l2cache.prefetcher.num_hwpf_span_page 0 # number of hwpf spanning a virtual page +system.cpu.l2cache.prefetcher.num_hwpf_squashed_from_miss 0 # number of hwpf that got squashed due to a miss aborting calculation time +system.cpu.l2cache.replacements 0 # number of replacements +system.cpu.l2cache.sampled_refs 512 # Sample count of references to valid blocks. +system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions +system.cpu.l2cache.tagsinuse 262.946375 # Cycle average of tags in use +system.cpu.l2cache.total_refs 0 # Total number of references to valid blocks. +system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit. +system.cpu.l2cache.writebacks 0 # number of writebacks +system.cpu.numCycles 224511 # number of cpu cycles simulated +system.cpu.rename.RENAME:BlockCycles 960 # Number of cycles rename is blocking +system.cpu.rename.RENAME:CommittedMaps 9868 # Number of HB maps that are committed +system.cpu.rename.RENAME:IQFullEvents 2 # Number of times rename has blocked due to IQ full +system.cpu.rename.RENAME:IdleCycles 20098 # Number of cycles rename is idle +system.cpu.rename.RENAME:LSQFullEvents 481 # Number of times rename has blocked due to LSQ full +system.cpu.rename.RENAME:ROBFullEvents 4 # Number of times rename has blocked due to ROB full +system.cpu.rename.RENAME:RenameLookups 46931 # Number of register rename lookups that rename has made +system.cpu.rename.RENAME:RenamedInsts 31260 # Number of instructions processed by rename +system.cpu.rename.RENAME:RenamedOperands 25831 # Number of destination operands rename has renamed +system.cpu.rename.RENAME:RunCycles 7921 # Number of cycles rename is running +system.cpu.rename.RENAME:SquashCycles 3162 # Number of cycles rename is squashing +system.cpu.rename.RENAME:SquashedInsts 8042 # Number of squashed instructions processed by rename +system.cpu.rename.RENAME:UnblockCycles 1212 # Number of cycles rename is unblocking +system.cpu.rename.RENAME:UndoneMaps 15963 # Number of HB maps that are undone due to squashing +system.cpu.rename.RENAME:serializeStallCycles 190573 # count of cycles rename stalled for serializing inst +system.cpu.rename.RENAME:serializingInsts 638 # count of serializing insts renamed +system.cpu.rename.RENAME:skidInsts 5594 # count of insts added to the skid buffer +system.cpu.rename.RENAME:tempSerializingInsts 629 # count of temporary serializing insts renamed +system.cpu.timesIdled 289 # Number of times that the entire CPU went into an idle state and unscheduled itself +system.cpu.workload.PROG:num_syscalls 8 # Number of system calls + +---------- End Simulation Statistics ---------- diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr new file mode 100644 index 000000000..48affb0e2 --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stderr @@ -0,0 +1,4 @@ +warn: More than two loadable segments in ELF object. +warn: Ignoring segment @ 0x0 length 0x0. +0: system.remote_gdb.listener: listening for remote gdb on port 7003 +warn: Entering event queue @ 0. Starting simulation... diff --git a/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout new file mode 100644 index 000000000..6cba2ba7e --- /dev/null +++ b/tests/quick/02.insttest/ref/sparc/linux/o3-timing/stdout @@ -0,0 +1,24 @@ +Begining test of difficult SPARC instructions... +LDSTUB: Passed +SWAP: Passed +CAS FAIL: Passed +CAS WORK: Passed +CASX FAIL: Passed +CASX WORK: Passed +LDTX: Passed +LDTW: Passed +STTW: Passed +Done +M5 Simulator System + +Copyright (c) 2001-2006 +The Regents of The University of Michigan +All Rights Reserved + + +M5 compiled Apr 9 2007 03:06:26 +M5 started Mon Apr 9 03:06:54 2007 +M5 executing on zizzer.eecs.umich.edu +command line: build/SPARC_SE/m5.fast -d build/SPARC_SE/tests/fast/quick/02.insttest/sparc/linux/o3-timing tests/run.py quick/02.insttest/sparc/linux/o3-timing +Global frequency set at 1000000000000 ticks per second +Exiting @ tick 1421211 because target called exit()