From e88e7d88b9a9876ee040dad96acf3deabebe1fa7 Mon Sep 17 00:00:00 2001 From: Matt Horsnell Date: Fri, 15 Feb 2013 17:40:09 -0500 Subject: [PATCH] o3: fix tick used for renaming and issue with range selection Fixes the tick used from rename: - previously this gathered the tick on leaving rename which was always 1 less than the dispatch. This conflated the decode ticks when back pressure built in the pipeline. - now picks up tick on entry. Added --store_completions flag: - will additionally display the store completion tail in the viewer. - this highlights periods when large numbers of stores are outstanding (>16 LSQ blocking) Allows selection by tick range (previously this caused an infinite loop) --- src/cpu/o3/commit_impl.hh | 5 ++- src/cpu/o3/decode_impl.hh | 5 ++- src/cpu/o3/dyn_inst.hh | 1 + src/cpu/o3/dyn_inst_impl.hh | 50 ++++++++++++++--------- src/cpu/o3/fetch_impl.hh | 5 ++- src/cpu/o3/iew_impl.hh | 5 ++- src/cpu/o3/lsq_unit_impl.hh | 8 ++++ src/cpu/o3/rename_impl.hh | 9 +++-- util/o3-pipeview.py | 81 +++++++++++++++++++++++-------------- 9 files changed, 111 insertions(+), 58 deletions(-) diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 776968f8c..c7b255123 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -60,6 +60,7 @@ #include "debug/CommitRate.hh" #include "debug/Drain.hh" #include "debug/ExecFaulting.hh" +#include "debug/O3PipeView.hh" #include "params/DerivO3CPU.hh" #include "sim/faults.hh" #include "sim/full_system.hh" @@ -1267,7 +1268,9 @@ DefaultCommit::commitHead(DynInstPtr &head_inst, unsigned inst_num) rob->retireHead(tid); #if TRACING_ON - head_inst->commitTick = curTick() - head_inst->fetchTick; + if (DTRACE(O3PipeView)) { + head_inst->commitTick = curTick() - head_inst->fetchTick; + } #endif // If this was a store, record it for this cycle. diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index f87ec0bfa..9ab672931 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -47,6 +47,7 @@ #include "cpu/inst_seq.hh" #include "debug/Activity.hh" #include "debug/Decode.hh" +#include "debug/O3PipeView.hh" #include "params/DerivO3CPU.hh" #include "sim/full_system.hh" @@ -714,7 +715,9 @@ DefaultDecode::decodeInsts(ThreadID tid) --insts_available; #if TRACING_ON - inst->decodeTick = curTick() - inst->fetchTick; + if (DTRACE(O3PipeView)) { + inst->decodeTick = curTick() - inst->fetchTick; + } #endif // Ensure that if it was predicted as a branch, it really is a diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index de50bbda9..c8cdf7a1f 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -132,6 +132,7 @@ class BaseO3DynInst : public BaseDynInst int32_t issueTick; int32_t completeTick; int32_t commitTick; + int32_t storeTick; #endif /** Reads a misc. register, including any side-effects the read diff --git a/src/cpu/o3/dyn_inst_impl.hh b/src/cpu/o3/dyn_inst_impl.hh index e37caf7f0..347d30324 100644 --- a/src/cpu/o3/dyn_inst_impl.hh +++ b/src/cpu/o3/dyn_inst_impl.hh @@ -66,26 +66,35 @@ BaseO3DynInst::BaseO3DynInst(StaticInstPtr _staticInst, template BaseO3DynInst::~BaseO3DynInst() { #if TRACING_ON - Tick val, fetch = this->fetchTick; - // Print info needed by the pipeline activity viewer. - DPRINTFR(O3PipeView, "O3PipeView:fetch:%llu:0x%08llx:%d:%llu:%s\n", - fetch, - this->instAddr(), - this->microPC(), - this->seqNum, - this->staticInst->disassemble(this->instAddr())); - val = (this->decodeTick == -1) ? 0 : fetch + this->decodeTick; - DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", val); - val = (this->renameTick == -1) ? 0 : fetch + this->renameTick; - DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", val); - val = (this->dispatchTick == -1) ? 0 : fetch + this->dispatchTick; - DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", val); - val = (this->issueTick == -1) ? 0 : fetch + this->issueTick; - DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", val); - val = (this->completeTick == -1) ? 0 : fetch + this->completeTick; - DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", val); - val = (this->commitTick == -1) ? 0 : fetch + this->commitTick; - DPRINTFR(O3PipeView, "O3PipeView:retire:%llu\n", val); + if (DTRACE(O3PipeView)) { + Tick fetch = this->fetchTick; + // fetchTick can be -1 if the instruction fetched outside the trace window. + if (fetch != -1) { + Tick val; + // Print info needed by the pipeline activity viewer. + DPRINTFR(O3PipeView, "O3PipeView:fetch:%llu:0x%08llx:%d:%llu:%s\n", + fetch, + this->instAddr(), + this->microPC(), + this->seqNum, + this->staticInst->disassemble(this->instAddr())); + + val = (this->decodeTick == -1) ? 0 : fetch + this->decodeTick; + DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", val); + val = (this->renameTick == -1) ? 0 : fetch + this->renameTick; + DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", val); + val = (this->dispatchTick == -1) ? 0 : fetch + this->dispatchTick; + DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", val); + val = (this->issueTick == -1) ? 0 : fetch + this->issueTick; + DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", val); + val = (this->completeTick == -1) ? 0 : fetch + this->completeTick; + DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", val); + val = (this->commitTick == -1) ? 0 : fetch + this->commitTick; + + Tick valS = (this->storeTick == -1) ? 0 : fetch + this->storeTick; + DPRINTFR(O3PipeView, "O3PipeView:retire:%llu:store:%llu\n", val, valS); + } + } #endif }; @@ -119,6 +128,7 @@ BaseO3DynInst::initVars() issueTick = -1; completeTick = -1; commitTick = -1; + storeTick = -1; #endif } diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index 07033fc8a..e0eb10334 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -60,6 +60,7 @@ #include "debug/Activity.hh" #include "debug/Drain.hh" #include "debug/Fetch.hh" +#include "debug/O3PipeView.hh" #include "mem/packet.hh" #include "params/DerivO3CPU.hh" #include "sim/byteswap.hh" @@ -1313,7 +1314,9 @@ DefaultFetch::fetch(bool &status_change) numInst++; #if TRACING_ON - instruction->fetchTick = curTick(); + if (DTRACE(O3PipeView)) { + instruction->fetchTick = curTick(); + } #endif nextPC = thisPC; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index 4b4f66a1f..5bd5f6ae9 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -56,6 +56,7 @@ #include "debug/Decode.hh" #include "debug/Drain.hh" #include "debug/IEW.hh" +#include "debug/O3PipeView.hh" #include "params/DerivO3CPU.hh" using namespace std; @@ -1611,7 +1612,9 @@ DefaultIEW::updateExeInstStats(DynInstPtr &inst) iewExecutedInsts++; #if TRACING_ON - inst->completeTick = curTick() - inst->fetchTick; + if (DTRACE(O3PipeView)) { + inst->completeTick = curTick() - inst->fetchTick; + } #endif // diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index a4cb56767..f0b27ba41 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -51,6 +51,7 @@ #include "debug/Activity.hh" #include "debug/IEW.hh" #include "debug/LSQUnit.hh" +#include "debug/O3PipeView.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -1137,6 +1138,13 @@ LSQUnit::completeStore(int store_idx) "idx:%i\n", storeQueue[store_idx].inst->seqNum, store_idx, storeHead); +#if TRACING_ON + if (DTRACE(O3PipeView)) { + storeQueue[store_idx].inst->storeTick = + curTick() - storeQueue[store_idx].inst->fetchTick; + } +#endif + if (isStalled() && storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { DPRINTF(LSQUnit, "Unstalling, stalling store [sn:%lli] " diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index 15a4ebc13..e27c66dcd 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -49,6 +49,7 @@ #include "cpu/o3/rename.hh" #include "debug/Activity.hh" #include "debug/Rename.hh" +#include "debug/O3PipeView.hh" #include "params/DerivO3CPU.hh" using namespace std; @@ -658,9 +659,6 @@ DefaultRename::renameInsts(ThreadID tid) ++renamed_insts; -#if TRACING_ON - inst->renameTick = curTick() - inst->fetchTick; -#endif // Put instruction in rename queue. toIEW->insts[toIEWIndex] = inst; @@ -736,6 +734,11 @@ DefaultRename::sortInsts() for (int i = 0; i < insts_from_decode; ++i) { DynInstPtr inst = fromDecode->insts[i]; insts[inst->threadNumber].push_back(inst); +#if TRACING_ON + if (DTRACE(O3PipeView)) { + inst->renameTick = curTick() - inst->fetchTick; + } +#endif } } diff --git a/util/o3-pipeview.py b/util/o3-pipeview.py index 897e4a307..4d3a59a29 100755 --- a/util/o3-pipeview.py +++ b/util/o3-pipeview.py @@ -64,11 +64,11 @@ insts = { # out of order for more then 2000 CPU ticks, # otherwise the print may not start/stop # at the time specified by tick_start/stop. - 'only_committed':0 # Set if only committed instructions are printed. + 'only_committed':0, # Set if only committed instructions are printed. } def process_trace(trace, outfile, cycle_time, width, color, timestamps, - committed_only, start_tick, stop_tick, start_sn, stop_sn): + committed_only, store_completions, start_tick, stop_tick, start_sn, stop_sn): global insts insts['sn_start'] = start_sn @@ -80,31 +80,25 @@ def process_trace(trace, outfile, cycle_time, width, color, timestamps, line = None fields = None - # Read the first line - line = trace.readline() - if not line: return - fields = line.split(':') - # Skip lines up to the starting tick if start_tick != 0: while True: - if fields[0] != 'O3PipeView': continue - if (int(fields[2]) > 0 and - int(fields[2]) >= start_tick-insts['tick_drift']): break line = trace.readline() if not line: return fields = line.split(':') - - # Skip lines up to the starting sequence number - if start_sn != 0: + if fields[0] != 'O3PipeView': continue + if int(fields[2]) >= start_tick: break + elif start_sn != 0: while True: - if fields[0] != 'O3PipeView': continue - if (fields[1] == 'fetch' and - int(fields[5]) >= (start_sn-insts['max_threshold'])): - break line = trace.readline() if not line: return fields = line.split(':') + if fields[0] != 'O3PipeView': continue + if fields[1] == 'fetch' and int(fields[5]) >= start_sn: break + else: + line = trace.readline() + if not line: return + fields = line.split(':') # Skip lines up to next instruction fetch while fields[0] != 'O3PipeView' or fields[1] != 'fetch': @@ -114,12 +108,17 @@ def process_trace(trace, outfile, cycle_time, width, color, timestamps, # Print header outfile.write('// f = fetch, d = decode, n = rename, p = dispatch, ' - 'i = issue, c = complete, r = retire\n\n') + 'i = issue, c = complete, r = retire') + + if store_completions: + outfile.write(', s = store-complete') + outfile.write('\n\n') + outfile.write(' ' + 'timeline'.center(width) + ' ' + 'tick'.center(15) + ' ' + 'pc.upc'.center(12) + ' ' + 'disasm'.ljust(25) + - ' ' + 'seq_num'.center(15)) + ' ' + 'seq_num'.center(10)) if timestamps: outfile.write('timestamps'.center(25)) outfile.write('\n') @@ -138,9 +137,16 @@ def process_trace(trace, outfile, cycle_time, width, color, timestamps, curr_inst['sn'] = int(fields[5]) curr_inst['disasm'] = ' '.join(fields[6][:-1].split()) elif fields[1] == 'retire': - queue_inst(outfile, curr_inst, cycle_time, width, color, timestamps) + if curr_inst['retire'] == 0: + curr_inst['disasm'] = '-----' + curr_inst['disasm'] + if store_completions: + curr_inst[fields[3]] = int(fields[4]) + queue_inst(outfile, curr_inst, cycle_time, width, color, timestamps, store_completions) + line = trace.readline() - if not line: return + if not line: + print_insts(outfile, cycle_time, width, color, timestamps, store_completions, 0) + return fields = line.split(':') @@ -150,15 +156,15 @@ def compare_by_sn(a, b): # Puts new instruction into the print queue. # Sorts out and prints instructions when their number reaches threshold value -def queue_inst(outfile, inst, cycle_time, width, color, timestamps): +def queue_inst(outfile, inst, cycle_time, width, color, timestamps, store_completions): global insts l_copy = copy.deepcopy(inst) insts['queue'].append(l_copy) if len(insts['queue']) > insts['max_threshold']: - print_insts(outfile, cycle_time, width, color, timestamps, insts['min_threshold']) + print_insts(outfile, cycle_time, width, color, timestamps, store_completions, insts['min_threshold']) # Sorts out and prints instructions in print queue -def print_insts(outfile, cycle_time, width, color, timestamps, lower_threshold): +def print_insts(outfile, cycle_time, width, color, timestamps, store_completions, lower_threshold): global insts insts['queue'].sort(compare_by_sn) while len(insts['queue']) > lower_threshold: @@ -179,10 +185,10 @@ def print_insts(outfile, cycle_time, width, color, timestamps, lower_threshold): if (insts['only_committed'] != 0 and print_item['retire'] == 0): continue; # retire is set to zero if it hasn't been completed - print_inst(outfile, print_item, cycle_time, width, color, timestamps) + print_inst(outfile, print_item, cycle_time, width, color, timestamps, store_completions) # Prints a single instruction -def print_inst(outfile, inst, cycle_time, width, color, timestamps): +def print_inst(outfile, inst, cycle_time, width, color, timestamps, store_completions): if color: from m5.util.terminal import termcap else: @@ -208,7 +214,13 @@ def print_inst(outfile, inst, cycle_time, width, color, timestamps): 'shorthand': 'c'}, {'name': 'retire', 'color': termcap.Blue + termcap.Reverse, - 'shorthand': 'r'}] + 'shorthand': 'r'} + ] + if store_completions: + stages.append( + {'name': 'store', + 'color': termcap.Yellow + termcap.Reverse, + 'shorthand': 's'}) # Print @@ -218,7 +230,9 @@ def print_inst(outfile, inst, cycle_time, width, color, timestamps): # Find out the time of the last event - it may not # be 'retire' if the instruction is not comlpeted. last_event_time = max(inst['fetch'], inst['decode'],inst['rename'], - inst['dispatch'],inst['issue'], inst['complete'], inst['retire']) + inst['dispatch'],inst['issue'], inst['complete'], inst['retire']) + if store_completions: + last_event_time = max(last_event_time, inst['store']) # Timeline shorter then time_width is printed in compact form where # the print continues at the start of the same line. @@ -268,11 +282,11 @@ def print_inst(outfile, inst, cycle_time, width, color, timestamps): outfile.write(curr_color + dot * (width - pos) + termcap.Normal + ']-(' + str(base_tick + i * time_width).rjust(15) + ') ') if i == 0: - outfile.write('%s.%s %s [%s]' % ( + outfile.write('%s.%s %s [%s]' % ( inst['pc'].rjust(10), inst['upc'], inst['disasm'].ljust(25), - str(inst['sn']).rjust(15))) + str(inst['sn']).rjust(10))) if timestamps: outfile.write(' f=%s, r=%s' % (inst['fetch'], inst['retire'])) outfile.write('\n') @@ -329,6 +343,10 @@ def main(): '--only_committed', action='store_true', default=False, help="display only committed (completed) instructions (default: '%default')") + parser.add_option( + '--store_completions', + action='store_true', default=False, + help="additionally display store completion ticks (default: '%default')") (options, args) = parser.parse_args() if len(args) != 1: parser.error('incorrect number of arguments') @@ -347,7 +365,8 @@ def main(): with open(options.outfile, 'w') as out: process_trace(trace, out, options.cycle_time, options.width, options.color, options.timestamps, - options.only_committed, *(tick_range + inst_range)) + options.only_committed, options.store_completions, + *(tick_range + inst_range)) print 'done!'