cpu: Add an indirect branch target predictor

This patch adds a configurable indirect branch predictor that can be indexed
by a combination of GHR and path history hashes. Implements the functionality
described in:

"Target prediction for indirect jumps" by Chang, Hao, and Patt
http://dl.acm.org/citation.cfm?id=264209

This is a re-spin of fb9d142 after the revert (bd1c6789).
This commit is contained in:
Mitch Hayenga 2016-04-05 11:48:37 -05:00
parent 3f6874cb29
commit 0fd4bb7f12
10 changed files with 426 additions and 27 deletions

View file

@ -42,6 +42,16 @@ class BranchPredictor(SimObject):
RASSize = Param.Unsigned(16, "RAS size")
instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")
useIndirect = Param.Bool(True, "Use indirect branch predictor")
indirectHashGHR = Param.Bool(True, "Hash branch predictor GHR")
indirectHashTargets = Param.Bool(True, "Hash path history targets")
indirectSets = Param.Unsigned(256, "Cache sets for indirect predictor")
indirectWays = Param.Unsigned(2, "Ways for indirect predictor")
indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits")
indirectPathLength = Param.Unsigned(3,
"Previous indirect targets to use for path history")
class LocalBP(BranchPredictor):
type = 'LocalBP'

View file

@ -35,9 +35,11 @@ if env['TARGET_ISA'] == 'null':
SimObject('BranchPredictor.py')
DebugFlag('Indirect')
Source('bpred_unit.cc')
Source('2bit_local.cc')
Source('btb.cc')
Source('indirect.cc')
Source('ras.cc')
Source('tournament.cc')
Source ('bi_mode.cc')

View file

@ -236,6 +236,12 @@ BiModeBP::retireSquashed(void *bp_history)
delete history;
}
unsigned
BiModeBP::getGHR(void *bp_history) const
{
return static_cast<BPHistory*>(bp_history)->globalHistoryReg;
}
void
BiModeBP::updateGlobalHistReg(bool taken)
{

View file

@ -63,6 +63,7 @@ class BiModeBP : public BPredUnit
void btbUpdate(Addr branch_addr, void * &bp_history);
void update(Addr branch_addr, bool taken, void *bp_history, bool squashed);
void retireSquashed(void *bp_history);
unsigned getGHR(void *bp_history) const;
private:
void updateGlobalHistReg(bool taken);

View file

@ -62,6 +62,15 @@ BPredUnit::BPredUnit(const Params *params)
params->instShiftAmt,
params->numThreads),
RAS(numThreads),
useIndirect(params->useIndirect),
iPred(params->indirectHashGHR,
params->indirectHashTargets,
params->indirectSets,
params->indirectWays,
params->indirectTagSize,
params->indirectPathLength,
params->instShiftAmt,
params->numThreads),
instShiftAmt(params->instShiftAmt)
{
for (auto& r : RAS)
@ -117,6 +126,27 @@ BPredUnit::regStats()
.name(name() + ".RASInCorrect")
.desc("Number of incorrect RAS predictions.")
;
indirectLookups
.name(name() + ".indirectLookups")
.desc("Number of indirect predictor lookups.")
;
indirectHits
.name(name() + ".indirectHits")
.desc("Number of indirect target hits.")
;
indirectMisses
.name(name() + ".indirectMisses")
.desc("Number of indirect misses.")
;
indirectMispredicted
.name(name() + "indirectMispredicted")
.desc("Number of mispredicted indirect branches.")
;
}
ProbePoints::PMUUPtr
@ -216,31 +246,59 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
tid, pc, pc, RAS[tid].topIdx());
}
if (BTB.valid(pc.instAddr(), tid)) {
++BTBHits;
if (inst->isDirectCtrl() || !useIndirect) {
// Check BTB on direct branches
if (BTB.valid(pc.instAddr(), tid)) {
++BTBHits;
// If it's not a return, use the BTB to get the target addr.
target = BTB.lookup(pc.instAddr(), tid);
// If it's not a return, use the BTB to get target addr.
target = BTB.lookup(pc.instAddr(), tid);
DPRINTF(Branch, "[tid:%i]: Instruction %s predicted"
" target is %s.\n", tid, pc, target);
DPRINTF(Branch, "[tid:%i]: Instruction %s predicted"
" target is %s.\n", tid, pc, target);
} else {
DPRINTF(Branch, "[tid:%i]: BTB doesn't have a "
"valid entry.\n",tid);
pred_taken = false;
// The Direction of the branch predictor is altered because the
// BTB did not have an entry
// The predictor needs to be updated accordingly
if (!inst->isCall() && !inst->isReturn()) {
btbUpdate(pc.instAddr(), bp_history);
DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate"
" called for %s\n", tid, seqNum, pc);
} else if (inst->isCall() && !inst->isUncondCtrl()) {
RAS[tid].pop();
predict_record.pushedRAS = false;
} else {
DPRINTF(Branch, "[tid:%i]: BTB doesn't have a "
"valid entry.\n",tid);
pred_taken = false;
// The Direction of the branch predictor is altered
// because the BTB did not have an entry
// The predictor needs to be updated accordingly
if (!inst->isCall() && !inst->isReturn()) {
btbUpdate(pc.instAddr(), bp_history);
DPRINTF(Branch, "[tid:%i]:[sn:%i] btbUpdate"
" called for %s\n", tid, seqNum, pc);
} else if (inst->isCall() && !inst->isUncondCtrl()) {
RAS[tid].pop();
predict_record.pushedRAS = false;
}
TheISA::advancePC(target, inst);
}
TheISA::advancePC(target, inst);
} else {
predict_record.wasIndirect = true;
++indirectLookups;
//Consult indirect predictor on indirect control
if (iPred.lookup(pc.instAddr(), getGHR(bp_history), target,
tid)) {
// Indirect predictor hit
++indirectHits;
DPRINTF(Branch, "[tid:%i]: Instruction %s predicted "
"indirect target is %s.\n", tid, pc, target);
} else {
++indirectMisses;
pred_taken = false;
DPRINTF(Branch, "[tid:%i]: Instruction %s no indirect "
"target.\n", tid, pc);
if (!inst->isCall() && !inst->isReturn()) {
} else if (inst->isCall() && !inst->isUncondCtrl()) {
RAS[tid].pop();
predict_record.pushedRAS = false;
}
TheISA::advancePC(target, inst);
}
iPred.recordIndirect(pc.instAddr(), target.instAddr(), seqNum,
tid);
}
}
} else {
@ -388,6 +446,7 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid)
DPRINTF(Branch, "[tid:%i]: Committing branches until "
"[sn:%lli].\n", tid, done_sn);
iPred.commit(done_sn, tid);
while (!predHist[tid].empty() &&
predHist[tid].back().seqNum <= done_sn) {
// Update the branch predictor with the correct results.
@ -407,6 +466,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
{
History &pred_hist = predHist[tid];
iPred.squash(squashed_sn, tid);
while (!pred_hist.empty() &&
pred_hist.front().seqNum > squashed_sn) {
if (pred_hist.front().usedRAS) {
@ -485,8 +545,13 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
if ((*hist_it).usedRAS) {
++RASIncorrect;
DPRINTF(Branch, "[tid:%i]: Incorrect RAS [sn:%i]\n",
tid, hist_it->seqNum);
}
// Have to get GHR here because the update deletes bpHistory
unsigned ghr = getGHR(hist_it->bpHistory);
update((*hist_it).pc, actually_taken,
pred_hist.front().bpHistory, true);
hist_it->wasSquashed = true;
@ -499,12 +564,15 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
RAS[tid].pop();
hist_it->usedRAS = true;
}
if (hist_it->wasIndirect) {
++indirectMispredicted;
iPred.recordTarget(hist_it->seqNum, ghr, corrTarget, tid);
} else {
DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
" PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
" PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
BTB.update((*hist_it).pc, corrTarget, tid);
BTB.update((*hist_it).pc, corrTarget, tid);
}
} else {
//Actually not Taken
if (hist_it->usedRAS) {

View file

@ -52,6 +52,7 @@
#include "base/statistics.hh"
#include "base/types.hh"
#include "cpu/pred/btb.hh"
#include "cpu/pred/indirect.hh"
#include "cpu/pred/ras.hh"
#include "cpu/inst_seq.hh"
#include "cpu/static_inst.hh"
@ -197,6 +198,9 @@ class BPredUnit : public SimObject
void BTBUpdate(Addr instPC, const TheISA::PCState &target)
{ BTB.update(instPC, target, 0); }
virtual unsigned getGHR(void* bp_history) const { return 0; }
void dump();
private:
@ -210,7 +214,7 @@ class BPredUnit : public SimObject
ThreadID _tid)
: seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0),
wasCall(0), wasReturn(0), wasSquashed(0)
wasCall(0), wasReturn(0), wasSquashed(0), wasIndirect(0)
{}
bool operator==(const PredictorHistory &entry) const {
@ -255,6 +259,9 @@ class BPredUnit : public SimObject
/** Whether this instruction has already mispredicted/updated bp */
bool wasSquashed;
/** Wether this instruction was an indirect branch */
bool wasIndirect;
};
typedef std::deque<PredictorHistory> History;
@ -276,6 +283,12 @@ class BPredUnit : public SimObject
/** The per-thread return address stack. */
std::vector<ReturnAddrStack> RAS;
/** Option to disable indirect predictor. */
const bool useIndirect;
/** The indirect target predictor. */
IndirectPredictor iPred;
/** Stat for number of BP lookups. */
Stats::Scalar lookups;
/** Stat for number of conditional branches predicted. */
@ -295,6 +308,15 @@ class BPredUnit : public SimObject
/** Stat for number of times the RAS is incorrect. */
Stats::Scalar RASIncorrect;
/** Stat for the number of indirect target lookups.*/
Stats::Scalar indirectLookups;
/** Stat for the number of indirect target hits.*/
Stats::Scalar indirectHits;
/** Stat for the number of indirect target misses.*/
Stats::Scalar indirectMisses;
/** Stat for the number of indirect target mispredictions.*/
Stats::Scalar indirectMispredicted;
protected:
/** Number of bits to shift instructions by for predictor addresses. */
const unsigned instShiftAmt;

185
src/cpu/pred/indirect.cc Normal file
View file

@ -0,0 +1,185 @@
/*
* Copyright (c) 2014 ARM Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Mitch Hayenga
*/
#include "cpu/pred/indirect.hh"
#include "base/intmath.hh"
#include "debug/Indirect.hh"
IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets,
unsigned num_sets, unsigned num_ways,
unsigned tag_bits, unsigned path_len, unsigned inst_shift,
unsigned num_threads)
: hashGHR(hash_ghr), hashTargets(hash_targets),
numSets(num_sets), numWays(num_ways), tagBits(tag_bits),
pathLength(path_len), instShift(inst_shift)
{
if (!isPowerOf2(numSets)) {
panic("Indirect predictor requires power of 2 number of sets");
}
threadInfo.resize(num_threads);
targetCache.resize(numSets);
for (unsigned i = 0; i < numSets; i++) {
targetCache[i].resize(numWays);
}
}
bool
IndirectPredictor::lookup(Addr br_addr, unsigned ghr, TheISA::PCState& target,
ThreadID tid)
{
Addr set_index = getSetIndex(br_addr, ghr, tid);
Addr tag = getTag(br_addr);
assert(set_index < numSets);
DPRINTF(Indirect, "Looking up %x (set:%d)\n", br_addr, set_index);
const auto &iset = targetCache[set_index];
for (auto way = iset.begin(); way != iset.end(); ++way) {
if (way->tag == tag) {
DPRINTF(Indirect, "Hit %x (target:%s)\n", br_addr, way->target);
target = way->target;
return true;
}
}
DPRINTF(Indirect, "Miss %x\n", br_addr);
return false;
}
void
IndirectPredictor::recordIndirect(Addr br_addr, Addr tgt_addr,
InstSeqNum seq_num, ThreadID tid)
{
DPRINTF(Indirect, "Recording %x seq:%d\n", br_addr, seq_num);
HistoryEntry entry(br_addr, tgt_addr, seq_num);
threadInfo[tid].pathHist.push_back(entry);
}
void
IndirectPredictor::commit(InstSeqNum seq_num, ThreadID tid)
{
DPRINTF(Indirect, "Committing seq:%d\n", seq_num);
ThreadInfo &t_info = threadInfo[tid];
if (t_info.pathHist.empty()) return;
if (t_info.headHistEntry < t_info.pathHist.size() &&
t_info.pathHist[t_info.headHistEntry].seqNum <= seq_num) {
if (t_info.headHistEntry >= pathLength) {
t_info.pathHist.pop_front();
} else {
++t_info.headHistEntry;
}
}
}
void
IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid)
{
DPRINTF(Indirect, "Squashing seq:%d\n", seq_num);
ThreadInfo &t_info = threadInfo[tid];
auto squash_itr = t_info.pathHist.begin();
while (squash_itr != t_info.pathHist.end()) {
if (squash_itr->seqNum > seq_num) {
break;
}
++squash_itr;
}
if (squash_itr != t_info.pathHist.end()) {
DPRINTF(Indirect, "Squashing series starting with sn:%d\n",
squash_itr->seqNum);
}
t_info.pathHist.erase(squash_itr, t_info.pathHist.end());
}
void
IndirectPredictor::recordTarget(InstSeqNum seq_num, unsigned ghr,
const TheISA::PCState& target, ThreadID tid)
{
ThreadInfo &t_info = threadInfo[tid];
// Should have just squashed so this branch should be the oldest
auto hist_entry = *(t_info.pathHist.rbegin());
// Temporarily pop it off the history so we can calculate the set
t_info.pathHist.pop_back();
Addr set_index = getSetIndex(hist_entry.pcAddr, ghr, tid);
Addr tag = getTag(hist_entry.pcAddr);
hist_entry.targetAddr = target.instAddr();
t_info.pathHist.push_back(hist_entry);
assert(set_index < numSets);
auto &iset = targetCache[set_index];
for (auto way = iset.begin(); way != iset.end(); ++way) {
if (way->tag == tag) {
DPRINTF(Indirect, "Updating Target (seq: %d br:%x set:%d target:"
"%s)\n", seq_num, hist_entry.pcAddr, set_index, target);
way->target = target;
return;
}
}
DPRINTF(Indirect, "Allocating Target (seq: %d br:%x set:%d target:%s)\n",
seq_num, hist_entry.pcAddr, set_index, target);
// Did not find entry, random replacement
auto &way = iset[rand() % numWays];
way.tag = tag;
way.target = target;
}
inline Addr
IndirectPredictor::getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid)
{
ThreadInfo &t_info = threadInfo[tid];
Addr hash = br_addr >> instShift;
if (hashGHR) {
hash ^= ghr;
}
if (hashTargets) {
unsigned hash_shift = floorLog2(numSets) / pathLength;
for (int i = t_info.pathHist.size()-1, p = 0;
i >= 0 && p < pathLength; i--, p++) {
hash ^= (t_info.pathHist[i].targetAddr >>
(instShift + p*hash_shift));
}
}
return hash & (numSets-1);
}
inline Addr
IndirectPredictor::getTag(Addr br_addr)
{
return (br_addr >> instShift) & ((0x1<<tagBits)-1);
}

97
src/cpu/pred/indirect.hh Normal file
View file

@ -0,0 +1,97 @@
/*
* Copyright (c) 2014 ARM Limited
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Mitch Hayenga
*/
#ifndef __CPU_PRED_INDIRECT_HH__
#define __CPU_PRED_INDIRECT_HH__
#include <deque>
#include "arch/isa_traits.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
class IndirectPredictor
{
public:
IndirectPredictor(bool hash_ghr, bool hash_targets,
unsigned num_sets, unsigned num_ways,
unsigned tag_bits, unsigned path_len,
unsigned inst_shift, unsigned num_threads);
bool lookup(Addr br_addr, unsigned ghr, TheISA::PCState& br_target,
ThreadID tid);
void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
ThreadID tid);
void commit(InstSeqNum seq_num, ThreadID tid);
void squash(InstSeqNum seq_num, ThreadID tid);
void recordTarget(InstSeqNum seq_num, unsigned ghr,
const TheISA::PCState& target, ThreadID tid);
private:
const bool hashGHR;
const bool hashTargets;
const unsigned numSets;
const unsigned numWays;
const unsigned tagBits;
const unsigned pathLength;
const unsigned instShift;
struct IPredEntry
{
IPredEntry() : tag(0), target(0) { }
Addr tag;
TheISA::PCState target;
};
std::vector<std::vector<IPredEntry> > targetCache;
Addr getSetIndex(Addr br_addr, unsigned ghr, ThreadID tid);
Addr getTag(Addr br_addr);
struct HistoryEntry
{
HistoryEntry(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num)
: pcAddr(br_addr), targetAddr(tgt_addr), seqNum(seq_num) { }
Addr pcAddr;
Addr targetAddr;
InstSeqNum seqNum;
};
struct ThreadInfo {
ThreadInfo() : headHistEntry(0) { }
std::deque<HistoryEntry> pathHist;
unsigned headHistEntry;
};
std::vector<ThreadInfo> threadInfo;
};
#endif // __CPU_PRED_INDIRECT_HH__

View file

@ -388,6 +388,12 @@ TournamentBPParams::create()
return new TournamentBP(this);
}
unsigned
TournamentBP::getGHR(void *bp_history) const
{
return static_cast<BPHistory *>(bp_history)->globalHistory;
}
#ifdef DEBUG
int
TournamentBP::BPHistory::newCount = 0;

View file

@ -114,6 +114,8 @@ class TournamentBP : public BPredUnit
*/
void squash(void *bp_history);
unsigned getGHR(void *bp_history) const;
/** Returns the global history. */
inline unsigned readGlobalHist() { return globalHistory; }