253 lines
8.4 KiB
C++
253 lines
8.4 KiB
C++
/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Lisa Hsu
 */
|
|
|
|
#ifndef __TLB_COALESCER_HH__
|
|
#define __TLB_COALESCER_HH__
|
|
|
|
#include <cstdint>
#include <deque>
#include <list>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

#include "arch/generic/tlb.hh"
#include "arch/isa.hh"
#include "arch/isa_traits.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/regs/segment.hh"
#include "base/misc.hh"
#include "base/statistics.hh"
#include "gpu-compute/gpu_tlb.hh"
#include "mem/mem_object.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/TLBCoalescer.hh"
|
|
|
|
// Forward declarations.
class BaseTLB;
class Packet;
class ThreadContext;
|
|
|
|
/**
|
|
* The TLBCoalescer is a MemObject sitting on the front side (CPUSide) of
|
|
* each TLB. It receives packets and issues coalesced requests to the
|
|
* TLB below it. It controls how requests are coalesced (the rules)
|
|
* and the permitted number of TLB probes per cycle (i.e., how many
|
|
* coalesced requests it feeds the TLB per cycle).
|
|
*/
|
|
class TLBCoalescer : public MemObject
|
|
{
|
|
protected:
|
|
// TLB clock: will inherit clock from shader's clock period in terms
|
|
// of nuber of ticks of curTime (aka global simulation clock)
|
|
// The assignment of TLB clock from shader clock is done in the
|
|
// python config files.
|
|
int clock;
|
|
|
|
public:
|
|
typedef TLBCoalescerParams Params;
|
|
TLBCoalescer(const Params *p);
|
|
~TLBCoalescer() { }
|
|
|
|
// Number of TLB probes per cycle. Parameterizable - default 2.
|
|
int TLBProbesPerCycle;
|
|
|
|
// Consider coalescing across that many ticks.
|
|
// Paraemterizable - default 1.
|
|
int coalescingWindow;
|
|
|
|
// Each coalesced request consists of multiple packets
|
|
// that all fall within the same virtual page
|
|
typedef std::vector<PacketPtr> coalescedReq;
|
|
|
|
// disables coalescing when true
|
|
bool disableCoalescing;
|
|
|
|
/*
|
|
* This is a hash map with <tick_index> as a key.
|
|
* It contains a vector of coalescedReqs per <tick_index>.
|
|
* Requests are buffered here until they can be issued to
|
|
* the TLB, at which point they are copied to the
|
|
* issuedTranslationsTable hash map.
|
|
*
|
|
* In terms of coalescing, we coalesce requests in a given
|
|
* window of x cycles by using tick_index = issueTime/x as a
|
|
* key, where x = coalescingWindow. issueTime is the issueTime
|
|
* of the pkt from the ComputeUnit's perspective, but another
|
|
* option is to change it to curTick(), so we coalesce based
|
|
* on the receive time.
|
|
*/
|
|
typedef std::unordered_map<int64_t, std::vector<coalescedReq>> CoalescingFIFO;
|
|
|
|
CoalescingFIFO coalescerFIFO;
|
|
|
|
/*
|
|
* issuedTranslationsTabler: a hash_map indexed by virtual page
|
|
* address. Each hash_map entry has a vector of PacketPtr associated
|
|
* with it denoting the different packets that share an outstanding
|
|
* coalesced translation request for the same virtual page.
|
|
*
|
|
* The rules that determine which requests we can coalesce are
|
|
* specified in the canCoalesce() method.
|
|
*/
|
|
typedef std::unordered_map<Addr, coalescedReq> CoalescingTable;
|
|
|
|
CoalescingTable issuedTranslationsTable;
|
|
|
|
// number of packets the coalescer receives
|
|
Stats::Scalar uncoalescedAccesses;
|
|
// number packets the coalescer send to the TLB
|
|
Stats::Scalar coalescedAccesses;
|
|
|
|
// Number of cycles the coalesced requests spend waiting in
|
|
// coalescerFIFO. For each packet the coalescer receives we take into
|
|
// account the number of all uncoalesced requests this pkt "represents"
|
|
Stats::Scalar queuingCycles;
|
|
|
|
// On average how much time a request from the
|
|
// uncoalescedAccesses that reaches the TLB
|
|
// spends waiting?
|
|
Stats::Scalar localqueuingCycles;
|
|
// localqueuingCycles/uncoalescedAccesses
|
|
Stats::Formula localLatency;
|
|
|
|
bool canCoalesce(PacketPtr pkt1, PacketPtr pkt2);
|
|
void updatePhysAddresses(PacketPtr pkt);
|
|
void regStats();
|
|
|
|
// Clock related functions. Maps to-and-from
|
|
// Simulation ticks and object clocks.
|
|
Tick frequency() const { return SimClock::Frequency / clock; }
|
|
Tick ticks(int numCycles) const { return (Tick)clock * numCycles; }
|
|
Tick curCycle() const { return curTick() / clock; }
|
|
Tick tickToCycles(Tick val) const { return val / clock;}
|
|
|
|
class CpuSidePort : public SlavePort
|
|
{
|
|
public:
|
|
CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
|
|
PortID _index)
|
|
: SlavePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
|
|
index(_index) { }
|
|
|
|
protected:
|
|
TLBCoalescer *coalescer;
|
|
int index;
|
|
|
|
virtual bool recvTimingReq(PacketPtr pkt);
|
|
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
|
|
virtual void recvFunctional(PacketPtr pkt);
|
|
virtual void recvRangeChange() { }
|
|
virtual void recvReqRetry();
|
|
|
|
virtual void
|
|
recvRespRetry()
|
|
{
|
|
fatal("recvRespRetry() is not implemented in the TLB coalescer.\n");
|
|
}
|
|
|
|
virtual AddrRangeList getAddrRanges() const;
|
|
};
|
|
|
|
class MemSidePort : public MasterPort
|
|
{
|
|
public:
|
|
MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
|
|
PortID _index)
|
|
: MasterPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
|
|
index(_index) { }
|
|
|
|
std::deque<PacketPtr> retries;
|
|
|
|
protected:
|
|
TLBCoalescer *coalescer;
|
|
int index;
|
|
|
|
virtual bool recvTimingResp(PacketPtr pkt);
|
|
virtual Tick recvAtomic(PacketPtr pkt) { return 0; }
|
|
virtual void recvFunctional(PacketPtr pkt);
|
|
virtual void recvRangeChange() { }
|
|
virtual void recvReqRetry();
|
|
|
|
virtual void
|
|
recvRespRetry()
|
|
{
|
|
fatal("recvRespRetry() not implemented in TLB coalescer");
|
|
}
|
|
};
|
|
|
|
// Coalescer slave ports on the cpu Side
|
|
std::vector<CpuSidePort*> cpuSidePort;
|
|
// Coalescer master ports on the memory side
|
|
std::vector<MemSidePort*> memSidePort;
|
|
|
|
BaseMasterPort& getMasterPort(const std::string &if_name, PortID idx);
|
|
BaseSlavePort& getSlavePort(const std::string &if_name, PortID idx);
|
|
|
|
class IssueProbeEvent : public Event
|
|
{
|
|
private:
|
|
TLBCoalescer *coalescer;
|
|
|
|
public:
|
|
IssueProbeEvent(TLBCoalescer *_coalescer);
|
|
void process();
|
|
const char *description() const;
|
|
};
|
|
|
|
// this event issues the TLB probes
|
|
IssueProbeEvent probeTLBEvent;
|
|
|
|
// the cleanupEvent is scheduled after a TLBEvent triggers
|
|
// in order to free memory and do the required clean-up
|
|
class CleanupEvent : public Event
|
|
{
|
|
private:
|
|
TLBCoalescer *coalescer;
|
|
|
|
public:
|
|
CleanupEvent(TLBCoalescer *_coalescer);
|
|
void process();
|
|
const char* description() const;
|
|
};
|
|
|
|
// schedule cleanup
|
|
CleanupEvent cleanupEvent;
|
|
|
|
// this FIFO queue keeps track of the virt. page
|
|
// addresses that are pending cleanup
|
|
std::queue<Addr> cleanupQueue;
|
|
};
|
|
|
|
#endif // __TLB_COALESCER_HH__
|