2016-01-19 20:28:22 +01:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* For use for simulation and test purposes only
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
*
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
|
|
* and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* 3. Neither the name of the copyright holder nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* Author: Lisa Hsu
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "gpu-compute/tlb_coalescer.hh"
|
|
|
|
|
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
#include "debug/GPUTLB.hh"
|
|
|
|
|
|
|
|
/*
 * Construct the coalescer from its parameter object.
 *
 * The clock period, the number of TLB probes per cycle, the coalescing
 * window and the "disable coalescing" switch are copied from <p>. One
 * CpuSidePort is created per slave connection and one MemSidePort per
 * master connection.
 */
TLBCoalescer::TLBCoalescer(const Params *p)
    : MemObject(p),
      clock(p->clk_domain->clockPeriod()),
      TLBProbesPerCycle(p->probesPerCycle),
      coalescingWindow(p->coalescingWindow),
      disableCoalescing(p->disableCoalescing),
      probeTLBEvent(this),
      cleanupEvent(this)
{
    // one CPU-side (slave) port for every connected port
    for (size_t slave_idx = 0; slave_idx < p->port_slave_connection_count;
         ++slave_idx) {
        const std::string port_name =
            csprintf("%s-port%d", name(), slave_idx);

        cpuSidePort.push_back(new CpuSidePort(port_name, this, slave_idx));
    }

    // one memory-side (master) port for every connected port
    for (size_t master_idx = 0; master_idx < p->port_master_connection_count;
         ++master_idx) {
        const std::string port_name =
            csprintf("%s-port%d", name(), master_idx);

        memSidePort.push_back(new MemSidePort(port_name, this, master_idx));
    }
}
|
|
|
|
|
|
|
|
/*
 * Return the CPU-side (slave) port with the given interface name and
 * index.
 *
 * if_name: name of the requested interface; only "slave" is accepted.
 * idx:     position of the requested port inside cpuSidePort.
 *
 * Panics when the interface name is not "slave" or when idx does not
 * identify an existing port.
 */
BaseSlavePort&
TLBCoalescer::getSlavePort(const std::string &if_name, PortID idx)
{
    if (if_name != "slave") {
        panic("TLBCoalescer::getSlavePort: unknown port %s\n", if_name);
    }

    // A negative index must be rejected explicitly, otherwise it would
    // pass the upper-bound check and index the vector out of range.
    // (assumes PortID is a signed integer type -- TODO confirm)
    // The comparison is done in size_t so that the container size is
    // never narrowed to PortID.
    if (idx < 0 || static_cast<size_t>(idx) >= cpuSidePort.size()) {
        panic("TLBCoalescer::getSlavePort: unknown index %d\n", idx);
    }

    return *cpuSidePort[idx];
}
|
|
|
|
|
|
|
|
/*
 * Return the memory-side (master) port with the given interface name and
 * index.
 *
 * if_name: name of the requested interface; only "master" is accepted.
 * idx:     position of the requested port inside memSidePort.
 *
 * Panics when the interface name is not "master" or when idx does not
 * identify an existing port.
 */
BaseMasterPort&
TLBCoalescer::getMasterPort(const std::string &if_name, PortID idx)
{
    if (if_name != "master") {
        panic("TLBCoalescer::getMasterPort: unknown port %s\n", if_name);
    }

    // A negative index must be rejected explicitly, otherwise it would
    // pass the upper-bound check and index the vector out of range.
    // (assumes PortID is a signed integer type -- TODO confirm)
    // The comparison is done in size_t so that the container size is
    // never narrowed to PortID.
    if (idx < 0 || static_cast<size_t>(idx) >= memSidePort.size()) {
        panic("TLBCoalescer::getMasterPort: unknown index %d\n", idx);
    }

    return *memSidePort[idx];
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This method returns true if the <incoming_pkt>
|
|
|
|
* can be coalesced with <coalesced_pkt> and false otherwise.
|
|
|
|
* A given set of rules is checked.
|
|
|
|
* The rules can potentially be modified based on the TLB level.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
TLBCoalescer::canCoalesce(PacketPtr incoming_pkt, PacketPtr coalesced_pkt)
|
|
|
|
{
|
|
|
|
if (disableCoalescing)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
TheISA::GpuTLB::TranslationState *incoming_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
|
|
|
|
|
|
|
|
TheISA::GpuTLB::TranslationState *coalesced_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
|
|
|
|
|
|
|
|
// Rule 1: Coalesce requests only if they
|
|
|
|
// fall within the same virtual page
|
|
|
|
Addr incoming_virt_page_addr = roundDown(incoming_pkt->req->getVaddr(),
|
|
|
|
TheISA::PageBytes);
|
|
|
|
|
|
|
|
Addr coalesced_virt_page_addr = roundDown(coalesced_pkt->req->getVaddr(),
|
|
|
|
TheISA::PageBytes);
|
|
|
|
|
|
|
|
if (incoming_virt_page_addr != coalesced_virt_page_addr)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
//* Rule 2: Coalesce requests only if they
|
|
|
|
// share a TLB Mode, i.e. they are both read
|
|
|
|
// or write requests.
|
|
|
|
BaseTLB::Mode incoming_mode = incoming_state->tlbMode;
|
|
|
|
BaseTLB::Mode coalesced_mode = coalesced_state->tlbMode;
|
|
|
|
|
|
|
|
if (incoming_mode != coalesced_mode)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// when we can coalesce a packet update the reqCnt
|
|
|
|
// that is the number of packets represented by
|
|
|
|
// this coalesced packet
|
|
|
|
if (!incoming_state->prefetch)
|
|
|
|
coalesced_state->reqCnt.back() += incoming_state->reqCnt.back();
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to update the physical addresses of all the translation requests
|
|
|
|
* that were coalesced into the one that just returned.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
|
|
|
|
{
|
|
|
|
Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for page %#x\n",
|
|
|
|
issuedTranslationsTable[virt_page_addr].size(), virt_page_addr);
|
|
|
|
|
|
|
|
TheISA::GpuTLB::TranslationState *sender_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
|
|
|
|
|
|
|
TheISA::GpuTlbEntry *tlb_entry = sender_state->tlbEntry;
|
|
|
|
assert(tlb_entry);
|
|
|
|
Addr first_entry_vaddr = tlb_entry->vaddr;
|
|
|
|
Addr first_entry_paddr = tlb_entry->paddr;
|
|
|
|
int page_size = tlb_entry->size();
|
|
|
|
bool uncacheable = tlb_entry->uncacheable;
|
|
|
|
int first_hit_level = sender_state->hitLevel;
|
|
|
|
bool valid = tlb_entry->valid;
|
|
|
|
|
|
|
|
// Get the physical page address of the translated request
|
|
|
|
// Using the page_size specified in the TLBEntry allows us
|
|
|
|
// to support different page sizes.
|
|
|
|
Addr phys_page_paddr = pkt->req->getPaddr();
|
|
|
|
phys_page_paddr &= ~(page_size - 1);
|
|
|
|
|
|
|
|
for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
|
|
|
|
PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
|
|
|
|
TheISA::GpuTLB::TranslationState *sender_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>(
|
|
|
|
local_pkt->senderState);
|
|
|
|
|
|
|
|
// we are sending the packet back, so pop the reqCnt associated
|
|
|
|
// with this level in the TLB hiearchy
|
|
|
|
if (!sender_state->prefetch)
|
|
|
|
sender_state->reqCnt.pop_back();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Only the first packet from this coalesced request has been
|
|
|
|
* translated. Grab the translated phys. page addr and update the
|
|
|
|
* physical addresses of the remaining packets with the appropriate
|
|
|
|
* page offsets.
|
|
|
|
*/
|
|
|
|
if (i) {
|
|
|
|
Addr paddr = phys_page_paddr;
|
|
|
|
paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
|
|
|
|
local_pkt->req->setPaddr(paddr);
|
|
|
|
|
|
|
|
if (uncacheable)
|
|
|
|
local_pkt->req->setFlags(Request::UNCACHEABLE);
|
|
|
|
|
|
|
|
// update senderState->tlbEntry, so we can insert
|
|
|
|
// the correct TLBEentry in the TLBs above.
|
|
|
|
sender_state->tlbEntry =
|
|
|
|
new TheISA::GpuTlbEntry(0, first_entry_vaddr, first_entry_paddr,
|
|
|
|
valid);
|
|
|
|
|
|
|
|
// update the hitLevel for all uncoalesced reqs
|
|
|
|
// so that each packet knows where it hit
|
|
|
|
// (used for statistics in the CUs)
|
|
|
|
sender_state->hitLevel = first_hit_level;
|
|
|
|
}
|
|
|
|
|
|
|
|
SlavePort *return_port = sender_state->ports.back();
|
|
|
|
sender_state->ports.pop_back();
|
|
|
|
|
|
|
|
// Translation is done - Convert to a response pkt if necessary and
|
|
|
|
// send the translation back
|
|
|
|
if (local_pkt->isRequest()) {
|
|
|
|
local_pkt->makeTimingResponse();
|
|
|
|
}
|
|
|
|
|
|
|
|
return_port->sendTimingResp(local_pkt);
|
|
|
|
}
|
|
|
|
|
|
|
|
// schedule clean up for end of this cycle
|
|
|
|
// This is a maximum priority event and must be on
|
|
|
|
// the same cycle as GPUTLB cleanup event to prevent
|
|
|
|
// race conditions with an IssueProbeEvent caused by
|
|
|
|
// MemSidePort::recvReqRetry
|
|
|
|
cleanupQueue.push(virt_page_addr);
|
|
|
|
|
|
|
|
if (!cleanupEvent.scheduled())
|
|
|
|
schedule(cleanupEvent, curTick());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Receive translation requests, create a coalesced request,
|
|
|
|
// and send them to the TLB (TLBProbesPerCycle)
|
|
|
|
bool
|
|
|
|
TLBCoalescer::CpuSidePort::recvTimingReq(PacketPtr pkt)
|
|
|
|
{
|
|
|
|
// first packet of a coalesced request
|
|
|
|
PacketPtr first_packet = nullptr;
|
|
|
|
// true if we are able to do coalescing
|
|
|
|
bool didCoalesce = false;
|
|
|
|
// number of coalesced reqs for a given window
|
|
|
|
int coalescedReq_cnt = 0;
|
|
|
|
|
|
|
|
TheISA::GpuTLB::TranslationState *sender_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
|
|
|
|
|
|
|
// push back the port to remember the path back
|
|
|
|
sender_state->ports.push_back(this);
|
|
|
|
|
|
|
|
bool update_stats = !sender_state->prefetch;
|
|
|
|
|
|
|
|
if (update_stats) {
|
|
|
|
// if reqCnt is empty then this packet does not represent
|
|
|
|
// multiple uncoalesced reqs(pkts) but just a single pkt.
|
|
|
|
// If it does though then the reqCnt for each level in the
|
|
|
|
// hierarchy accumulates the total number of reqs this packet
|
|
|
|
// represents
|
|
|
|
int req_cnt = 1;
|
|
|
|
|
|
|
|
if (!sender_state->reqCnt.empty())
|
|
|
|
req_cnt = sender_state->reqCnt.back();
|
|
|
|
|
|
|
|
sender_state->reqCnt.push_back(req_cnt);
|
|
|
|
|
|
|
|
// update statistics
|
|
|
|
coalescer->uncoalescedAccesses++;
|
|
|
|
req_cnt = sender_state->reqCnt.back();
|
|
|
|
DPRINTF(GPUTLB, "receiving pkt w/ req_cnt %d\n", req_cnt);
|
|
|
|
coalescer->queuingCycles -= (curTick() * req_cnt);
|
|
|
|
coalescer->localqueuingCycles -= curTick();
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME if you want to coalesce not based on the issueTime
|
|
|
|
// of the packets (i.e., from the compute unit's perspective)
|
|
|
|
// but based on when they reached this coalescer then
|
|
|
|
// remove the following if statement and use curTick() or
|
|
|
|
// coalescingWindow for the tick_index.
|
|
|
|
if (!sender_state->issueTime)
|
|
|
|
sender_state->issueTime = curTick();
|
|
|
|
|
|
|
|
// The tick index is used as a key to the coalescerFIFO hashmap.
|
|
|
|
// It is shared by all candidates that fall within the
|
|
|
|
// given coalescingWindow.
|
|
|
|
int64_t tick_index = sender_state->issueTime / coalescer->coalescingWindow;
|
|
|
|
|
|
|
|
if (coalescer->coalescerFIFO.count(tick_index)) {
|
|
|
|
coalescedReq_cnt = coalescer->coalescerFIFO[tick_index].size();
|
|
|
|
}
|
|
|
|
|
|
|
|
// see if we can coalesce the incoming pkt with another
|
|
|
|
// coalesced request with the same tick_index
|
|
|
|
for (int i = 0; i < coalescedReq_cnt; ++i) {
|
|
|
|
first_packet = coalescer->coalescerFIFO[tick_index][i][0];
|
|
|
|
|
|
|
|
if (coalescer->canCoalesce(pkt, first_packet)) {
|
|
|
|
coalescer->coalescerFIFO[tick_index][i].push_back(pkt);
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "Coalesced req %i w/ tick_index %d has %d reqs\n",
|
|
|
|
i, tick_index,
|
|
|
|
coalescer->coalescerFIFO[tick_index][i].size());
|
|
|
|
|
|
|
|
didCoalesce = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if this is the first request for this tick_index
|
|
|
|
// or we did not manage to coalesce, update stats
|
|
|
|
// and make necessary allocations.
|
|
|
|
if (!coalescedReq_cnt || !didCoalesce) {
|
|
|
|
if (update_stats)
|
|
|
|
coalescer->coalescedAccesses++;
|
|
|
|
|
|
|
|
std::vector<PacketPtr> new_array;
|
|
|
|
new_array.push_back(pkt);
|
|
|
|
coalescer->coalescerFIFO[tick_index].push_back(new_array);
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "coalescerFIFO[%d] now has %d coalesced reqs after "
|
|
|
|
"push\n", tick_index,
|
|
|
|
coalescer->coalescerFIFO[tick_index].size());
|
|
|
|
}
|
|
|
|
|
|
|
|
//schedule probeTLBEvent next cycle to send the
|
|
|
|
//coalesced requests to the TLB
|
|
|
|
if (!coalescer->probeTLBEvent.scheduled()) {
|
|
|
|
coalescer->schedule(coalescer->probeTLBEvent,
|
|
|
|
curTick() + coalescer->ticks(1));
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TLBCoalescer::CpuSidePort::recvReqRetry()
|
|
|
|
{
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
|
|
|
|
{
|
|
|
|
|
|
|
|
TheISA::GpuTLB::TranslationState *sender_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
|
|
|
|
|
|
|
|
bool update_stats = !sender_state->prefetch;
|
|
|
|
|
|
|
|
if (update_stats)
|
|
|
|
coalescer->uncoalescedAccesses++;
|
|
|
|
|
|
|
|
// If there is a pending timing request for this virtual address
|
|
|
|
// print a warning message. This is a temporary caveat of
|
|
|
|
// the current simulator where atomic and timing requests can
|
|
|
|
// coexist. FIXME remove this check/warning in the future.
|
|
|
|
Addr virt_page_addr = roundDown(pkt->req->getVaddr(), TheISA::PageBytes);
|
|
|
|
int map_count = coalescer->issuedTranslationsTable.count(virt_page_addr);
|
|
|
|
|
|
|
|
if (map_count) {
|
|
|
|
DPRINTF(GPUTLB, "Warning! Functional access to addr %#x sees timing "
|
|
|
|
"req. pending\n", virt_page_addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
coalescer->memSidePort[0]->sendFunctional(pkt);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Report the address ranges served by this port. An empty list is
 * returned; the result is currently not checked by the master.
 */
AddrRangeList
TLBCoalescer::CpuSidePort::getAddrRanges() const
{
    return AddrRangeList();
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
TLBCoalescer::MemSidePort::recvTimingResp(PacketPtr pkt)
|
|
|
|
{
|
|
|
|
// a translation completed and returned
|
|
|
|
coalescer->updatePhysAddresses(pkt);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TLBCoalescer::MemSidePort::recvReqRetry()
|
|
|
|
{
|
|
|
|
//we've receeived a retry. Schedule a probeTLBEvent
|
|
|
|
if (!coalescer->probeTLBEvent.scheduled())
|
|
|
|
coalescer->schedule(coalescer->probeTLBEvent,
|
|
|
|
curTick() + coalescer->ticks(1));
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TLBCoalescer::MemSidePort::recvFunctional(PacketPtr pkt)
|
|
|
|
{
|
|
|
|
fatal("Memory side recvFunctional() not implemented in TLB coalescer.\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create the probe event for <_coalescer>. The event is constructed with
 * priority CPU_Tick_Pri.
 */
TLBCoalescer::IssueProbeEvent::IssueProbeEvent(TLBCoalescer * _coalescer)
    : Event(CPU_Tick_Pri),
      coalescer(_coalescer)
{}
|
|
|
|
|
|
|
|
const char*
|
|
|
|
TLBCoalescer::IssueProbeEvent::description() const
|
|
|
|
{
|
|
|
|
return "Probe the TLB below";
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we scan the coalescer FIFO and issue the max
|
|
|
|
* number of permitted probes to the TLB below. We
|
|
|
|
* permit bypassing of coalesced requests for the same
|
|
|
|
* tick_index.
|
|
|
|
*
|
|
|
|
* We do not access the next tick_index unless we've
|
|
|
|
* drained the previous one. The coalesced requests
|
|
|
|
* that are successfully sent are moved to the
|
|
|
|
* issuedTranslationsTable table (the table which keeps
|
|
|
|
* track of the outstanding reqs)
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
TLBCoalescer::IssueProbeEvent::process()
|
|
|
|
{
|
|
|
|
// number of TLB probes sent so far
|
|
|
|
int sent_probes = 0;
|
|
|
|
// rejected denotes a blocking event
|
|
|
|
bool rejected = false;
|
|
|
|
|
|
|
|
// It is set to true either when the recvTiming of the TLB below
|
|
|
|
// returns false or when there is another outstanding request for the
|
|
|
|
// same virt. page.
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "triggered TLBCoalescer IssueProbeEvent\n");
|
|
|
|
|
|
|
|
for (auto iter = coalescer->coalescerFIFO.begin();
|
|
|
|
iter != coalescer->coalescerFIFO.end() && !rejected; ) {
|
|
|
|
int coalescedReq_cnt = iter->second.size();
|
|
|
|
int i = 0;
|
|
|
|
int vector_index = 0;
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "coalescedReq_cnt is %d for tick_index %d\n",
|
|
|
|
coalescedReq_cnt, iter->first);
|
|
|
|
|
|
|
|
while (i < coalescedReq_cnt) {
|
|
|
|
++i;
|
|
|
|
PacketPtr first_packet = iter->second[vector_index][0];
|
|
|
|
|
|
|
|
// compute virtual page address for this request
|
|
|
|
Addr virt_page_addr = roundDown(first_packet->req->getVaddr(),
|
|
|
|
TheISA::PageBytes);
|
|
|
|
|
|
|
|
// is there another outstanding request for the same page addr?
|
|
|
|
int pending_reqs =
|
|
|
|
coalescer->issuedTranslationsTable.count(virt_page_addr);
|
|
|
|
|
|
|
|
if (pending_reqs) {
|
|
|
|
DPRINTF(GPUTLB, "Cannot issue - There are pending reqs for "
|
|
|
|
"page %#x\n", virt_page_addr);
|
|
|
|
|
|
|
|
++vector_index;
|
|
|
|
rejected = true;
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// send the coalesced request for virt_page_addr
|
|
|
|
if (!coalescer->memSidePort[0]->sendTimingReq(first_packet)) {
|
|
|
|
DPRINTF(GPUTLB, "Failed to send TLB request for page %#x",
|
|
|
|
virt_page_addr);
|
|
|
|
|
|
|
|
// No need for a retries queue since we are already buffering
|
|
|
|
// the coalesced request in coalescerFIFO.
|
|
|
|
rejected = true;
|
|
|
|
++vector_index;
|
|
|
|
} else {
|
|
|
|
TheISA::GpuTLB::TranslationState *tmp_sender_state =
|
|
|
|
safe_cast<TheISA::GpuTLB::TranslationState*>
|
|
|
|
(first_packet->senderState);
|
|
|
|
|
|
|
|
bool update_stats = !tmp_sender_state->prefetch;
|
|
|
|
|
|
|
|
if (update_stats) {
|
|
|
|
// req_cnt is total number of packets represented
|
|
|
|
// by the one we just sent counting all the way from
|
|
|
|
// the top of TLB hiearchy (i.e., from the CU)
|
|
|
|
int req_cnt = tmp_sender_state->reqCnt.back();
|
|
|
|
coalescer->queuingCycles += (curTick() * req_cnt);
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "%s sending pkt w/ req_cnt %d\n",
|
|
|
|
coalescer->name(), req_cnt);
|
|
|
|
|
|
|
|
// pkt_cnt is number of packets we coalesced into the one
|
|
|
|
// we just sent but only at this coalescer level
|
|
|
|
int pkt_cnt = iter->second[vector_index].size();
|
|
|
|
coalescer->localqueuingCycles += (curTick() * pkt_cnt);
|
|
|
|
}
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "Successfully sent TLB request for page %#x",
|
|
|
|
virt_page_addr);
|
|
|
|
|
|
|
|
//copy coalescedReq to issuedTranslationsTable
|
|
|
|
coalescer->issuedTranslationsTable[virt_page_addr]
|
|
|
|
= iter->second[vector_index];
|
|
|
|
|
|
|
|
//erase the entry of this coalesced req
|
|
|
|
iter->second.erase(iter->second.begin() + vector_index);
|
|
|
|
|
|
|
|
if (iter->second.empty())
|
|
|
|
assert(i == coalescedReq_cnt);
|
|
|
|
|
|
|
|
sent_probes++;
|
|
|
|
if (sent_probes == coalescer->TLBProbesPerCycle)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//if there are no more coalesced reqs for this tick_index
|
|
|
|
//erase the hash_map with the first iterator
|
|
|
|
if (iter->second.empty()) {
|
|
|
|
coalescer->coalescerFIFO.erase(iter++);
|
|
|
|
} else {
|
|
|
|
++iter;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Create the cleanup event for <_coalescer>. The event is constructed
 * with priority Maximum_Pri.
 */
TLBCoalescer::CleanupEvent::CleanupEvent(TLBCoalescer* _coalescer)
    : Event(Maximum_Pri),
      coalescer(_coalescer)
{}
|
|
|
|
|
|
|
|
const char*
|
|
|
|
TLBCoalescer::CleanupEvent::description() const
|
|
|
|
{
|
|
|
|
return "Cleanup issuedTranslationsTable hashmap";
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TLBCoalescer::CleanupEvent::process()
|
|
|
|
{
|
|
|
|
while (!coalescer->cleanupQueue.empty()) {
|
|
|
|
Addr cleanup_addr = coalescer->cleanupQueue.front();
|
|
|
|
coalescer->cleanupQueue.pop();
|
|
|
|
coalescer->issuedTranslationsTable.erase(cleanup_addr);
|
|
|
|
|
|
|
|
DPRINTF(GPUTLB, "Cleanup - Delete coalescer entry with key %#x\n",
|
|
|
|
cleanup_addr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
TLBCoalescer::regStats()
|
|
|
|
{
|
2016-06-06 18:16:43 +02:00
|
|
|
MemObject::regStats();
|
|
|
|
|
2016-01-19 20:28:22 +01:00
|
|
|
uncoalescedAccesses
|
|
|
|
.name(name() + ".uncoalesced_accesses")
|
|
|
|
.desc("Number of uncoalesced TLB accesses")
|
|
|
|
;
|
|
|
|
|
|
|
|
coalescedAccesses
|
|
|
|
.name(name() + ".coalesced_accesses")
|
|
|
|
.desc("Number of coalesced TLB accesses")
|
|
|
|
;
|
|
|
|
|
|
|
|
queuingCycles
|
|
|
|
.name(name() + ".queuing_cycles")
|
|
|
|
.desc("Number of cycles spent in queue")
|
|
|
|
;
|
|
|
|
|
|
|
|
localqueuingCycles
|
|
|
|
.name(name() + ".local_queuing_cycles")
|
|
|
|
.desc("Number of cycles spent in queue for all incoming reqs")
|
|
|
|
;
|
|
|
|
|
|
|
|
localLatency
|
|
|
|
.name(name() + ".local_latency")
|
|
|
|
.desc("Avg. latency over all incoming pkts")
|
|
|
|
;
|
|
|
|
|
|
|
|
localLatency = localqueuingCycles / uncoalescedAccesses;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Factory hook of the parameter object: allocate a TLBCoalescer that is
 * configured from these parameters.
 */
TLBCoalescer*
TLBCoalescerParams::create()
{
    TLBCoalescer *new_coalescer = new TLBCoalescer(this);

    return new_coalescer;
}
|
|
|
|
|