This patch adds support for cache flushing to the MOESI_hammer protocol.
This commit is contained in:
parent
ef987a4064
commit
c8bbfed937
14 changed files with 508 additions and 28 deletions
|
@ -82,7 +82,14 @@ if args:
|
||||||
#
|
#
|
||||||
# Create the ruby random tester
|
# Create the ruby random tester
|
||||||
#
|
#
|
||||||
tester = RubyTester(checks_to_complete = options.checks,
|
|
||||||
|
# Check the protocol
|
||||||
|
check_flush = False
|
||||||
|
if buildEnv['PROTOCOL'] == 'MOESI_hammer':
|
||||||
|
check_flush = True
|
||||||
|
|
||||||
|
tester = RubyTester(check_flush = check_flush,
|
||||||
|
checks_to_complete = options.checks,
|
||||||
wakeup_frequency = options.wakeup_freq)
|
wakeup_frequency = options.wakeup_freq)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
|
@ -59,6 +59,10 @@ Check::initiate()
|
||||||
initiatePrefetch(); // Prefetch from random processor
|
initiatePrefetch(); // Prefetch from random processor
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_tester_ptr->getCheckFlush() && (random() & 0xff) == 0) {
|
||||||
|
initiateFlush(); // issue a Flush request from random processor
|
||||||
|
}
|
||||||
|
|
||||||
if (m_status == TesterStatus_Idle) {
|
if (m_status == TesterStatus_Idle) {
|
||||||
initiateAction();
|
initiateAction();
|
||||||
} else if (m_status == TesterStatus_Ready) {
|
} else if (m_status == TesterStatus_Ready) {
|
||||||
|
@ -123,6 +127,37 @@ Check::initiatePrefetch()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Check::initiateFlush()
|
||||||
|
{
|
||||||
|
|
||||||
|
DPRINTF(RubyTest, "initiating Flush\n");
|
||||||
|
|
||||||
|
int index = random() % m_num_cpu_sequencers;
|
||||||
|
RubyTester::CpuPort* port =
|
||||||
|
safe_cast<RubyTester::CpuPort*>(m_tester_ptr->getCpuPort(index));
|
||||||
|
|
||||||
|
Request::Flags flags;
|
||||||
|
|
||||||
|
Request *req = new Request(m_address.getAddress(), CHECK_SIZE, flags, curTick(),
|
||||||
|
m_pc.getAddress());
|
||||||
|
|
||||||
|
Packet::Command cmd;
|
||||||
|
|
||||||
|
cmd = MemCmd::FlushReq;
|
||||||
|
|
||||||
|
PacketPtr pkt = new Packet(req, cmd, port->idx);
|
||||||
|
|
||||||
|
// push the subblock onto the sender state. The sequencer will
|
||||||
|
// update the subblock on the return
|
||||||
|
pkt->senderState =
|
||||||
|
new SenderState(m_address, req->getSize(), pkt->senderState);
|
||||||
|
|
||||||
|
if (port->sendTiming(pkt)) {
|
||||||
|
DPRINTF(RubyTest, "initiating Flush - successful\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Check::initiateAction()
|
Check::initiateAction()
|
||||||
{
|
{
|
||||||
|
|
|
@ -58,6 +58,7 @@ class Check
|
||||||
void print(std::ostream& out) const;
|
void print(std::ostream& out) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void initiateFlush();
|
||||||
void initiatePrefetch();
|
void initiatePrefetch();
|
||||||
void initiateAction();
|
void initiateAction();
|
||||||
void initiateCheck();
|
void initiateCheck();
|
||||||
|
|
|
@ -40,7 +40,8 @@ RubyTester::RubyTester(const Params *p)
|
||||||
: MemObject(p), checkStartEvent(this),
|
: MemObject(p), checkStartEvent(this),
|
||||||
m_checks_to_complete(p->checks_to_complete),
|
m_checks_to_complete(p->checks_to_complete),
|
||||||
m_deadlock_threshold(p->deadlock_threshold),
|
m_deadlock_threshold(p->deadlock_threshold),
|
||||||
m_wakeup_frequency(p->wakeup_frequency)
|
m_wakeup_frequency(p->wakeup_frequency),
|
||||||
|
m_check_flush(p->check_flush)
|
||||||
{
|
{
|
||||||
m_checks_completed = 0;
|
m_checks_completed = 0;
|
||||||
|
|
||||||
|
|
|
@ -99,6 +99,7 @@ class RubyTester : public MemObject
|
||||||
void printConfig(std::ostream& out) const {}
|
void printConfig(std::ostream& out) const {}
|
||||||
|
|
||||||
void print(std::ostream& out) const;
|
void print(std::ostream& out) const;
|
||||||
|
bool getCheckFlush() { return m_check_flush; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
class CheckStartEvent : public Event
|
class CheckStartEvent : public Event
|
||||||
|
@ -134,6 +135,7 @@ class RubyTester : public MemObject
|
||||||
int m_deadlock_threshold;
|
int m_deadlock_threshold;
|
||||||
int m_num_cpu_sequencers;
|
int m_num_cpu_sequencers;
|
||||||
int m_wakeup_frequency;
|
int m_wakeup_frequency;
|
||||||
|
bool m_check_flush;
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream&
|
inline std::ostream&
|
||||||
|
|
|
@ -36,3 +36,4 @@ class RubyTester(MemObject):
|
||||||
checks_to_complete = Param.Int(100, "checks to complete")
|
checks_to_complete = Param.Int(100, "checks to complete")
|
||||||
deadlock_threshold = Param.Int(50000, "how often to check for deadlock")
|
deadlock_threshold = Param.Int(50000, "how often to check for deadlock")
|
||||||
wakeup_frequency = Param.Int(10, "number of cycles between wakeups")
|
wakeup_frequency = Param.Int(10, "number of cycles between wakeups")
|
||||||
|
check_flush = Param.Bool(False, "check cache flushing")
|
||||||
|
|
|
@ -148,7 +148,9 @@ MemCmd::commandInfo[] =
|
||||||
/* BadAddressError -- memory address invalid */
|
/* BadAddressError -- memory address invalid */
|
||||||
{ SET2(IsResponse, IsError), InvalidCmd, "BadAddressError" },
|
{ SET2(IsResponse, IsError), InvalidCmd, "BadAddressError" },
|
||||||
/* PrintReq */
|
/* PrintReq */
|
||||||
{ SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" }
|
{ SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" },
|
||||||
|
/* Flush Request */
|
||||||
|
{ SET3(IsRequest, IsFlush, NeedsExclusive), InvalidCmd, "FlushReq" }
|
||||||
};
|
};
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|
|
@ -105,6 +105,7 @@ class MemCmd
|
||||||
BadAddressError, // memory address invalid
|
BadAddressError, // memory address invalid
|
||||||
// Fake simulator-only commands
|
// Fake simulator-only commands
|
||||||
PrintReq, // Print state matching address
|
PrintReq, // Print state matching address
|
||||||
|
FlushReq, //request for a cache flush
|
||||||
NUM_MEM_CMDS
|
NUM_MEM_CMDS
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -129,6 +130,7 @@ class MemCmd
|
||||||
HasData, //!< There is an associated payload
|
HasData, //!< There is an associated payload
|
||||||
IsError, //!< Error response
|
IsError, //!< Error response
|
||||||
IsPrint, //!< Print state matching address (for debugging)
|
IsPrint, //!< Print state matching address (for debugging)
|
||||||
|
IsFlush, //!< Flush the address from caches
|
||||||
NUM_COMMAND_ATTRIBUTES
|
NUM_COMMAND_ATTRIBUTES
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -175,6 +177,7 @@ class MemCmd
|
||||||
bool isLLSC() const { return testCmdAttrib(IsLlsc); }
|
bool isLLSC() const { return testCmdAttrib(IsLlsc); }
|
||||||
bool isError() const { return testCmdAttrib(IsError); }
|
bool isError() const { return testCmdAttrib(IsError); }
|
||||||
bool isPrint() const { return testCmdAttrib(IsPrint); }
|
bool isPrint() const { return testCmdAttrib(IsPrint); }
|
||||||
|
bool isFlush() const { return testCmdAttrib(IsFlush); }
|
||||||
|
|
||||||
const Command
|
const Command
|
||||||
responseCommand() const
|
responseCommand() const
|
||||||
|
@ -411,6 +414,7 @@ class Packet : public FastAlloc, public Printable
|
||||||
bool isLLSC() const { return cmd.isLLSC(); }
|
bool isLLSC() const { return cmd.isLLSC(); }
|
||||||
bool isError() const { return cmd.isError(); }
|
bool isError() const { return cmd.isError(); }
|
||||||
bool isPrint() const { return cmd.isPrint(); }
|
bool isPrint() const { return cmd.isPrint(); }
|
||||||
|
bool isFlush() const { return cmd.isFlush(); }
|
||||||
|
|
||||||
// Snoop flags
|
// Snoop flags
|
||||||
void assertMemInhibit() { flags.set(MEM_INHIBIT); }
|
void assertMemInhibit() { flags.set(MEM_INHIBIT); }
|
||||||
|
|
|
@ -78,7 +78,16 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
|
ST, AccessPermission:Busy, "ST", desc="S block transferring to L1";
|
||||||
OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
|
OT, AccessPermission:Busy, "OT", desc="O block transferring to L1";
|
||||||
MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
|
MT, AccessPermission:Busy, "MT", desc="M block transferring to L1";
|
||||||
MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L1";
|
MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L0";
|
||||||
|
|
||||||
|
//Transition States Related to Flushing
|
||||||
|
MI_F, AccessPermission:Busy, "MI_F", desc="Issued PutX due to a Flush, waiting for ack";
|
||||||
|
MM_F, AccessPermission:Busy, "MM_F", desc="Issued GETF due to a Flush, waiting for ack";
|
||||||
|
IM_F, AccessPermission:Busy, "IM_F", desc="Issued GetX due to a Flush";
|
||||||
|
ISM_F, AccessPermission:Read_Only, "ISM_F", desc="Issued GetX, received data, waiting for all acks";
|
||||||
|
SM_F, AccessPermission:Read_Only, "SM_F", desc="Issued GetX, we still have an old copy of the line";
|
||||||
|
OM_F, AccessPermission:Read_Only, "OM_F", desc="Issued GetX, received data";
|
||||||
|
MM_WF, AccessPermission:Busy, "MM_WF", desc="Issued GetX, received exclusive data";
|
||||||
}
|
}
|
||||||
|
|
||||||
// EVENTS
|
// EVENTS
|
||||||
|
@ -113,6 +122,10 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
// Triggers
|
// Triggers
|
||||||
All_acks, desc="Received all required data and message acks";
|
All_acks, desc="Received all required data and message acks";
|
||||||
All_acks_no_sharers, desc="Received all acks and no other processor has a shared copy";
|
All_acks_no_sharers, desc="Received all acks and no other processor has a shared copy";
|
||||||
|
|
||||||
|
// For Flush
|
||||||
|
Flush_line, desc="flush the cache line from all caches";
|
||||||
|
Block_Ack, desc="the directory is blocked and ready for the flush";
|
||||||
}
|
}
|
||||||
|
|
||||||
// TYPES
|
// TYPES
|
||||||
|
@ -221,6 +234,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
return Event:Ifetch;
|
return Event:Ifetch;
|
||||||
} else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
|
} else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) {
|
||||||
return Event:Store;
|
return Event:Store;
|
||||||
|
} else if ((type == RubyRequestType:FLUSH)) {
|
||||||
|
return Event:Flush_line;
|
||||||
} else {
|
} else {
|
||||||
error("Invalid RubyRequestType");
|
error("Invalid RubyRequestType");
|
||||||
}
|
}
|
||||||
|
@ -318,7 +333,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
Entry cache_entry := getCacheEntry(in_msg.Address);
|
Entry cache_entry := getCacheEntry(in_msg.Address);
|
||||||
TBE tbe := TBEs[in_msg.Address];
|
TBE tbe := TBEs[in_msg.Address];
|
||||||
|
|
||||||
if (in_msg.Type == CoherenceRequestType:GETX) {
|
if ((in_msg.Type == CoherenceRequestType:GETX) || (in_msg.Type == CoherenceRequestType:GETF)) {
|
||||||
trigger(Event:Other_GETX, in_msg.Address, cache_entry, tbe);
|
trigger(Event:Other_GETX, in_msg.Address, cache_entry, tbe);
|
||||||
} else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) {
|
} else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) {
|
||||||
trigger(Event:Merged_GETS, in_msg.Address, cache_entry, tbe);
|
trigger(Event:Merged_GETS, in_msg.Address, cache_entry, tbe);
|
||||||
|
@ -342,6 +357,8 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
trigger(Event:Writeback_Ack, in_msg.Address, cache_entry, tbe);
|
trigger(Event:Writeback_Ack, in_msg.Address, cache_entry, tbe);
|
||||||
} else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
|
} else if (in_msg.Type == CoherenceRequestType:WB_NACK) {
|
||||||
trigger(Event:Writeback_Nack, in_msg.Address, cache_entry, tbe);
|
trigger(Event:Writeback_Nack, in_msg.Address, cache_entry, tbe);
|
||||||
|
} else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) {
|
||||||
|
trigger(Event:Block_Ack, in_msg.Address, cache_entry, tbe);
|
||||||
} else {
|
} else {
|
||||||
error("Unexpected message");
|
error("Unexpected message");
|
||||||
}
|
}
|
||||||
|
@ -504,6 +521,19 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enqueues a GETF request for `address` on the request network, addressed
// to the directory that maps to this address, and initializes the TBE's
// pending-message counter to the number of L1 caches. A valid TBE is
// required (asserted below).
action(bf_issueGETF, "bf", desc="Issue GETF") {
|
||||||
|
enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := CoherenceRequestType:GETF;
|
||||||
|
out_msg.Requestor := machineID;
|
||||||
|
out_msg.Destination.add(map_Address_to_Directory(address));
|
||||||
|
out_msg.MessageSize := MessageSizeType:Request_Control;
|
||||||
|
out_msg.InitialRequestTime := get_time();
|
||||||
|
tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") {
|
action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") {
|
||||||
peek(forwardToCache_in, RequestMsg) {
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
@ -527,6 +557,29 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Answers the forwarded request at the head of forwardToCache_in with a
// DATA_EXCLUSIVE response whose data block and dirty bit are taken from
// the TBE (not from a cache entry). The ack count placed in the response
// is the L1 cache count for a directed probe and 2 otherwise. Timing
// fields and SilentAcks are copied through from the incoming request.
// A valid TBE is required (asserted below).
action(ct_sendExclusiveDataFromTBE, "ct", desc="Send exclusive data from tbe to requestor") {
|
||||||
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
|
||||||
|
out_msg.Sender := machineID;
|
||||||
|
out_msg.Destination.add(in_msg.Requestor);
|
||||||
|
out_msg.DataBlk := tbe.DataBlk;
|
||||||
|
out_msg.Dirty := tbe.Dirty;
|
||||||
|
if (in_msg.DirectedProbe) {
|
||||||
|
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||||
|
} else {
|
||||||
|
out_msg.Acks := 2;
|
||||||
|
}
|
||||||
|
out_msg.SilentAcks := in_msg.SilentAcks;
|
||||||
|
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||||
|
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
|
||||||
|
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(d_issuePUT, "d", desc="Issue PUT") {
|
action(d_issuePUT, "d", desc="Issue PUT") {
|
||||||
enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
|
enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
|
||||||
out_msg.Address := address;
|
out_msg.Address := address;
|
||||||
|
@ -537,6 +590,16 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enqueues a PUTF request for `address` on the request network, addressed
// to the directory that maps to this address. The message is sized as
// Writeback_Control and carries only the address, type and requestor set
// below.
action(df_issuePUTF, "df", desc="Issue PUTF") {
|
||||||
|
enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) {
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := CoherenceRequestType:PUTF;
|
||||||
|
out_msg.Requestor := machineID;
|
||||||
|
out_msg.Destination.add(map_Address_to_Directory(address));
|
||||||
|
out_msg.MessageSize := MessageSizeType:Writeback_Control;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(e_sendData, "e", desc="Send data from cache to requestor") {
|
action(e_sendData, "e", desc="Send data from cache to requestor") {
|
||||||
peek(forwardToCache_in, RequestMsg) {
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
@ -583,7 +646,31 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Answers the forwarded request at the head of forwardToCache_in with a
// DATA_SHARED response whose data block and dirty bit are taken from the
// TBE. The ack count placed in the response is the L1 cache count for a
// directed probe and 2 otherwise. Timing fields and SilentAcks are copied
// through from the incoming request. A valid TBE is required (asserted
// below).
action(et_sendDataSharedFromTBE, "\et", desc="Send data from TBE to requestor, keep a shared copy") {
|
||||||
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := CoherenceResponseType:DATA_SHARED;
|
||||||
|
out_msg.Sender := machineID;
|
||||||
|
out_msg.Destination.add(in_msg.Requestor);
|
||||||
|
out_msg.DataBlk := tbe.DataBlk;
|
||||||
|
out_msg.Dirty := tbe.Dirty;
|
||||||
|
DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
|
||||||
|
if (in_msg.DirectedProbe) {
|
||||||
|
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||||
|
} else {
|
||||||
|
out_msg.Acks := 2;
|
||||||
|
}
|
||||||
|
out_msg.SilentAcks := in_msg.SilentAcks;
|
||||||
|
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||||
|
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
|
||||||
|
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors, still the owner") {
|
action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors, still the owner") {
|
||||||
peek(forwardToCache_in, RequestMsg) {
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
@ -604,6 +691,26 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Answers a merged request at the head of forwardToCache_in with a single
// DATA_SHARED response sent to every requestor listed in
// in_msg.MergedRequestors. Data block and dirty bit come from the TBE and
// the ack count is always the L1 cache count. Timing fields and SilentAcks
// are copied through from the incoming request. A valid TBE is required
// (asserted below).
action(emt_sendDataSharedMultipleFromTBE, "emt", desc="Send data from tbe to all requestors") {
|
||||||
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := CoherenceResponseType:DATA_SHARED;
|
||||||
|
out_msg.Sender := machineID;
|
||||||
|
out_msg.Destination := in_msg.MergedRequestors;
|
||||||
|
out_msg.DataBlk := tbe.DataBlk;
|
||||||
|
out_msg.Dirty := tbe.Dirty;
|
||||||
|
DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk);
|
||||||
|
out_msg.Acks := machineCount(MachineType:L1Cache);
|
||||||
|
out_msg.SilentAcks := in_msg.SilentAcks;
|
||||||
|
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||||
|
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
|
||||||
|
out_msg.ForwardRequestTime := in_msg.ForwardRequestTime;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(f_sendAck, "f", desc="Send ack from cache to requestor") {
|
action(f_sendAck, "f", desc="Send ack from cache to requestor") {
|
||||||
peek(forwardToCache_in, RequestMsg) {
|
peek(forwardToCache_in, RequestMsg) {
|
||||||
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) {
|
||||||
|
@ -706,6 +813,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Signals flush completion to the sequencer by invoking its writeCallback
// for `address`, passing the data block currently held in the TBE. A valid
// TBE is required (asserted below); the block is also traced via DPRINTF.
action(hh_flush_hit, "\hf", desc="Notify sequencer that flush completed.") {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
DPRINTF(RubySlicc, "%s\n", tbe.DataBlk);
|
||||||
|
sequencer.writeCallback(address, GenericMachineType:L1Cache,tbe.DataBlk);
|
||||||
|
}
|
||||||
|
|
||||||
action(sx_external_store_hit, "sx", desc="store required external msgs.") {
|
action(sx_external_store_hit, "sx", desc="store required external msgs.") {
|
||||||
assert(is_valid(cache_entry));
|
assert(is_valid(cache_entry));
|
||||||
assert(is_valid(tbe));
|
assert(is_valid(tbe));
|
||||||
|
@ -747,6 +860,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
tbe.Sharers := false;
|
tbe.Sharers := false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allocates a TBE for `address`, binds it as the current tbe, and clears
// its Dirty and Sharers flags. Unlike actions that seed a TBE from a cache
// entry, nothing is copied into the TBE's data block here.
action(it_allocateTBE, "it", desc="Allocate TBE") {
|
||||||
|
check_allocate(TBEs);
|
||||||
|
TBEs.allocate(address);
|
||||||
|
set_tbe(TBEs[address]);
|
||||||
|
tbe.Dirty := false;
|
||||||
|
tbe.Sharers := false;
|
||||||
|
}
|
||||||
|
|
||||||
action(j_popTriggerQueue, "j", desc="Pop trigger queue.") {
|
action(j_popTriggerQueue, "j", desc="Pop trigger queue.") {
|
||||||
triggerQueue_in.dequeue();
|
triggerQueue_in.dequeue();
|
||||||
}
|
}
|
||||||
|
@ -980,6 +1101,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies the data block and dirty bit from the response at the head of
// responseToCache_in into the TBE, overwriting whatever the TBE held.
// A valid TBE is required (asserted below).
action(uf_writeDataToCacheTBE, "uf", desc="Write data to TBE") {
|
||||||
|
peek(responseToCache_in, ResponseMsg) {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
tbe.DataBlk := in_msg.DataBlk;
|
||||||
|
tbe.Dirty := in_msg.Dirty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
|
action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") {
|
||||||
peek(responseToCache_in, ResponseMsg) {
|
peek(responseToCache_in, ResponseMsg) {
|
||||||
assert(is_valid(cache_entry));
|
assert(is_valid(cache_entry));
|
||||||
|
@ -990,6 +1119,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
cache_entry.Dirty := in_msg.Dirty || cache_entry.Dirty;
|
cache_entry.Dirty := in_msg.Dirty || cache_entry.Dirty;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that the data block in the response at the head of
// responseToCache_in matches the block already held in the TBE (asserted),
// then stores the incoming block and ORs the incoming dirty bit into the
// TBE's dirty bit. A valid TBE is required (asserted below).
action(vt_writeDataToTBEVerify, "vt", desc="Write data to TBE, assert it was same as before") {
|
||||||
|
peek(responseToCache_in, ResponseMsg) {
|
||||||
|
assert(is_valid(tbe));
|
||||||
|
DPRINTF(RubySlicc, "Cached Data Block: %s, Msg Data Block: %s\n",
|
||||||
|
tbe.DataBlk, in_msg.DataBlk);
|
||||||
|
assert(tbe.DataBlk == in_msg.DataBlk);
|
||||||
|
tbe.DataBlk := in_msg.DataBlk;
|
||||||
|
tbe.Dirty := in_msg.Dirty || tbe.Dirty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block. Sets the cache to invalid, allowing a replacement in parallel with a fetch.") {
|
action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block. Sets the cache to invalid, allowing a replacement in parallel with a fetch.") {
|
||||||
if (L1DcacheMemory.isTagPresent(address)) {
|
if (L1DcacheMemory.isTagPresent(address)) {
|
||||||
|
@ -1051,23 +1191,35 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
//*****************************************************
|
//*****************************************************
|
||||||
|
|
||||||
// Transitions for Load/Store/L2_Replacement from transient states
|
// Transitions for Load/Store/L2_Replacement from transient states
|
||||||
transition({IM, SM, ISM, OM, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
|
transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) {
|
||||||
zz_stallAndWaitMandatoryQueue;
|
zz_stallAndWaitMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition({M_W, MM_W}, {L2_Replacement}) {
|
transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II}, {Flush_line}) {
|
||||||
zz_stallAndWaitMandatoryQueue;
|
zz_stallAndWaitMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT}, {Load, Ifetch}) {
|
transition({M_W, MM_W}, {L2_Replacement, Flush_line}) {
|
||||||
zz_stallAndWaitMandatoryQueue;
|
zz_stallAndWaitMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT}, L1_to_L2) {
|
transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MM_F, OM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) {
|
||||||
zz_stallAndWaitMandatoryQueue;
|
zz_stallAndWaitMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) {
|
transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT, IM_F, SM_F, ISM_F, OM_F, MM_WF, MI_F, MM_F}, L1_to_L2) {
|
||||||
|
zz_stallAndWaitMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition({MI_F, MM_F}, {Store}) {
|
||||||
|
zz_stallAndWaitMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition({MM_F, MI_F}, {Flush_line}) {
|
||||||
|
zz_stallAndWaitMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) {
|
||||||
// stall
|
// stall
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1241,6 +1393,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
k_popMandatoryQueue;
|
k_popMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(I, Flush_line, IM_F) {
|
||||||
|
it_allocateTBE;
|
||||||
|
bf_issueGETF;
|
||||||
|
uu_profileMiss;
|
||||||
|
k_popMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(I, L2_Replacement) {
|
transition(I, L2_Replacement) {
|
||||||
rr_deallocateL2CacheBlock;
|
rr_deallocateL2CacheBlock;
|
||||||
ka_wakeUpAllDependents;
|
ka_wakeUpAllDependents;
|
||||||
|
@ -1264,6 +1423,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
k_popMandatoryQueue;
|
k_popMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(S, Flush_line, SM_F) {
|
||||||
|
i_allocateTBE;
|
||||||
|
bf_issueGETF;
|
||||||
|
uu_profileMiss;
|
||||||
|
gg_deallocateL1CacheBlock;
|
||||||
|
k_popMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(S, L2_Replacement, I) {
|
transition(S, L2_Replacement, I) {
|
||||||
rr_deallocateL2CacheBlock;
|
rr_deallocateL2CacheBlock;
|
||||||
ka_wakeUpAllDependents;
|
ka_wakeUpAllDependents;
|
||||||
|
@ -1292,6 +1459,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
uu_profileMiss;
|
uu_profileMiss;
|
||||||
k_popMandatoryQueue;
|
k_popMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
transition(O, Flush_line, OM_F) {
|
||||||
|
i_allocateTBE;
|
||||||
|
bf_issueGETF;
|
||||||
|
p_decrementNumberOfMessagesByOne;
|
||||||
|
uu_profileMiss;
|
||||||
|
gg_deallocateL1CacheBlock;
|
||||||
|
k_popMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(O, L2_Replacement, OI) {
|
transition(O, L2_Replacement, OI) {
|
||||||
i_allocateTBE;
|
i_allocateTBE;
|
||||||
|
@ -1326,6 +1501,20 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
k_popMandatoryQueue;
|
k_popMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition({MM, M}, Flush_line, MM_F) {
|
||||||
|
i_allocateTBE;
|
||||||
|
bf_issueGETF;
|
||||||
|
p_decrementNumberOfMessagesByOne;
|
||||||
|
gg_deallocateL1CacheBlock;
|
||||||
|
k_popMandatoryQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(MM_F, Block_Ack, MI_F) {
|
||||||
|
df_issuePUTF;
|
||||||
|
l_popForwardQueue;
|
||||||
|
kd_wakeUpDependents;
|
||||||
|
}
|
||||||
|
|
||||||
transition(MM, L2_Replacement, MI) {
|
transition(MM, L2_Replacement, MI) {
|
||||||
i_allocateTBE;
|
i_allocateTBE;
|
||||||
d_issuePUT;
|
d_issuePUT;
|
||||||
|
@ -1398,12 +1587,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
|
|
||||||
// Transitions from IM
|
// Transitions from IM
|
||||||
|
|
||||||
transition(IM, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
transition({IM, IM_F}, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||||
f_sendAck;
|
f_sendAck;
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition(IM, Ack) {
|
transition({IM, IM_F, MM_F}, Ack) {
|
||||||
m_decrementNumberOfMessages;
|
m_decrementNumberOfMessages;
|
||||||
o_checkForCompletion;
|
o_checkForCompletion;
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
|
@ -1416,6 +1605,13 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(IM_F, Data, ISM_F) {
|
||||||
|
uf_writeDataToCacheTBE;
|
||||||
|
m_decrementNumberOfMessages;
|
||||||
|
o_checkForCompletion;
|
||||||
|
n_popResponseQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(IM, Exclusive_Data, MM_W) {
|
transition(IM, Exclusive_Data, MM_W) {
|
||||||
u_writeDataToCache;
|
u_writeDataToCache;
|
||||||
m_decrementNumberOfMessages;
|
m_decrementNumberOfMessages;
|
||||||
|
@ -1425,8 +1621,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
kd_wakeUpDependents;
|
kd_wakeUpDependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(IM_F, Exclusive_Data, MM_WF) {
|
||||||
|
uf_writeDataToCacheTBE;
|
||||||
|
m_decrementNumberOfMessages;
|
||||||
|
o_checkForCompletion;
|
||||||
|
n_popResponseQueue;
|
||||||
|
}
|
||||||
|
|
||||||
// Transitions from SM
|
// Transitions from SM
|
||||||
transition(SM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
|
transition({SM, SM_F}, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
|
||||||
ff_sendAckShared;
|
ff_sendAckShared;
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
}
|
}
|
||||||
|
@ -1436,7 +1639,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition(SM, Ack) {
|
transition(SM_F, {Other_GETX, Invalidate}, IM_F) {
|
||||||
|
f_sendAck;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition({SM, SM_F}, Ack) {
|
||||||
m_decrementNumberOfMessages;
|
m_decrementNumberOfMessages;
|
||||||
o_checkForCompletion;
|
o_checkForCompletion;
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
|
@ -1449,8 +1657,15 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(SM_F, {Data, Exclusive_Data}, ISM_F) {
|
||||||
|
vt_writeDataToTBEVerify;
|
||||||
|
m_decrementNumberOfMessages;
|
||||||
|
o_checkForCompletion;
|
||||||
|
n_popResponseQueue;
|
||||||
|
}
|
||||||
|
|
||||||
// Transitions from ISM
|
// Transitions from ISM
|
||||||
transition(ISM, Ack) {
|
transition({ISM, ISM_F}, Ack) {
|
||||||
m_decrementNumberOfMessages;
|
m_decrementNumberOfMessages;
|
||||||
o_checkForCompletion;
|
o_checkForCompletion;
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
|
@ -1464,6 +1679,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
kd_wakeUpDependents;
|
kd_wakeUpDependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(ISM_F, All_acks_no_sharers, MI_F) {
|
||||||
|
df_issuePUTF;
|
||||||
|
j_popTriggerQueue;
|
||||||
|
kd_wakeUpDependents;
|
||||||
|
}
|
||||||
|
|
||||||
// Transitions from OM
|
// Transitions from OM
|
||||||
|
|
||||||
transition(OM, {Other_GETX, Invalidate}, IM) {
|
transition(OM, {Other_GETX, Invalidate}, IM) {
|
||||||
|
@ -1472,6 +1693,12 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(OM_F, {Other_GETX, Invalidate}, IM_F) {
|
||||||
|
q_sendDataFromTBEToCache;
|
||||||
|
pp_incrementNumberOfMessagesByOne;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(OM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
|
transition(OM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
|
||||||
ee_sendDataShared;
|
ee_sendDataShared;
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
|
@ -1482,7 +1709,17 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition(OM, Ack) {
|
transition(OM_F, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) {
|
||||||
|
et_sendDataSharedFromTBE;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(OM_F, Merged_GETS) {
|
||||||
|
emt_sendDataSharedMultipleFromTBE;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition({OM, OM_F}, Ack) {
|
||||||
m_decrementNumberOfMessages;
|
m_decrementNumberOfMessages;
|
||||||
o_checkForCompletion;
|
o_checkForCompletion;
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
|
@ -1496,6 +1733,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
kd_wakeUpDependents;
|
kd_wakeUpDependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition({MM_F, OM_F}, {All_acks, All_acks_no_sharers}, MI_F) {
|
||||||
|
df_issuePUTF;
|
||||||
|
j_popTriggerQueue;
|
||||||
|
kd_wakeUpDependents;
|
||||||
|
}
|
||||||
// Transitions from IS
|
// Transitions from IS
|
||||||
|
|
||||||
transition(IS, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
transition(IS, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) {
|
||||||
|
@ -1583,7 +1825,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
k_popMandatoryQueue;
|
k_popMandatoryQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition(MM_W, Ack) {
|
transition({MM_W, MM_WF}, Ack) {
|
||||||
m_decrementNumberOfMessages;
|
m_decrementNumberOfMessages;
|
||||||
o_checkForCompletion;
|
o_checkForCompletion;
|
||||||
n_popResponseQueue;
|
n_popResponseQueue;
|
||||||
|
@ -1596,6 +1838,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
kd_wakeUpDependents;
|
kd_wakeUpDependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(MM_WF, All_acks_no_sharers, MI_F) {
|
||||||
|
df_issuePUTF;
|
||||||
|
j_popTriggerQueue;
|
||||||
|
kd_wakeUpDependents;
|
||||||
|
}
|
||||||
// Transitions from M_W
|
// Transitions from M_W
|
||||||
|
|
||||||
transition(M_W, Store, MM_W) {
|
transition(M_W, Store, MM_W) {
|
||||||
|
@ -1640,6 +1887,14 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
kd_wakeUpDependents;
|
kd_wakeUpDependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(MI_F, Writeback_Ack, I) {
|
||||||
|
hh_flush_hit;
|
||||||
|
t_sendExclusiveDataFromTBEToMemory;
|
||||||
|
s_deallocateTBE;
|
||||||
|
l_popForwardQueue;
|
||||||
|
kd_wakeUpDependents;
|
||||||
|
}
|
||||||
|
|
||||||
transition(OI, Writeback_Ack, I) {
|
transition(OI, Writeback_Ack, I) {
|
||||||
qq_sendDataFromTBEToMemory;
|
qq_sendDataFromTBEToMemory;
|
||||||
s_deallocateTBE;
|
s_deallocateTBE;
|
||||||
|
@ -1665,4 +1920,31 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||||
l_popForwardQueue;
|
l_popForwardQueue;
|
||||||
kd_wakeUpDependents;
|
kd_wakeUpDependents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(MM_F, {Other_GETX, Invalidate}, IM_F) {
|
||||||
|
ct_sendExclusiveDataFromTBE;
|
||||||
|
pp_incrementNumberOfMessagesByOne;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(MM_F, Other_GETS, IM_F) {
|
||||||
|
ct_sendExclusiveDataFromTBE;
|
||||||
|
pp_incrementNumberOfMessagesByOne;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(MM_F, NC_DMA_GETS) {
|
||||||
|
ct_sendExclusiveDataFromTBE;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(MM_F, Other_GETS_No_Mig, OM_F) {
|
||||||
|
et_sendDataSharedFromTBE;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(MM_F, Merged_GETS, OM_F) {
|
||||||
|
emt_sendDataSharedMultipleFromTBE;
|
||||||
|
l_popForwardQueue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,6 +88,9 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
WB, AccessPermission:Invalid, desc="Blocked on a writeback";
|
WB, AccessPermission:Invalid, desc="Blocked on a writeback";
|
||||||
WB_O_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to O";
|
WB_O_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to O";
|
||||||
WB_E_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to E";
|
WB_E_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to E";
|
||||||
|
|
||||||
|
NO_F, AccessPermission:Invalid, desc="Blocked on a flush";
|
||||||
|
NO_F_W, AccessPermission:Invalid, desc="Not Owner, Blocked, waiting for Dram";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Events
|
// Events
|
||||||
|
@ -126,6 +129,8 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
All_acks_and_owner_data, desc="Received shared data and message acks";
|
All_acks_and_owner_data, desc="Received shared data and message acks";
|
||||||
All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
|
All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
|
||||||
All_Unblocks, desc="Received all unblocks for a merged gets request";
|
All_Unblocks, desc="Received all unblocks for a merged gets request";
|
||||||
|
GETF, desc="A GETF arrives";
|
||||||
|
PUTF, desc="A PUTF arrives";
|
||||||
}
|
}
|
||||||
|
|
||||||
// TYPES
|
// TYPES
|
||||||
|
@ -233,6 +238,8 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
return Event:GETS;
|
return Event:GETS;
|
||||||
} else if (type == CoherenceRequestType:GETX) {
|
} else if (type == CoherenceRequestType:GETX) {
|
||||||
return Event:GETX;
|
return Event:GETX;
|
||||||
|
} else if (type == CoherenceRequestType:GETF) {
|
||||||
|
return Event:GETF;
|
||||||
} else {
|
} else {
|
||||||
error("Invalid CoherenceRequestType");
|
error("Invalid CoherenceRequestType");
|
||||||
}
|
}
|
||||||
|
@ -355,6 +362,8 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
TBE tbe := TBEs[in_msg.Address];
|
TBE tbe := TBEs[in_msg.Address];
|
||||||
if (in_msg.Type == CoherenceRequestType:PUT) {
|
if (in_msg.Type == CoherenceRequestType:PUT) {
|
||||||
trigger(Event:PUT, in_msg.Address, pf_entry, tbe);
|
trigger(Event:PUT, in_msg.Address, pf_entry, tbe);
|
||||||
|
} else if (in_msg.Type == CoherenceRequestType:PUTF) {
|
||||||
|
trigger(Event:PUTF, in_msg.Address, pf_entry, tbe);
|
||||||
} else {
|
} else {
|
||||||
if (probe_filter_enabled || full_bit_dir_enabled) {
|
if (probe_filter_enabled || full_bit_dir_enabled) {
|
||||||
if (is_valid(pf_entry)) {
|
if (is_valid(pf_entry)) {
|
||||||
|
@ -453,6 +462,20 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
action(oc_sendBlockAck, "oc", desc="Send block ack to the owner") {
|
||||||
|
peek(requestQueue_in, RequestMsg) {
|
||||||
|
if ((probe_filter_enabled || full_bit_dir_enabled) && (in_msg.Requestor == cache_entry.Owner)) {
|
||||||
|
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := CoherenceRequestType:BLOCK_ACK;
|
||||||
|
out_msg.Requestor := in_msg.Requestor;
|
||||||
|
out_msg.Destination.add(in_msg.Requestor);
|
||||||
|
out_msg.MessageSize := MessageSizeType:Writeback_Control;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
|
action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") {
|
||||||
peek(requestQueue_in, RequestMsg) {
|
peek(requestQueue_in, RequestMsg) {
|
||||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||||
|
@ -966,6 +989,42 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
action(nofc_forwardRequestConditionalOwner, "nofc", desc="Forward request to one or more nodes if the requestor is not the owner") {
|
||||||
|
assert(machineCount(MachineType:L1Cache) > 1);
|
||||||
|
|
||||||
|
if (probe_filter_enabled || full_bit_dir_enabled) {
|
||||||
|
peek(requestQueue_in, RequestMsg) {
|
||||||
|
if (in_msg.Requestor != cache_entry.Owner) {
|
||||||
|
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||||
|
assert(is_valid(cache_entry));
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := in_msg.Type;
|
||||||
|
out_msg.Requestor := in_msg.Requestor;
|
||||||
|
out_msg.Destination.add(cache_entry.Owner);
|
||||||
|
out_msg.MessageSize := MessageSizeType:Request_Control;
|
||||||
|
out_msg.DirectedProbe := true;
|
||||||
|
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
|
||||||
|
out_msg.ForwardRequestTime := get_time();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
peek(requestQueue_in, RequestMsg) {
|
||||||
|
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||||
|
out_msg.Address := address;
|
||||||
|
out_msg.Type := in_msg.Type;
|
||||||
|
out_msg.Requestor := in_msg.Requestor;
|
||||||
|
out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches
|
||||||
|
out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor
|
||||||
|
out_msg.MessageSize := MessageSizeType:Broadcast_Control;
|
||||||
|
out_msg.InitialRequestTime := in_msg.InitialRequestTime;
|
||||||
|
out_msg.ForwardRequestTime := get_time();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
action(f_forwardWriteFromDma, "fw", desc="Forward requests") {
|
action(f_forwardWriteFromDma, "fw", desc="Forward requests") {
|
||||||
assert(is_valid(tbe));
|
assert(is_valid(tbe));
|
||||||
if (tbe.NumPendingMsgs > 0) {
|
if (tbe.NumPendingMsgs > 0) {
|
||||||
|
@ -1185,6 +1244,16 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
i_popIncomingRequestQueue;
|
i_popIncomingRequestQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(E, GETF, NO_F_W) {
|
||||||
|
pfa_probeFilterAllocate;
|
||||||
|
v_allocateTBE;
|
||||||
|
rx_recordExclusiveInTBE;
|
||||||
|
saa_setAcksToAllIfPF;
|
||||||
|
qf_queueMemoryFetchRequest;
|
||||||
|
fn_forwardRequestIfNecessary;
|
||||||
|
i_popIncomingRequestQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(E, GETS, NO_B_W) {
|
transition(E, GETS, NO_B_W) {
|
||||||
pfa_probeFilterAllocate;
|
pfa_probeFilterAllocate;
|
||||||
v_allocateTBE;
|
v_allocateTBE;
|
||||||
|
@ -1223,6 +1292,17 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
i_popIncomingRequestQueue;
|
i_popIncomingRequestQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(O, GETF, NO_F_W) {
|
||||||
|
r_setMRU;
|
||||||
|
v_allocateTBE;
|
||||||
|
r_recordDataInTBE;
|
||||||
|
sa_setAcksToOne;
|
||||||
|
qf_queueMemoryFetchRequest;
|
||||||
|
fb_forwardRequestBcast;
|
||||||
|
cs_clearSharers;
|
||||||
|
i_popIncomingRequestQueue;
|
||||||
|
}
|
||||||
|
|
||||||
// This transition is dumb, if a shared copy exists on-chip, then that should
|
// This transition is dumb, if a shared copy exists on-chip, then that should
|
||||||
// provide data, not slow off-chip dram. The problem is that the current
|
// provide data, not slow off-chip dram. The problem is that the current
|
||||||
// caches don't provide data in S state
|
// caches don't provide data in S state
|
||||||
|
@ -1286,6 +1366,13 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
i_popIncomingRequestQueue;
|
i_popIncomingRequestQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(NX, GETF, NO_F) {
|
||||||
|
r_setMRU;
|
||||||
|
fb_forwardRequestBcast;
|
||||||
|
cs_clearSharers;
|
||||||
|
i_popIncomingRequestQueue;
|
||||||
|
}
|
||||||
|
|
||||||
// Transitions out of NO state
|
// Transitions out of NO state
|
||||||
transition(NO, GETX, NO_B) {
|
transition(NO, GETX, NO_B) {
|
||||||
r_setMRU;
|
r_setMRU;
|
||||||
|
@ -1295,6 +1382,15 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
i_popIncomingRequestQueue;
|
i_popIncomingRequestQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(NO, GETF, NO_F) {
|
||||||
|
r_setMRU;
|
||||||
|
//ano_assertNotOwner;
|
||||||
|
nofc_forwardRequestConditionalOwner; //forward request if the requester is not the owner
|
||||||
|
cs_clearSharers;
|
||||||
|
oc_sendBlockAck; // send ack if the owner
|
||||||
|
i_popIncomingRequestQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(S, GETX, NO_B) {
|
transition(S, GETX, NO_B) {
|
||||||
r_setMRU;
|
r_setMRU;
|
||||||
fb_forwardRequestBcast;
|
fb_forwardRequestBcast;
|
||||||
|
@ -1302,6 +1398,13 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
i_popIncomingRequestQueue;
|
i_popIncomingRequestQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(S, GETF, NO_F) {
|
||||||
|
r_setMRU;
|
||||||
|
fb_forwardRequestBcast;
|
||||||
|
cs_clearSharers;
|
||||||
|
i_popIncomingRequestQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(S, GETS, NO_B) {
|
transition(S, GETS, NO_B) {
|
||||||
r_setMRU;
|
r_setMRU;
|
||||||
ano_assertNotOwner;
|
ano_assertNotOwner;
|
||||||
|
@ -1348,12 +1451,16 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
// Blocked transient states
|
// Blocked transient states
|
||||||
transition({NO_B_X, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
transition({NO_B_X, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
||||||
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
|
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
|
||||||
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
|
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W},
|
||||||
{GETS, GETX, PUT, Pf_Replacement}) {
|
{GETS, GETX, GETF, PUT, Pf_Replacement}) {
|
||||||
z_stallAndWaitRequest;
|
z_stallAndWaitRequest;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition(NO_B, GETX, NO_B_X) {
|
transition(NO_F, {GETS, GETX, GETF, PUT, Pf_Replacement}){
|
||||||
|
z_stallAndWaitRequest;
|
||||||
|
}
|
||||||
|
|
||||||
|
transition(NO_B, {GETX, GETF}, NO_B_X) {
|
||||||
z_stallAndWaitRequest;
|
z_stallAndWaitRequest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1361,13 +1468,13 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
z_stallAndWaitRequest;
|
z_stallAndWaitRequest;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition(NO_B_S, {GETX, PUT, Pf_Replacement}) {
|
transition(NO_B_S, {GETX, GETF, PUT, Pf_Replacement}) {
|
||||||
z_stallAndWaitRequest;
|
z_stallAndWaitRequest;
|
||||||
}
|
}
|
||||||
|
|
||||||
transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
||||||
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
|
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W,
|
||||||
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R},
|
NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W},
|
||||||
{DMA_READ, DMA_WRITE}) {
|
{DMA_READ, DMA_WRITE}) {
|
||||||
zd_stallAndWaitDMARequest;
|
zd_stallAndWaitDMARequest;
|
||||||
}
|
}
|
||||||
|
@ -1444,6 +1551,12 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
l_popMemQueue;
|
l_popMemQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(NO_F_W, Memory_Data, NO_F) {
|
||||||
|
d_sendData;
|
||||||
|
w_deallocateTBE;
|
||||||
|
l_popMemQueue;
|
||||||
|
}
|
||||||
|
|
||||||
transition(NO_DR_B_W, Memory_Data, NO_DR_B) {
|
transition(NO_DR_B_W, Memory_Data, NO_DR_B) {
|
||||||
r_recordMemoryData;
|
r_recordMemoryData;
|
||||||
o_checkForCompletion;
|
o_checkForCompletion;
|
||||||
|
@ -1738,4 +1851,16 @@ machine(Directory, "AMD Hammer-like protocol")
|
||||||
k_wakeUpDependents;
|
k_wakeUpDependents;
|
||||||
j_popIncomingUnblockQueue;
|
j_popIncomingUnblockQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transition(NO_F, PUTF, WB) {
|
||||||
|
a_sendWriteBackAck;
|
||||||
|
i_popIncomingRequestQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
//possible race between GETF and UnblockM -- not sure needed any more?
|
||||||
|
transition(NO_F, UnblockM) {
|
||||||
|
us_updateSharerIfFBD;
|
||||||
|
uo_updateOwnerIfPf;
|
||||||
|
j_popIncomingUnblockQueue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,9 @@ enumeration(CoherenceRequestType, desc="...") {
|
||||||
PUT, desc="Put Ownership";
|
PUT, desc="Put Ownership";
|
||||||
WB_ACK, desc="Writeback ack";
|
WB_ACK, desc="Writeback ack";
|
||||||
WB_NACK, desc="Writeback neg. ack";
|
WB_NACK, desc="Writeback neg. ack";
|
||||||
|
PUTF, desc="PUT on a Flush";
|
||||||
|
GETF, desc="Issue exclusive for Flushing";
|
||||||
|
BLOCK_ACK, desc="Dir Block ack";
|
||||||
INV, desc="Invalidate";
|
INV, desc="Invalidate";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -119,6 +119,7 @@ enumeration(RubyRequestType, desc="...", default="RubyRequestType_NULL") {
|
||||||
Locked_RMW_Write, desc="";
|
Locked_RMW_Write, desc="";
|
||||||
COMMIT, desc="Commit version";
|
COMMIT, desc="Commit version";
|
||||||
NULL, desc="Invalid request type";
|
NULL, desc="Invalid request type";
|
||||||
|
FLUSH, desc="Flush request type";
|
||||||
}
|
}
|
||||||
|
|
||||||
enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
|
enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
|
||||||
|
|
|
@ -244,6 +244,8 @@ RubyPort::M5Port::recvTiming(PacketPtr pkt)
|
||||||
// Note: M5 packets do not differentiate ST from RMW_Write
|
// Note: M5 packets do not differentiate ST from RMW_Write
|
||||||
//
|
//
|
||||||
type = RubyRequestType_ST;
|
type = RubyRequestType_ST;
|
||||||
|
} else if (pkt->isFlush()) {
|
||||||
|
type = RubyRequestType_FLUSH;
|
||||||
} else {
|
} else {
|
||||||
panic("Unsupported ruby packet type\n");
|
panic("Unsupported ruby packet type\n");
|
||||||
}
|
}
|
||||||
|
@ -335,7 +337,7 @@ RubyPort::M5Port::hitCallback(PacketPtr pkt)
|
||||||
|
|
||||||
//
|
//
|
||||||
// Unless specified at configuraiton, all responses except failed SC
|
// Unless specified at configuraiton, all responses except failed SC
|
||||||
// operations access M5 physical memory.
|
// and Flush operations access M5 physical memory.
|
||||||
//
|
//
|
||||||
bool accessPhysMem = access_phys_mem;
|
bool accessPhysMem = access_phys_mem;
|
||||||
|
|
||||||
|
@ -361,11 +363,19 @@ RubyPort::M5Port::hitCallback(PacketPtr pkt)
|
||||||
pkt->convertLlToRead();
|
pkt->convertLlToRead();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Flush requests don't access physical memory
|
||||||
|
//
|
||||||
|
if (pkt->isFlush()) {
|
||||||
|
accessPhysMem = false;
|
||||||
|
}
|
||||||
|
|
||||||
DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);
|
DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);
|
||||||
|
|
||||||
if (accessPhysMem) {
|
if (accessPhysMem) {
|
||||||
ruby_port->physMemPort->sendAtomic(pkt);
|
ruby_port->physMemPort->sendAtomic(pkt);
|
||||||
} else {
|
} else if (needsResponse) {
|
||||||
pkt->makeResponse();
|
pkt->makeResponse();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -234,7 +234,8 @@ Sequencer::insertRequest(SequencerRequest* request)
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Load_Linked) ||
|
(request->ruby_request.m_Type == RubyRequestType_Load_Linked) ||
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Store_Conditional) ||
|
(request->ruby_request.m_Type == RubyRequestType_Store_Conditional) ||
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) ||
|
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) ||
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write)) {
|
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write) ||
|
||||||
|
(request->ruby_request.m_Type == RubyRequestType_FLUSH)) {
|
||||||
pair<RequestTable::iterator, bool> r =
|
pair<RequestTable::iterator, bool> r =
|
||||||
m_writeRequestTable.insert(RequestTable::value_type(line_addr, 0));
|
m_writeRequestTable.insert(RequestTable::value_type(line_addr, 0));
|
||||||
bool success = r.second;
|
bool success = r.second;
|
||||||
|
@ -338,7 +339,7 @@ Sequencer::handleLlsc(const Address& address, SequencerRequest* request)
|
||||||
// previously locked cache lines?
|
// previously locked cache lines?
|
||||||
//
|
//
|
||||||
m_dataCache_ptr->setLocked(address, m_version);
|
m_dataCache_ptr->setLocked(address, m_version);
|
||||||
} else if (m_dataCache_ptr->isLocked(address, m_version)) {
|
} else if ((m_dataCache_ptr->isTagPresent(address)) && (m_dataCache_ptr->isLocked(address, m_version))) {
|
||||||
//
|
//
|
||||||
// Normal writes should clear the locked address
|
// Normal writes should clear the locked address
|
||||||
//
|
//
|
||||||
|
@ -385,7 +386,9 @@ Sequencer::writeCallback(const Address& address,
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Load_Linked) ||
|
(request->ruby_request.m_Type == RubyRequestType_Load_Linked) ||
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Store_Conditional) ||
|
(request->ruby_request.m_Type == RubyRequestType_Store_Conditional) ||
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) ||
|
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Read) ||
|
||||||
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write));
|
(request->ruby_request.m_Type == RubyRequestType_Locked_RMW_Write) ||
|
||||||
|
(request->ruby_request.m_Type == RubyRequestType_FLUSH));
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// For Alpha, properly handle LL, SC, and write requests with respect to
|
// For Alpha, properly handle LL, SC, and write requests with respect to
|
||||||
|
@ -619,6 +622,9 @@ Sequencer::issueRequest(const RubyRequest& request)
|
||||||
case RubyRequestType_LD:
|
case RubyRequestType_LD:
|
||||||
ctype = RubyRequestType_LD;
|
ctype = RubyRequestType_LD;
|
||||||
break;
|
break;
|
||||||
|
case RubyRequestType_FLUSH:
|
||||||
|
ctype = RubyRequestType_FLUSH;
|
||||||
|
break;
|
||||||
case RubyRequestType_ST:
|
case RubyRequestType_ST:
|
||||||
case RubyRequestType_RMW_Read:
|
case RubyRequestType_RMW_Read:
|
||||||
case RubyRequestType_RMW_Write:
|
case RubyRequestType_RMW_Write:
|
||||||
|
|
Loading…
Reference in a new issue