ruby: MOESI hammer support for DMA reads and writes
This commit is contained in:
parent
dbb2c111cc
commit
cef3c56163
5 changed files with 657 additions and 62 deletions
|
@ -39,11 +39,17 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
|
||||
MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false";
|
||||
MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false";
|
||||
//MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true";
|
||||
//
|
||||
// For a finite buffered network, note that the DMA response network only
|
||||
// works at this relatively higher numbered (lower priority) virtual network
|
||||
// because the trigger queue decouples cache responses from DMA responses.
|
||||
//
|
||||
MessageBuffer dmaResponseFromDir, network="To", virtual_network="4", ordered="true";
|
||||
|
||||
MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false";
|
||||
MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false";
|
||||
//MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true";
|
||||
MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false";
|
||||
MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false";
|
||||
MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true";
|
||||
|
||||
// STATES
|
||||
enumeration(State, desc="Directory states", default="Directory_State_E") {
|
||||
|
@ -57,6 +63,13 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
O_B_W, desc="Owner, Blocked, waiting for Dram";
|
||||
NO_W, desc="Not Owner, waiting for Dram";
|
||||
O_W, desc="Owner, waiting for Dram";
|
||||
NO_DW_B_W, desc="Not Owner, Dma Write waiting for Dram and cache responses";
|
||||
NO_DR_B_W, desc="Not Owner, Dma Read waiting for Dram and cache responses";
|
||||
NO_DR_B_D, desc="Not Owner, Dma Read waiting for cache responses including dirty data";
|
||||
NO_DR_B, desc="Not Owner, Dma Read waiting for cache responses";
|
||||
NO_DW_W, desc="Not Owner, Dma Write waiting for Dram";
|
||||
O_DR_B_W, desc="Owner, Dma Read waiting for Dram and cache responses";
|
||||
O_DR_B, desc="Owner, Dma Read waiting for cache responses";
|
||||
WB, desc="Blocked on a writeback";
|
||||
WB_O_W, desc="Blocked on memory write, will go to O";
|
||||
WB_E_W, desc="Blocked on memory write, will go to E";
|
||||
|
@ -73,9 +86,23 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)";
|
||||
Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)";
|
||||
|
||||
// DMA requests
|
||||
DMA_READ, desc="A DMA Read memory request";
|
||||
DMA_WRITE, desc="A DMA Write memory request";
|
||||
|
||||
// Memory Controller
|
||||
Memory_Data, desc="Fetched data from memory arrives";
|
||||
Memory_Ack, desc="Writeback Ack from memory arrives";
|
||||
|
||||
// Cache responses required to handle DMA
|
||||
Ack, desc="Received an ack message";
|
||||
Shared_Ack, desc="Received an ack message, responder has a shared copy";
|
||||
Shared_Data, desc="Received a data message, responder has a shared copy";
|
||||
Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us";
|
||||
|
||||
// Triggers
|
||||
All_acks_and_data, desc="Received all required data and message acks";
|
||||
All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy";
|
||||
}
|
||||
|
||||
// TYPES
|
||||
|
@ -100,9 +127,13 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
Address PhysicalAddress, desc="physical address";
|
||||
State TBEState, desc="Transient State";
|
||||
CoherenceResponseType ResponseType, desc="The type for the subsequent response message";
|
||||
DataBlock DataBlk, desc="Data to be written (DMA write only)";
|
||||
DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory";
|
||||
DataBlock DataBlk, desc="The current view of system memory";
|
||||
int Len, desc="...";
|
||||
MachineID DmaRequestor, desc="DMA requestor";
|
||||
int NumPendingMsgs, desc="Number of pending acks/messages";
|
||||
bool CacheDirty, desc="Indicates whether a cache has responded with dirty data";
|
||||
bool Sharers, desc="Indicates whether a cache has indicated it is currently a sharer";
|
||||
}
|
||||
|
||||
external_type(TBETable) {
|
||||
|
@ -135,10 +166,14 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
directory[addr].DirectoryState := state;
|
||||
}
|
||||
|
||||
MessageBuffer triggerQueue, ordered="true";
|
||||
|
||||
// ** OUT_PORTS **
|
||||
out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests
|
||||
out_port(forwardNetwork_out, RequestMsg, forwardFromDir);
|
||||
out_port(responseNetwork_out, ResponseMsg, responseFromDir);
|
||||
out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests
|
||||
out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir);
|
||||
out_port(triggerQueue_out, TriggerMsg, triggerQueue);
|
||||
|
||||
//
|
||||
// Memory buffer for memory controller to DIMM communication
|
||||
|
@ -147,6 +182,21 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
|
||||
// ** IN_PORTS **
|
||||
|
||||
// Trigger Queue
|
||||
// In-port for the directory's internal trigger queue. Each queued TriggerMsg
// is mapped by its Type onto the matching completion event for its Address.
in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
|
||||
if (triggerQueue_in.isReady()) {
|
||||
peek(triggerQueue_in, TriggerMsg) {
|
||||
if (in_msg.Type == TriggerType:ALL_ACKS) {
|
||||
trigger(Event:All_acks_and_data, in_msg.Address);
|
||||
} else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) {
|
||||
trigger(Event:All_acks_and_data_no_sharers, in_msg.Address);
|
||||
} else {
|
||||
// Only the two trigger types above are handled here.
error("Unexpected message");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
in_port(unblockNetwork_in, ResponseMsg, unblockToDir) {
|
||||
if (unblockNetwork_in.isReady()) {
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
|
@ -167,6 +217,39 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Response Network
|
||||
// In-port for responses arriving on responseToDir. The response Type selects
// one of the Ack / Shared_Ack / Shared_Data / Exclusive_Data events for the
// message's Address; any other type raises an error.
in_port(responseToDir_in, ResponseMsg, responseToDir) {
|
||||
if (responseToDir_in.isReady()) {
|
||||
peek(responseToDir_in, ResponseMsg) {
|
||||
if (in_msg.Type == CoherenceResponseType:ACK) {
|
||||
trigger(Event:Ack, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) {
|
||||
trigger(Event:Shared_Ack, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) {
|
||||
trigger(Event:Shared_Data, in_msg.Address);
|
||||
} else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
|
||||
trigger(Event:Exclusive_Data, in_msg.Address);
|
||||
} else {
|
||||
error("Unexpected message");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// In-port for DMA requests arriving on dmaRequestToDir. READ and WRITE
// requests trigger DMA_READ / DMA_WRITE on the request's LineAddress (not its
// PhysicalAddress); any other type raises an error.
in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) {
|
||||
if (dmaRequestQueue_in.isReady()) {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
if (in_msg.Type == DMARequestType:READ) {
|
||||
trigger(Event:DMA_READ, in_msg.LineAddress);
|
||||
} else if (in_msg.Type == DMARequestType:WRITE) {
|
||||
trigger(Event:DMA_WRITE, in_msg.LineAddress);
|
||||
} else {
|
||||
error("Invalid message");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
in_port(requestQueue_in, RequestMsg, requestToDir) {
|
||||
if (requestQueue_in.isReady()) {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
|
@ -233,10 +316,61 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Allocates a TBE for `address` and fills it from the DMA request at the head
// of dmaRequestQueue_in: DMA data block, physical address, length and
// requestor. The response type is preset to DATA_EXCLUSIVE, the pending
// message counter to the number of last-level caches, and CacheDirty to false.
// NOTE(review): the Sharers field is not written here; presumably it relies
// on a default value - confirm.
action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
TBEs.allocate(address);
|
||||
TBEs[address].DmaDataBlk := in_msg.DataBlk;
|
||||
TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;
|
||||
TBEs[address].Len := in_msg.Len;
|
||||
TBEs[address].DmaRequestor := in_msg.Requestor;
|
||||
TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE;
|
||||
//
|
||||
// One ack for each last-level cache
|
||||
//
|
||||
TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches();
|
||||
//
|
||||
// Assume initially that the caches store a clean copy and that memory
|
||||
// will provide the data
|
||||
//
|
||||
TBEs[address].CacheDirty := false;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the TBE entry associated with `address`.
action(w_deallocateTBE, "w", desc="Deallocate TBE") {
|
||||
TBEs.deallocate(address);
|
||||
}
|
||||
|
||||
// Decrements the TBE's pending-message counter by one for the response at the
// head of responseToDir_in. The response must carry a positive Acks count, but
// the counter is reduced by exactly one regardless of that count. The counter
// is logged via DEBUG_EXPR before and after the update.
action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") {
|
||||
peek(responseToDir_in, ResponseMsg) {
|
||||
assert(in_msg.Acks > 0);
|
||||
DEBUG_EXPR(TBEs[address].NumPendingMsgs);
|
||||
//
|
||||
// Note that cache data responses will have an ack count of 2. However,
|
||||
// directory DMA requests must wait for acks from all LLC caches, so
|
||||
// only decrement by 1.
|
||||
//
|
||||
TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1;
|
||||
DEBUG_EXPR(TBEs[address].NumPendingMsgs);
|
||||
}
|
||||
}
|
||||
|
||||
// Removes the message at the head of the responseToDir_in port.
action(n_popResponseQueue, "n", desc="Pop response queue") {
|
||||
responseToDir_in.dequeue();
|
||||
}
|
||||
|
||||
// When the TBE's pending-message counter has reached zero, enqueues a
// TriggerMsg for `address` on the trigger queue. The trigger type is ALL_ACKS
// if the TBE's Sharers flag is set, otherwise ALL_ACKS_NO_SHARERS. Does nothing
// while messages are still pending.
action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") {
|
||||
if (TBEs[address].NumPendingMsgs == 0) {
|
||||
enqueue(triggerQueue_out, TriggerMsg) {
|
||||
out_msg.Address := address;
|
||||
if (TBEs[address].Sharers) {
|
||||
out_msg.Type := TriggerType:ALL_ACKS;
|
||||
} else {
|
||||
out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(d_sendData, "d", desc="Send data to requestor") {
|
||||
peek(memQueue_in, MemoryMsg) {
|
||||
enqueue(responseNetwork_out, ResponseMsg, latency="1") {
|
||||
|
@ -252,18 +386,66 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Sends a DATA response to the DMA requestor recorded in the TBE, using the
// data block of the memory message at the head of memQueue_in.
// Both PhysicalAddress and LineAddress of the response are set to `address`.
action(dr_sendDmaData, "dr", desc="Send Data to DMA controller from memory") {
|
||||
peek(memQueue_in, MemoryMsg) {
|
||||
enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
|
||||
out_msg.PhysicalAddress := address;
|
||||
out_msg.LineAddress := address;
|
||||
out_msg.Type := DMAResponseType:DATA;
|
||||
//
|
||||
// we send the entire data block and rely on the dma controller to
|
||||
// split it up if need be
|
||||
//
|
||||
out_msg.DataBlk := in_msg.DataBlk;
|
||||
out_msg.Destination.add(TBEs[address].DmaRequestor);
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sends a DATA response to the DMA requestor recorded in the TBE, using the
// data block stored in the TBE (TBEs[address].DataBlk). The action peeks
// triggerQueue_in, but no field of the peeked trigger message is read.
// Both PhysicalAddress and LineAddress of the response are set to `address`.
action(dt_sendDmaDataFromTbe, "dt", desc="Send Data to DMA controller from tbe") {
|
||||
peek(triggerQueue_in, TriggerMsg) {
|
||||
enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
|
||||
out_msg.PhysicalAddress := address;
|
||||
out_msg.LineAddress := address;
|
||||
out_msg.Type := DMAResponseType:DATA;
|
||||
//
|
||||
// we send the entire data block and rely on the dma controller to
|
||||
// split it up if need be
|
||||
//
|
||||
out_msg.DataBlk := TBEs[address].DataBlk;
|
||||
out_msg.Destination.add(TBEs[address].DmaRequestor);
|
||||
out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sends an ACK response (no data block) for `address` to the DMA requestor
// recorded in the TBE, sized as Writeback_Control.
action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") {
|
||||
enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") {
|
||||
out_msg.PhysicalAddress := address;
|
||||
out_msg.LineAddress := address;
|
||||
out_msg.Type := DMAResponseType:ACK;
|
||||
out_msg.Destination.add(TBEs[address].DmaRequestor);
|
||||
out_msg.MessageSize := MessageSizeType:Writeback_Control;
|
||||
}
|
||||
}
|
||||
|
||||
// Marks the TBE so that the subsequent response for `address` is of type
// DATA_EXCLUSIVE. The request queue is peeked, but no field of the peeked
// request is read.
action(rx_recordExclusiveInTBE, "rx", desc="Record Exclusive in TBE") {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE;
|
||||
}
|
||||
}
|
||||
|
||||
action(r_recordDataInTBE, "r", desc="Record Data in TBE") {
|
||||
// Marks the TBE so that the subsequent response for `address` is of type
// DATA. The request queue is peeked, but no field of the peeked request is
// read.
action(r_recordDataInTBE, "rt", desc="Record Data in TBE") {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
TBEs[address].ResponseType := CoherenceResponseType:DATA;
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the TBE's Sharers flag for `address`; o_checkForCompletion reads this
// flag to choose between the ALL_ACKS and ALL_ACKS_NO_SHARERS triggers.
action(r_setSharerBit, "r", desc="We saw other sharers") {
|
||||
TBEs[address].Sharers := true;
|
||||
}
|
||||
|
||||
action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") {
|
||||
peek(requestQueue_in, RequestMsg) {
|
||||
enqueue(memQueue_out, MemoryMsg, latency="1") {
|
||||
|
@ -272,56 +454,25 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
out_msg.Sender := machineID;
|
||||
out_msg.OriginalRequestorMachId := in_msg.Requestor;
|
||||
out_msg.MessageSize := in_msg.MessageSize;
|
||||
out_msg.DataBlk := directory[in_msg.Address].DataBlk;
|
||||
out_msg.DataBlk := directory[address].DataBlk;
|
||||
DEBUG_EXPR(out_msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// action(qx_queueMemoryFetchExclusiveRequest, "xf", desc="Queue off-chip fetch request") {
|
||||
// peek(requestQueue_in, RequestMsg) {
|
||||
// enqueue(memQueue_out, MemoryMsg, latency=memory_request_latency) {
|
||||
// out_msg.Address := address;
|
||||
// out_msg.Type := MemoryRequestType:MEMORY_READ;
|
||||
// out_msg.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE;
|
||||
// out_msg.Sender := machineID;
|
||||
// out_msg.OriginalRequestorMachId := in_msg.Requestor;
|
||||
// out_msg.MessageSize := in_msg.MessageSize;
|
||||
// out_msg.DataBlk := directory[in_msg.Address].DataBlk;
|
||||
// DEBUG_EXPR(out_msg);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// action(d_sendData, "d", desc="Send data to requestor") {
|
||||
// peek(requestQueue_in, RequestMsg) {
|
||||
// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) {
|
||||
// out_msg.Address := address;
|
||||
// out_msg.Type := CoherenceResponseType:DATA;
|
||||
// out_msg.Sender := machineID;
|
||||
// out_msg.Destination.add(in_msg.Requestor);
|
||||
// out_msg.DataBlk := directory[in_msg.Address].DataBlk;
|
||||
// out_msg.Dirty := false; // By definition, the block is now clean
|
||||
// out_msg.Acks := 1;
|
||||
// out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") {
|
||||
// peek(requestQueue_in, RequestMsg) {
|
||||
// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) {
|
||||
// out_msg.Address := address;
|
||||
// out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE;
|
||||
// out_msg.Sender := machineID;
|
||||
// out_msg.Destination.add(in_msg.Requestor);
|
||||
// out_msg.DataBlk := directory[in_msg.Address].DataBlk;
|
||||
// out_msg.Dirty := false; // By definition, the block is now clean
|
||||
// out_msg.Acks := 1;
|
||||
// out_msg.MessageSize := MessageSizeType:Response_Data;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// Queues a MEMORY_READ for `address` on memQueue_out on behalf of the DMA
// request at the head of dmaRequestQueue_in. The DMA requestor is recorded as
// the original requestor, and the message carries the directory's current
// data block for the address.
action(qd_queueMemoryRequestFromDmaRead, "qd", desc="Queue off-chip fetch request") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(memQueue_out, MemoryMsg, latency="1") {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := MemoryRequestType:MEMORY_READ;
|
||||
out_msg.Sender := machineID;
|
||||
out_msg.OriginalRequestorMachId := in_msg.Requestor;
|
||||
out_msg.MessageSize := in_msg.MessageSize;
|
||||
out_msg.DataBlk := directory[address].DataBlk;
|
||||
DEBUG_EXPR(out_msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(f_forwardRequest, "f", desc="Forward requests") {
|
||||
if (getNumberOfLastLevelCaches() > 1) {
|
||||
|
@ -338,6 +489,38 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Broadcasts a GETX for `address` to every L1 cache on behalf of a DMA write.
// The directory itself (machineID) is named as the requestor. The DMA request
// queue is peeked, but no field of the peeked request is read.
action(f_forwardWriteFromDma, "fw", desc="Forward requests") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := CoherenceRequestType:GETX;
|
||||
//
|
||||
// Send to all L1 caches, since the requestor is the memory controller
|
||||
// itself
|
||||
//
|
||||
out_msg.Requestor := machineID;
|
||||
out_msg.Destination.broadcast(MachineType:L1Cache);
|
||||
out_msg.MessageSize := MessageSizeType:Forwarded_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Broadcasts a GETS for `address` to every L1 cache on behalf of a DMA read.
// The directory itself (machineID) is named as the requestor. The DMA request
// queue is peeked, but no field of the peeked request is read.
action(f_forwardReadFromDma, "fr", desc="Forward requests") {
|
||||
peek(dmaRequestQueue_in, DMARequestMsg) {
|
||||
enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := CoherenceRequestType:GETS;
|
||||
//
|
||||
// Send to all L1 caches, since the requestor is the memory controller
|
||||
// itself
|
||||
//
|
||||
out_msg.Requestor := machineID;
|
||||
out_msg.Destination.broadcast(MachineType:L1Cache);
|
||||
out_msg.MessageSize := MessageSizeType:Forwarded_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Removes the message at the head of the requestQueue_in port.
action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") {
|
||||
requestQueue_in.dequeue();
|
||||
}
|
||||
|
@ -350,16 +533,52 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
memQueue_in.dequeue();
|
||||
}
|
||||
|
||||
// Removes the message at the head of the triggerQueue_in port.
action(g_popTriggerQueue, "g", desc="Pop trigger queue") {
|
||||
triggerQueue_in.dequeue();
|
||||
}
|
||||
|
||||
// Removes the message at the head of the dmaRequestQueue_in port.
action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") {
|
||||
dmaRequestQueue_in.dequeue();
|
||||
}
|
||||
|
||||
// Recycles the head of dmaRequestQueue_in instead of consuming it; used by the
// blocked-state transitions for DMA_READ / DMA_WRITE.
action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") {
|
||||
dmaRequestQueue_in.recycle();
|
||||
}
|
||||
|
||||
// Copies the data block of the memory message at the head of memQueue_in into
// the TBE, unless a cache response has already marked the TBE as CacheDirty,
// in which case the TBE's existing data block is kept.
action(r_recordMemoryData, "rd", desc="record data from memory to TBE") {
|
||||
peek(memQueue_in, MemoryMsg) {
|
||||
if (TBEs[address].CacheDirty == false) {
|
||||
TBEs[address].DataBlk := in_msg.DataBlk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the data block of the cache response at the head of responseToDir_in
// in the TBE and sets CacheDirty, so that r_recordMemoryData will not
// overwrite it with memory data afterwards.
action(r_recordCacheData, "rc", desc="record data from cache response to TBE") {
|
||||
peek(responseToDir_in, ResponseMsg) {
|
||||
TBEs[address].CacheDirty := true;
|
||||
TBEs[address].DataBlk := in_msg.DataBlk;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the data block of the writeback message at the head of
// unblockNetwork_in into the directory's copy of the block. The message must
// be dirty and sized as Writeback_Data.
// NOTE(review): this listing shows two stores to the directory data block,
// one indexed by in_msg.Address and one by address; they look like the
// removed and added versions of the same diff line - confirm against the
// real file.
action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") {
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
assert(in_msg.Dirty);
|
||||
assert(in_msg.MessageSize == MessageSizeType:Writeback_Data);
|
||||
directory[in_msg.Address].DataBlk := in_msg.DataBlk;
|
||||
directory[address].DataBlk := in_msg.DataBlk;
|
||||
DEBUG_EXPR(in_msg.Address);
|
||||
DEBUG_EXPR(in_msg.DataBlk);
|
||||
}
|
||||
}
|
||||
|
||||
// Updates the directory's copy of the block for a DMA write: first the whole
// block is set to the TBE's DataBlk, then Len units of the TBE's DmaDataBlk
// are merged in via copyPartial at the offset of the request's physical
// address.
action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") {
|
||||
directory[address].DataBlk := TBEs[address].DataBlk;
|
||||
directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
|
||||
}
|
||||
|
||||
// Asserts that the TBE's CacheDirty flag is set, i.e. that r_recordCacheData
// has run for this address.
action(a_assertCacheData, "ac", desc="Assert that a cache provided the data") {
|
||||
assert(TBEs[address].CacheDirty);
|
||||
}
|
||||
|
||||
action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") {
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
enqueue(memQueue_out, MemoryMsg, latency="1") {
|
||||
|
@ -370,6 +589,18 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
}
|
||||
}
|
||||
|
||||
// Queues a MEMORY_WB for `address` on memQueue_out. The outgoing block is the
// TBE's DataBlk with Len units of the TBE's DmaDataBlk merged in via
// copyPartial at the offset of the request's physical address.
action(ld_queueMemoryDmaWrite, "ld", desc="Write DMA data to memory") {
|
||||
enqueue(memQueue_out, MemoryMsg, latency="1") {
|
||||
out_msg.Address := address;
|
||||
out_msg.Type := MemoryRequestType:MEMORY_WB;
|
||||
// first, initialize the data blk to the current version of system memory
|
||||
out_msg.DataBlk := TBEs[address].DataBlk;
|
||||
// then add the dma write data
|
||||
out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len);
|
||||
DEBUG_EXPR(out_msg);
|
||||
}
|
||||
}
|
||||
|
||||
action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") {
|
||||
peek(unblockNetwork_in, ResponseMsg) {
|
||||
assert(in_msg.Dirty == false);
|
||||
|
@ -379,20 +610,17 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
// implementation. We include the data in the "dataless"
|
||||
// message so we can assert the clean data matches the datablock
|
||||
// in memory
|
||||
assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk);
|
||||
assert(directory[address].DataBlk == in_msg.DataBlk);
|
||||
}
|
||||
}
|
||||
|
||||
// action(z_stall, "z", desc="Cannot be handled right now.") {
|
||||
// Special name recognized as do nothing case
|
||||
// }
|
||||
|
||||
// Recycles the head of requestQueue_in instead of consuming it; used by the
// blocked-state transitions for GETS / GETX / PUT.
action(zz_recycleRequest, "\z", desc="Recycle the request queue") {
|
||||
requestQueue_in.recycle();
|
||||
}
|
||||
|
||||
// TRANSITIONS
|
||||
|
||||
// Transitions out of E state
|
||||
transition(E, GETX, NO_B_W) {
|
||||
v_allocateTBE;
|
||||
rx_recordExclusiveInTBE;
|
||||
|
@ -409,7 +637,14 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
//
|
||||
// DMA read in E: allocate a TBE for the request, fetch the block from memory,
// broadcast a GETS to the L1 caches, and consume the DMA request.
transition(E, DMA_READ, NO_DR_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
qd_queueMemoryRequestFromDmaRead;
|
||||
f_forwardReadFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// Transitions out of O state
|
||||
transition(O, GETX, NO_B_W) {
|
||||
v_allocateTBE;
|
||||
r_recordDataInTBE;
|
||||
|
@ -426,7 +661,20 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
//
|
||||
// DMA read in O: same actions as the E case (TBE allocation, memory fetch,
// GETS broadcast, pop), but the next state is O_DR_B_W.
transition(O, DMA_READ, O_DR_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
qd_queueMemoryRequestFromDmaRead;
|
||||
f_forwardReadFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// DMA write from any stable state: allocate a TBE holding the DMA data,
// broadcast a GETX to the L1 caches, and consume the DMA request. No memory
// request is queued at this point.
transition({E, O, NO}, DMA_WRITE, NO_DW_B_W) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
f_forwardWriteFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// Transitions out of NO state
|
||||
transition(NO, GETX, NO_B) {
|
||||
f_forwardRequest;
|
||||
i_popIncomingRequestQueue;
|
||||
|
@ -442,16 +690,33 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// DMA read in NO: allocate a TBE and broadcast a GETS, but do not fetch from
// memory; the data is expected from a cache response (state NO_DR_B_D).
transition(NO, DMA_READ, NO_DR_B_D) {
|
||||
vd_allocateDmaRequestInTBE;
|
||||
f_forwardReadFromDma;
|
||||
p_popDmaRequestQueue;
|
||||
}
|
||||
|
||||
// Nack PUT requests when races cause us to believe we own the data
|
||||
// PUT while the directory is in O or E: reply with a writeback nack and
// consume the request; the state is left unchanged.
transition({O, E}, PUT) {
|
||||
b_sendWriteBackNack;
|
||||
i_popIncomingRequestQueue;
|
||||
}
|
||||
|
||||
// Blocked states
|
||||
transition({NO_B, O_B, NO_B_W, O_B_W, NO_W, O_W, WB, WB_E_W, WB_O_W}, {GETS, GETX, PUT}) {
|
||||
// Blocked transient states
|
||||
// Cache requests (GETS / GETX / PUT) that arrive in any of the listed
// transient states are recycled on the request queue rather than processed.
transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
||||
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W,
|
||||
NO_W, O_W, WB, WB_E_W, WB_O_W},
|
||||
{GETS, GETX, PUT}) {
|
||||
zz_recycleRequest;
|
||||
}
|
||||
|
||||
// DMA requests (DMA_READ / DMA_WRITE) that arrive in any of the listed
// transient states are recycled on the DMA request queue rather than
// processed.
transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D,
|
||||
NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W,
|
||||
NO_W, O_W, WB, WB_E_W, WB_O_W},
|
||||
{DMA_READ, DMA_WRITE}) {
|
||||
y_recycleDmaRequestQueue;
|
||||
}
|
||||
|
||||
// An Unblock in NO_B returns the directory to NO and consumes the unblock
// message.
transition(NO_B, Unblock, NO) {
|
||||
j_popIncomingUnblockQueue;
|
||||
}
|
||||
|
@ -466,6 +731,134 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
l_popMemQueue;
|
||||
}
|
||||
|
||||
// Memory data arrives for a DMA read started in E: record it in the TBE
// (unless a cache already supplied dirty data), check whether all cache
// responses have already arrived, and pop the memory queue.
transition(NO_DR_B_W, Memory_Data, NO_DR_B) {
|
||||
r_recordMemoryData;
|
||||
o_checkForCompletion;
|
||||
l_popMemQueue;
|
||||
}
|
||||
|
||||
// Memory data arrives for a DMA read started in O: record it in the TBE and
// send it to the DMA controller immediately, then check whether all cache
// responses have arrived and pop the memory queue.
transition(O_DR_B_W, Memory_Data, O_DR_B) {
|
||||
r_recordMemoryData;
|
||||
dr_sendDmaData;
|
||||
o_checkForCompletion;
|
||||
l_popMemQueue;
|
||||
}
|
||||
|
||||
// An Ack in these states counts toward completion: decrement the pending
// counter, enqueue the completion trigger if it reached zero, and pop the
// response. The state is left unchanged.
transition({NO_DR_B, O_DR_B, NO_DR_B_D, NO_DW_B_W}, Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// An Ack that arrives while a DMA read is still waiting for memory data is
// only counted: the pending counter is decremented and the response popped.
// Completion is checked later, when Memory_Data arrives.
//
// O_DR_B_W is included alongside NO_DR_B_W: a DMA read from O broadcasts a
// GETS to the caches and queues a memory fetch at the same time, so cache acks
// can reach the directory before the memory data does. Without a transition
// for (O_DR_B_W, Ack) such an ack has no handler.
// NOTE(review): Shared_Ack in O_DR_B_W / O_DR_B also has no transition in the
// visible part of the file - confirm whether that case can occur.
transition({NO_DR_B_W, O_DR_B_W}, Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// A Shared_Ack while still waiting for memory data: count it, remember that a
// sharer exists, and pop the response. Completion is not checked here.
transition(NO_DR_B_W, Shared_Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// A Shared_Ack once only cache responses are outstanding: count it, remember
// that a sharer exists, check for completion, and pop the response.
transition({NO_DR_B, NO_DR_B_D}, Shared_Ack) {
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// Shared data from a cache while still waiting for memory data: store the
// cache's block in the TBE (marking it dirty), count the response, remember
// the sharer, check for completion, and pop the response.
// NOTE(review): unlike the Ack / Shared_Ack / Exclusive_Data transitions for
// NO_DR_B_W, this one runs o_checkForCompletion - confirm this is intended.
transition(NO_DR_B_W, Shared_Data) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// Shared data from a cache once only cache responses are outstanding: store
// the block in the TBE, count the response, remember the sharer, check for
// completion, and pop the response.
transition({NO_DR_B, NO_DR_B_D}, Shared_Data) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
r_setSharerBit;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// Exclusive data from a cache while still waiting for memory data: store the
// block in the TBE, count the response, and pop it. Completion is not checked
// here.
transition(NO_DR_B_W, Exclusive_Data) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// Exclusive data from a cache in the listed states: store the block in the
// TBE, count the response, check for completion, and pop the response.
transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, Exclusive_Data) {
|
||||
r_recordCacheData;
|
||||
m_decrementNumberOfMessages;
|
||||
o_checkForCompletion;
|
||||
n_popResponseQueue;
|
||||
}
|
||||
|
||||
// DMA read complete with sharers present: send the TBE's data to the DMA
// controller, release the TBE, pop the trigger, and move to O.
transition(NO_DR_B, All_acks_and_data, O) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// DMA read started in NO, complete with sharers present: send the TBE's data
// to the DMA controller, release the TBE, pop the trigger, and move to O.
transition(NO_DR_B_D, All_acks_and_data, O) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// DMA read started in O, all responses received: only release the TBE and pop
// the trigger. No data is sent here; the O_DR_B_W Memory_Data transition
// already ran dr_sendDmaData.
transition(O_DR_B, All_acks_and_data_no_sharers, O) {
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// DMA read complete with no sharers reported: send the TBE's data to the DMA
// controller, release the TBE, pop the trigger, and move to E.
transition(NO_DR_B, All_acks_and_data_no_sharers, E) {
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// DMA read started in NO, complete with no sharers reported. No memory fetch
// was queued on this path, so the data must have come from a cache; that is
// asserted before the TBE's data is sent to the DMA controller. Then the TBE
// is released, the trigger popped, and the state moves to E.
transition(NO_DR_B_D, All_acks_and_data_no_sharers, E) {
|
||||
a_assertCacheData;
|
||||
//
|
||||
// Note that the DMA consistency model allows us to send the DMA device
|
||||
// a response as soon as we receive valid data and prior to receiving
|
||||
// all acks. However, to simplify the protocol we wait for all acks.
|
||||
//
|
||||
dt_sendDmaDataFromTbe;
|
||||
w_deallocateTBE;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// DMA write, all cache responses received: merge the DMA data into the
// directory's copy of the block, queue the merged block as a memory
// writeback, and pop the trigger. The TBE is kept until the memory ack.
transition(NO_DW_B_W, All_acks_and_data_no_sharers, NO_DW_W) {
|
||||
dwt_writeDmaDataFromTBE;
|
||||
ld_queueMemoryDmaWrite;
|
||||
g_popTriggerQueue;
|
||||
}
|
||||
|
||||
// Memory acknowledges the DMA writeback: send an ACK to the DMA controller,
// release the TBE, pop the memory queue, and move to E.
transition(NO_DW_W, Memory_Ack, E) {
|
||||
da_sendDmaAck;
|
||||
w_deallocateTBE;
|
||||
l_popMemQueue;
|
||||
}
|
||||
|
||||
transition(O_B_W, Memory_Data, O_B) {
|
||||
d_sendData;
|
||||
w_deallocateTBE;
|
||||
|
@ -490,7 +883,7 @@ machine(Directory, "AMD Hammer-like protocol")
|
|||
l_popMemQueue;
|
||||
}
|
||||
|
||||
// WB
|
||||
// WB State Transitions
|
||||
transition(WB, Writeback_Dirty, WB_E_W) {
|
||||
l_writeDataToMemory;
|
||||
l_queueMemoryWBRequest;
|
||||
|
|
165
src/mem/protocol/MOESI_hammer-dma.sm
Normal file
165
src/mem/protocol/MOESI_hammer-dma.sm
Normal file
|
@ -0,0 +1,165 @@
|
|||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
machine(DMA, "DMA Controller")
|
||||
: int request_latency
|
||||
{
|
||||
|
||||
MessageBuffer responseFromDir, network="From", virtual_network="4", ordered="true", no_vector="true";
|
||||
MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true";
|
||||
|
||||
// States of the DMA controller; the default state is READY.
// NOTE(review): BUSY_RD and BUSY_WR carry identical descriptions; by their
// names they presumably distinguish an outstanding read from an outstanding
// write - confirm against the transitions.
enumeration(State, desc="DMA states", default="DMA_State_READY") {
|
||||
READY, desc="Ready to accept a new request";
|
||||
BUSY_RD, desc="Busy: currently processing a request";
|
||||
BUSY_WR, desc="Busy: currently processing a request";
|
||||
}
|
||||
|
||||
// Events of the DMA controller. ReadRequest / WriteRequest are triggered from
// the mandatory queue; Data / Ack are triggered from directory responses.
enumeration(Event, desc="DMA events") {
|
||||
ReadRequest, desc="A new read request";
|
||||
WriteRequest, desc="A new write request";
|
||||
Data, desc="Data from a DMA memory read";
|
||||
Ack, desc="DMA write to memory completed";
|
||||
}
|
||||
|
||||
// Interface of the externally implemented DMASequencer as used by this
// machine: one callback without arguments and one taking a data block.
// NOTE(review): presumably ackCallback signals a finished write and
// dataCallback delivers read data - confirm against the callers.
external_type(DMASequencer) {
|
||||
void ackCallback();
|
||||
void dataCallback(DataBlock);
|
||||
}
|
||||
|
||||
MessageBuffer mandatoryQueue, ordered="false", no_vector="true";
|
||||
DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true";
|
||||
State cur_state, no_vector="true";
|
||||
|
||||
// Returns the controller's single state variable; `addr` is ignored, so the
// state is the same for every address.
State getState(Address addr) {
|
||||
return cur_state;
|
||||
}
|
||||
void setState(Address addr, State state) {
|
||||
cur_state := state;
|
||||
}
|
||||
|
||||
out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="...");
|
||||
|
||||
in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") {
|
||||
if (dmaRequestQueue_in.isReady()) {
|
||||
peek(dmaRequestQueue_in, SequencerMsg) {
|
||||
if (in_msg.Type == SequencerRequestType:LD ) {
|
||||
trigger(Event:ReadRequest, in_msg.LineAddress);
|
||||
} else if (in_msg.Type == SequencerRequestType:ST) {
|
||||
trigger(Event:WriteRequest, in_msg.LineAddress);
|
||||
} else {
|
||||
error("Invalid request type");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") {
|
||||
if (dmaResponseQueue_in.isReady()) {
|
||||
peek( dmaResponseQueue_in, DMAResponseMsg) {
|
||||
if (in_msg.Type == DMAResponseType:ACK) {
|
||||
trigger(Event:Ack, in_msg.LineAddress);
|
||||
} else if (in_msg.Type == DMAResponseType:DATA) {
|
||||
trigger(Event:Data, in_msg.LineAddress);
|
||||
} else {
|
||||
error("Invalid response type");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
|
||||
peek(dmaRequestQueue_in, SequencerMsg) {
|
||||
enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
|
||||
out_msg.PhysicalAddress := in_msg.PhysicalAddress;
|
||||
out_msg.LineAddress := in_msg.LineAddress;
|
||||
out_msg.Type := DMARequestType:READ;
|
||||
out_msg.Requestor := machineID;
|
||||
out_msg.DataBlk := in_msg.DataBlk;
|
||||
out_msg.Len := in_msg.Len;
|
||||
out_msg.Destination.add(map_Address_to_Directory(address));
|
||||
out_msg.MessageSize := MessageSizeType:Writeback_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
|
||||
peek(dmaRequestQueue_in, SequencerMsg) {
|
||||
enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
|
||||
out_msg.PhysicalAddress := in_msg.PhysicalAddress;
|
||||
out_msg.LineAddress := in_msg.LineAddress;
|
||||
out_msg.Type := DMARequestType:WRITE;
|
||||
out_msg.Requestor := machineID;
|
||||
out_msg.DataBlk := in_msg.DataBlk;
|
||||
out_msg.Len := in_msg.Len;
|
||||
out_msg.Destination.add(map_Address_to_Directory(address));
|
||||
out_msg.MessageSize := MessageSizeType:Writeback_Control;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
action(a_ackCallback, "a", desc="Notify dma controller that write request completed") {
|
||||
peek (dmaResponseQueue_in, DMAResponseMsg) {
|
||||
dma_sequencer.ackCallback();
|
||||
}
|
||||
}
|
||||
|
||||
action(d_dataCallback, "d", desc="Write data to dma sequencer") {
|
||||
peek (dmaResponseQueue_in, DMAResponseMsg) {
|
||||
dma_sequencer.dataCallback(in_msg.DataBlk);
|
||||
}
|
||||
}
|
||||
|
||||
action(p_popRequestQueue, "p", desc="Pop request queue") {
|
||||
dmaRequestQueue_in.dequeue();
|
||||
}
|
||||
|
||||
action(p_popResponseQueue, "\p", desc="Pop request queue") {
|
||||
dmaResponseQueue_in.dequeue();
|
||||
}
|
||||
|
||||
transition(READY, ReadRequest, BUSY_RD) {
|
||||
s_sendReadRequest;
|
||||
p_popRequestQueue;
|
||||
}
|
||||
|
||||
transition(READY, WriteRequest, BUSY_WR) {
|
||||
s_sendWriteRequest;
|
||||
p_popRequestQueue;
|
||||
}
|
||||
|
||||
transition(BUSY_RD, Data, READY) {
|
||||
d_dataCallback;
|
||||
p_popResponseQueue;
|
||||
}
|
||||
|
||||
transition(BUSY_WR, Ack, READY) {
|
||||
a_ackCallback;
|
||||
p_popResponseQueue;
|
||||
}
|
||||
}
|
|
@ -85,3 +85,35 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
|
|||
int Acks, desc="How many messages this counts as";
|
||||
MessageSizeType MessageSize, desc="size category of the message";
|
||||
}
|
||||
|
||||
// Request kinds carried in DMARequestMsg.Type.  NULL is the default value
// and marks a message whose type was never set.
enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
  READ,  desc="Memory Read";
  WRITE, desc="Memory Write";
  NULL,  desc="Invalid";
}
|
||||
|
||||
// Response kinds carried in DMAResponseMsg.Type.  NULL is the default value
// and marks a message whose type was never set.
enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") {
  DATA, desc="DATA read";
  ACK,  desc="ACK write";
  NULL, desc="Invalid";
}
|
||||
|
||||
// Network message describing a single DMA read or write request.
structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
  DMARequestType  Type,            desc="Request type (read/write)";
  Address         PhysicalAddress, desc="Physical address for this request";
  Address         LineAddress,     desc="Line address for this request";
  MachineID       Requestor,       desc="Node who initiated the request";
  NetDest         Destination,     desc="Destination";
  DataBlock       DataBlk,         desc="DataBlk attached to this request";
  int             Len,             desc="The length of the request";
  MessageSizeType MessageSize,     desc="size category of the message";
}
|
||||
|
||||
// Network message answering a DMA request with either data or an ack.
structure(DMAResponseMsg, desc="...", interface="NetworkMessage") {
  DMAResponseType Type,            desc="Response type (DATA/ACK)";
  Address         PhysicalAddress, desc="Physical address for this request";
  Address         LineAddress,     desc="Line address for this request";
  NetDest         Destination,     desc="Destination";
  DataBlock       DataBlk,         desc="DataBlk attached to this request";
  MessageSizeType MessageSize,     desc="size category of the message";
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
MOESI_hammer-msg.sm
|
||||
MOESI_hammer-cache.sm
|
||||
MOESI_hammer-dir.sm
|
||||
MOESI_hammer-dma.sm
|
||||
standard_1level_CMP-protocol.sm
|
||||
|
|
|
@ -178,6 +178,10 @@ class MOESI_hammer_DirectoryController < DirectoryController
|
|||
default_param :memory_controller_latency, Integer, 12
|
||||
end
|
||||
|
||||
# Configuration class for the MOESI_hammer DMA controller.
class MOESI_hammer_DMAController < DMAController
  # Default value for the controller's request_latency parameter.
  default_param(:request_latency, Integer, 6)
end
|
||||
|
||||
class RubySystem
|
||||
|
||||
# Random seed used by the simulation. If set to "rand", the seed
|
||||
|
|
Loading…
Reference in a new issue