From cef3c5616358912b45aafac490bf182c1a8def04 Mon Sep 17 00:00:00 2001 From: Brad Beckmann Date: Wed, 18 Nov 2009 16:34:32 -0800 Subject: [PATCH] ruby: MOESI hammer support for DMA reads and writes --- src/mem/protocol/MOESI_hammer-dir.sm | 517 +++++++++++++++++++++++---- src/mem/protocol/MOESI_hammer-dma.sm | 165 +++++++++ src/mem/protocol/MOESI_hammer-msg.sm | 32 ++ src/mem/protocol/MOESI_hammer.slicc | 1 + src/mem/ruby/config/defaults.rb | 4 + 5 files changed, 657 insertions(+), 62 deletions(-) create mode 100644 src/mem/protocol/MOESI_hammer-dma.sm diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm index 49efaffb6..b9b001e40 100644 --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -39,11 +39,17 @@ machine(Directory, "AMD Hammer-like protocol") MessageBuffer forwardFromDir, network="To", virtual_network="2", ordered="false"; MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false"; - //MessageBuffer dmaRequestFromDir, network="To", virtual_network="4", ordered="true"; + // + // For a finite buffered network, note that the DMA response network only + // works at this relatively higher numbered (lower priority) virtual network + // because the trigger queue decouples cache responses from DMA responses. + // + MessageBuffer dmaResponseFromDir, network="To", virtual_network="4", ordered="true"; - MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; MessageBuffer unblockToDir, network="From", virtual_network="0", ordered="false"; - //MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; + MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false"; + MessageBuffer requestToDir, network="From", virtual_network="3", ordered="false"; + MessageBuffer dmaRequestToDir, network="From", virtual_network="5", ordered="true"; // STATES enumeration(State, desc="Directory states", default="Directory_State_E") { @@ -57,6 +63,13 @@ machine(Directory, "AMD Hammer-like protocol") O_B_W, desc="Owner, Blocked, waiting for Dram"; NO_W, desc="Not Owner, waiting for Dram"; O_W, desc="Owner, waiting for Dram"; + NO_DW_B_W, desc="Not Owner, Dma Write waiting for Dram and cache responses"; + NO_DR_B_W, desc="Not Owner, Dma Read waiting for Dram and cache responses"; + NO_DR_B_D, desc="Not Owner, Dma Read waiting for cache responses including dirty data"; + NO_DR_B, desc="Not Owner, Dma Read waiting for cache responses"; + NO_DW_W, desc="Not Owner, Dma Write waiting for Dram"; + O_DR_B_W, desc="Owner, Dma Read waiting for Dram and cache responses"; + O_DR_B, desc="Owner, Dma Read waiting for cache responses"; WB, desc="Blocked on a writeback"; WB_O_W, desc="Blocked on memory write, will go to O"; WB_E_W, desc="Blocked on memory write, will go to E"; @@ -73,9 +86,23 @@ machine(Directory, "AMD Hammer-like protocol") Writeback_Exclusive_Clean, desc="The final part of a PutX (no data, exclusive)"; Writeback_Exclusive_Dirty, desc="The final part of a PutX (data, exclusive)"; + // DMA requests + DMA_READ, desc="A DMA Read memory request"; + DMA_WRITE, desc="A DMA Write memory request"; + // Memory Controller Memory_Data, desc="Fetched data from memory arrives"; Memory_Ack, desc="Writeback Ack from memory arrives"; + + // Cache responses required to handle DMA + Ack, desc="Received an ack message"; + Shared_Ack, desc="Received an ack message, responder has a shared copy"; + Shared_Data, desc="Received a data message, responder has a shared copy"; + Exclusive_Data, desc="Received a data message, responder had an exclusive copy, they gave it to us"; + + // Triggers + All_acks_and_data, desc="Received all required data and message acks"; + All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy"; } // TYPES @@ -100,9 +127,13 @@ machine(Directory, "AMD Hammer-like protocol") Address PhysicalAddress, desc="physical address"; State TBEState, desc="Transient State"; CoherenceResponseType ResponseType, desc="The type for the subsequent response message"; - DataBlock DataBlk, desc="Data to be written (DMA write only)"; + DataBlock DmaDataBlk, desc="DMA Data to be written. Partial blocks need to merged with system memory"; + DataBlock DataBlk, desc="The current view of system memory"; int Len, desc="..."; MachineID DmaRequestor, desc="DMA requestor"; + int NumPendingMsgs, desc="Number of pending acks/messages"; + bool CacheDirty, desc="Indicates whether a cache has responded with dirty data"; + bool Sharers, desc="Indicates whether a cache has indicated it is currently a sharer"; } external_type(TBETable) { @@ -135,10 +166,14 @@ machine(Directory, "AMD Hammer-like protocol") directory[addr].DirectoryState := state; } + MessageBuffer triggerQueue, ordered="true"; + // ** OUT_PORTS ** + out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests out_port(forwardNetwork_out, RequestMsg, forwardFromDir); out_port(responseNetwork_out, ResponseMsg, responseFromDir); - out_port(requestQueue_out, ResponseMsg, requestToDir); // For recycling requests + out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaResponseFromDir); + out_port(triggerQueue_out, TriggerMsg, triggerQueue); // // Memory buffer for memory controller to DIMM communication @@ -147,6 +182,21 @@ machine(Directory, "AMD Hammer-like protocol") // ** IN_PORTS ** + // Trigger Queue + in_port(triggerQueue_in, TriggerMsg, triggerQueue) { + if (triggerQueue_in.isReady()) { + peek(triggerQueue_in, TriggerMsg) { + if (in_msg.Type == TriggerType:ALL_ACKS) { + trigger(Event:All_acks_and_data, in_msg.Address); + } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { + trigger(Event:All_acks_and_data_no_sharers, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + in_port(unblockNetwork_in, ResponseMsg, unblockToDir) { if (unblockNetwork_in.isReady()) { peek(unblockNetwork_in, ResponseMsg) { @@ -167,6 +217,39 @@ machine(Directory, "AMD Hammer-like protocol") } } + // Response Network + in_port(responseToDir_in, ResponseMsg, responseToDir) { + if (responseToDir_in.isReady()) { + peek(responseToDir_in, ResponseMsg) { + if (in_msg.Type == CoherenceResponseType:ACK) { + trigger(Event:Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:ACK_SHARED) { + trigger(Event:Shared_Ack, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_SHARED) { + trigger(Event:Shared_Data, in_msg.Address); + } else if (in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) { + trigger(Event:Exclusive_Data, in_msg.Address); + } else { + error("Unexpected message"); + } + } + } + } + + in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, DMARequestMsg) { + if (in_msg.Type == DMARequestType:READ) { + trigger(Event:DMA_READ, in_msg.LineAddress); + } else if (in_msg.Type == DMARequestType:WRITE) { + trigger(Event:DMA_WRITE, in_msg.LineAddress); + } else { + error("Invalid message"); + } + } + } + } + in_port(requestQueue_in, RequestMsg, requestToDir) { if (requestQueue_in.isReady()) { peek(requestQueue_in, RequestMsg) { @@ -233,10 +316,61 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(vd_allocateDmaRequestInTBE, "vd", desc="Record Data in TBE") { + peek(dmaRequestQueue_in, DMARequestMsg) { + TBEs.allocate(address); + TBEs[address].DmaDataBlk := in_msg.DataBlk; + TBEs[address].PhysicalAddress := in_msg.PhysicalAddress; + TBEs[address].Len := in_msg.Len; + TBEs[address].DmaRequestor := in_msg.Requestor; + TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; + // + // One ack for each last-level cache + // + TBEs[address].NumPendingMsgs := getNumberOfLastLevelCaches(); + // + // Assume initially that the caches store a clean copy and that memory + // will provide the data + // + TBEs[address].CacheDirty := false; + } + } + action(w_deallocateTBE, "w", desc="Deallocate TBE") { TBEs.deallocate(address); } + action(m_decrementNumberOfMessages, "m", desc="Decrement the number of messages for which we're waiting") { + peek(responseToDir_in, ResponseMsg) { + assert(in_msg.Acks > 0); + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + // + // Note that cache data responses will have an ack count of 2. However, + // directory DMA requests must wait for acks from all LLC caches, so + // only decrement by 1. + // + TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - 1; + DEBUG_EXPR(TBEs[address].NumPendingMsgs); + } + } + + action(n_popResponseQueue, "n", desc="Pop response queue") { + responseToDir_in.dequeue(); + } + + action(o_checkForCompletion, "o", desc="Check if we have received all the messages required for completion") { + if (TBEs[address].NumPendingMsgs == 0) { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + if (TBEs[address].Sharers) { + out_msg.Type := TriggerType:ALL_ACKS; + } else { + out_msg.Type := TriggerType:ALL_ACKS_NO_SHARERS; + } + } + } + } + action(d_sendData, "d", desc="Send data to requestor") { peek(memQueue_in, MemoryMsg) { enqueue(responseNetwork_out, ResponseMsg, latency="1") { @@ -252,18 +386,66 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(dr_sendDmaData, "dr", desc="Send Data to DMA controller from memory") { + peek(memQueue_in, MemoryMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(dt_sendDmaDataFromTbe, "dt", desc="Send Data to DMA controller from tbe") { + peek(triggerQueue_in, TriggerMsg) { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:DATA; + // + // we send the entire data block and rely on the dma controller to + // split it up if need be + // + out_msg.DataBlk := TBEs[address].DataBlk; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Response_Data; + } + } + } + + action(da_sendDmaAck, "da", desc="Send Ack to DMA controller") { + enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency="1") { + out_msg.PhysicalAddress := address; + out_msg.LineAddress := address; + out_msg.Type := DMAResponseType:ACK; + out_msg.Destination.add(TBEs[address].DmaRequestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + action(rx_recordExclusiveInTBE, "rx", desc="Record Exclusive in TBE") { peek(requestQueue_in, RequestMsg) { TBEs[address].ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; } } - action(r_recordDataInTBE, "r", desc="Record Data in TBE") { + action(r_recordDataInTBE, "rt", desc="Record Data in TBE") { peek(requestQueue_in, RequestMsg) { TBEs[address].ResponseType := CoherenceResponseType:DATA; } } + action(r_setSharerBit, "r", desc="We saw other sharers") { + TBEs[address].Sharers := true; + } + action(qf_queueMemoryFetchRequest, "qf", desc="Queue off-chip fetch request") { peek(requestQueue_in, RequestMsg) { enqueue(memQueue_out, MemoryMsg, latency="1") { @@ -272,56 +454,25 @@ machine(Directory, "AMD Hammer-like protocol") out_msg.Sender := machineID; out_msg.OriginalRequestorMachId := in_msg.Requestor; out_msg.MessageSize := in_msg.MessageSize; - out_msg.DataBlk := directory[in_msg.Address].DataBlk; + out_msg.DataBlk := directory[address].DataBlk; DEBUG_EXPR(out_msg); } } } -// action(qx_queueMemoryFetchExclusiveRequest, "xf", desc="Queue off-chip fetch request") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(memQueue_out, MemoryMsg, latency=memory_request_latency) { -// out_msg.Address := address; -// out_msg.Type := MemoryRequestType:MEMORY_READ; -// out_msg.ResponseType := CoherenceResponseType:DATA_EXCLUSIVE; -// out_msg.Sender := machineID; -// out_msg.OriginalRequestorMachId := in_msg.Requestor; -// out_msg.MessageSize := in_msg.MessageSize; -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// DEBUG_EXPR(out_msg); -// } -// } -// } - -// action(d_sendData, "d", desc="Send data to requestor") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { -// out_msg.Address := address; -// out_msg.Type := CoherenceResponseType:DATA; -// out_msg.Sender := machineID; -// out_msg.Destination.add(in_msg.Requestor); -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// out_msg.Dirty := false; // By definition, the block is now clean -// out_msg.Acks := 1; -// out_msg.MessageSize := MessageSizeType:Response_Data; -// } -// } -// } - -// action(dd_sendExclusiveData, "\d", desc="Send exclusive data to requestor") { -// peek(requestQueue_in, RequestMsg) { -// enqueue(responseNetwork_out, ResponseMsg, latency=memory_latency) { -// out_msg.Address := address; -// out_msg.Type := CoherenceResponseType:DATA_EXCLUSIVE; -// out_msg.Sender := machineID; -// out_msg.Destination.add(in_msg.Requestor); -// out_msg.DataBlk := directory[in_msg.Address].DataBlk; -// out_msg.Dirty := false; // By definition, the block is now clean -// out_msg.Acks := 1; -// out_msg.MessageSize := MessageSizeType:Response_Data; -// } -// } -// } + action(qd_queueMemoryRequestFromDmaRead, "qd", desc="Queue off-chip fetch request") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := machineID; + out_msg.OriginalRequestorMachId := in_msg.Requestor; + out_msg.MessageSize := in_msg.MessageSize; + out_msg.DataBlk := directory[address].DataBlk; + DEBUG_EXPR(out_msg); + } + } + } action(f_forwardRequest, "f", desc="Forward requests") { if (getNumberOfLastLevelCaches() > 1) { @@ -338,6 +489,38 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(f_forwardWriteFromDma, "fw", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETX; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + + action(f_forwardReadFromDma, "fr", desc="Forward requests") { + peek(dmaRequestQueue_in, DMARequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETS; + // + // Send to all L1 caches, since the requestor is the memory controller + // itself + // + out_msg.Requestor := machineID; + out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + } + } + } + action(i_popIncomingRequestQueue, "i", desc="Pop incoming request queue") { requestQueue_in.dequeue(); } @@ -350,16 +533,52 @@ machine(Directory, "AMD Hammer-like protocol") memQueue_in.dequeue(); } + action(g_popTriggerQueue, "g", desc="Pop trigger queue") { + triggerQueue_in.dequeue(); + } + + action(p_popDmaRequestQueue, "pd", desc="pop dma request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(y_recycleDmaRequestQueue, "y", desc="recycle dma request queue") { + dmaRequestQueue_in.recycle(); + } + + action(r_recordMemoryData, "rd", desc="record data from memory to TBE") { + peek(memQueue_in, MemoryMsg) { + if (TBEs[address].CacheDirty == false) { + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + } + + action(r_recordCacheData, "rc", desc="record data from cache response to TBE") { + peek(responseToDir_in, ResponseMsg) { + TBEs[address].CacheDirty := true; + TBEs[address].DataBlk := in_msg.DataBlk; + } + } + action(l_writeDataToMemory, "l", desc="Write PUTX/PUTO data to memory") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty); assert(in_msg.MessageSize == MessageSizeType:Writeback_Data); - directory[in_msg.Address].DataBlk := in_msg.DataBlk; + directory[address].DataBlk := in_msg.DataBlk; DEBUG_EXPR(in_msg.Address); DEBUG_EXPR(in_msg.DataBlk); } } + action(dwt_writeDmaDataFromTBE, "dwt", desc="DMA Write data to memory from TBE") { + directory[address].DataBlk := TBEs[address].DataBlk; + directory[address].DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + } + + action(a_assertCacheData, "ac", desc="Assert that a cache provided the data") { + assert(TBEs[address].CacheDirty); + } + action(l_queueMemoryWBRequest, "lq", desc="Write PUTX data to memory") { peek(unblockNetwork_in, ResponseMsg) { enqueue(memQueue_out, MemoryMsg, latency="1") { @@ -370,6 +589,18 @@ machine(Directory, "AMD Hammer-like protocol") } } + action(ld_queueMemoryDmaWrite, "ld", desc="Write DMA data to memory") { + enqueue(memQueue_out, MemoryMsg, latency="1") { + out_msg.Address := address; + out_msg.Type := MemoryRequestType:MEMORY_WB; + // first, initialize the data blk to the current version of system memory + out_msg.DataBlk := TBEs[address].DataBlk; + // then add the dma write data + out_msg.DataBlk.copyPartial(TBEs[address].DmaDataBlk, addressOffset(TBEs[address].PhysicalAddress), TBEs[address].Len); + DEBUG_EXPR(out_msg); + } + } + action(ll_checkIncomingWriteback, "\l", desc="Check PUTX/PUTO response message") { peek(unblockNetwork_in, ResponseMsg) { assert(in_msg.Dirty == false); @@ -379,20 +610,17 @@ machine(Directory, "AMD Hammer-like protocol") // implementation. We include the data in the "dataless" // message so we can assert the clean data matches the datablock // in memory - assert(directory[in_msg.Address].DataBlk == in_msg.DataBlk); + assert(directory[address].DataBlk == in_msg.DataBlk); } } - // action(z_stall, "z", desc="Cannot be handled right now.") { - // Special name recognized as do nothing case - // } - action(zz_recycleRequest, "\z", desc="Recycle the request queue") { requestQueue_in.recycle(); } // TRANSITIONS + // Transitions out of E state transition(E, GETX, NO_B_W) { v_allocateTBE; rx_recordExclusiveInTBE; @@ -409,7 +637,14 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } - // + transition(E, DMA_READ, NO_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of O state transition(O, GETX, NO_B_W) { v_allocateTBE; r_recordDataInTBE; @@ -426,7 +661,20 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } - // + transition(O, DMA_READ, O_DR_B_W) { + vd_allocateDmaRequestInTBE; + qd_queueMemoryRequestFromDmaRead; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + transition({E, O, NO}, DMA_WRITE, NO_DW_B_W) { + vd_allocateDmaRequestInTBE; + f_forwardWriteFromDma; + p_popDmaRequestQueue; + } + + // Transitions out of NO state transition(NO, GETX, NO_B) { f_forwardRequest; i_popIncomingRequestQueue; @@ -442,16 +690,33 @@ machine(Directory, "AMD Hammer-like protocol") i_popIncomingRequestQueue; } + transition(NO, DMA_READ, NO_DR_B_D) { + vd_allocateDmaRequestInTBE; + f_forwardReadFromDma; + p_popDmaRequestQueue; + } + + // Nack PUT requests when races cause us to believe we own the data transition({O, E}, PUT) { b_sendWriteBackNack; i_popIncomingRequestQueue; } - // Blocked states - transition({NO_B, O_B, NO_B_W, O_B_W, NO_W, O_W, WB, WB_E_W, WB_O_W}, {GETS, GETX, PUT}) { + // Blocked transient states + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {GETS, GETX, PUT}) { zz_recycleRequest; } + transition({NO_B, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, + NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, + NO_W, O_W, WB, WB_E_W, WB_O_W}, + {DMA_READ, DMA_WRITE}) { + y_recycleDmaRequestQueue; + } + transition(NO_B, Unblock, NO) { j_popIncomingUnblockQueue; } @@ -466,6 +731,134 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } + transition(NO_DR_B_W, Memory_Data, NO_DR_B) { + r_recordMemoryData; + o_checkForCompletion; + l_popMemQueue; + } + + transition(O_DR_B_W, Memory_Data, O_DR_B) { + r_recordMemoryData; + dr_sendDmaData; + o_checkForCompletion; + l_popMemQueue; + } + + transition({NO_DR_B, O_DR_B, NO_DR_B_D, NO_DW_B_W}, Ack) { + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Ack) { + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Ack) { + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D}, Shared_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + r_setSharerBit; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B_W, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + n_popResponseQueue; + } + + transition({NO_DR_B, NO_DR_B_D, NO_DW_B_W}, Exclusive_Data) { + r_recordCacheData; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(NO_DR_B, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data, O) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(O_DR_B, All_acks_and_data_no_sharers, O) { + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B, All_acks_and_data_no_sharers, E) { + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DR_B_D, All_acks_and_data_no_sharers, E) { + a_assertCacheData; + // + // Note that the DMA consistency model allows us to send the DMA device + // a response as soon as we receive valid data and prior to receiving + // all acks. However, to simplify the protocol we wait for all acks. + // + dt_sendDmaDataFromTbe; + w_deallocateTBE; + g_popTriggerQueue; + } + + transition(NO_DW_B_W, All_acks_and_data_no_sharers, NO_DW_W) { + dwt_writeDmaDataFromTBE; + ld_queueMemoryDmaWrite; + g_popTriggerQueue; + } + + transition(NO_DW_W, Memory_Ack, E) { + da_sendDmaAck; + w_deallocateTBE; + l_popMemQueue; + } + transition(O_B_W, Memory_Data, O_B) { d_sendData; w_deallocateTBE; @@ -490,7 +883,7 @@ machine(Directory, "AMD Hammer-like protocol") l_popMemQueue; } - // WB + // WB State Transistions transition(WB, Writeback_Dirty, WB_E_W) { l_writeDataToMemory; l_queueMemoryWBRequest; diff --git a/src/mem/protocol/MOESI_hammer-dma.sm b/src/mem/protocol/MOESI_hammer-dma.sm new file mode 100644 index 000000000..b217923a4 --- /dev/null +++ b/src/mem/protocol/MOESI_hammer-dma.sm @@ -0,0 +1,165 @@ +/* + * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +machine(DMA, "DMA Controller") +: int request_latency +{ + + MessageBuffer responseFromDir, network="From", virtual_network="4", ordered="true", no_vector="true"; + MessageBuffer reqToDirectory, network="To", virtual_network="5", ordered="false", no_vector="true"; + + enumeration(State, desc="DMA states", default="DMA_State_READY") { + READY, desc="Ready to accept a new request"; + BUSY_RD, desc="Busy: currently processing a request"; + BUSY_WR, desc="Busy: currently processing a request"; + } + + enumeration(Event, desc="DMA events") { + ReadRequest, desc="A new read request"; + WriteRequest, desc="A new write request"; + Data, desc="Data from a DMA memory read"; + Ack, desc="DMA write to memory completed"; + } + + external_type(DMASequencer) { + void ackCallback(); + void dataCallback(DataBlock); + } + + MessageBuffer mandatoryQueue, ordered="false", no_vector="true"; + DMASequencer dma_sequencer, factory='RubySystem::getDMASequencer(m_cfg["dma_sequencer"])', no_vector="true"; + State cur_state, no_vector="true"; + + State getState(Address addr) { + return cur_state; + } + void setState(Address addr, State state) { + cur_state := state; + } + + out_port(reqToDirectory_out, DMARequestMsg, reqToDirectory, desc="..."); + + in_port(dmaRequestQueue_in, SequencerMsg, mandatoryQueue, desc="...") { + if (dmaRequestQueue_in.isReady()) { + peek(dmaRequestQueue_in, SequencerMsg) { + if (in_msg.Type == SequencerRequestType:LD ) { + trigger(Event:ReadRequest, in_msg.LineAddress); + } else if (in_msg.Type == SequencerRequestType:ST) { + trigger(Event:WriteRequest, in_msg.LineAddress); + } else { + error("Invalid request type"); + } + } + } + } + + in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") { + if (dmaResponseQueue_in.isReady()) { + peek( dmaResponseQueue_in, DMAResponseMsg) { + if (in_msg.Type == DMAResponseType:ACK) { + trigger(Event:Ack, in_msg.LineAddress); + } else if (in_msg.Type == DMAResponseType:DATA) { + trigger(Event:Data, in_msg.LineAddress); + } else { + error("Invalid response type"); + } + } + } + } + + action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:READ; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { + peek(dmaRequestQueue_in, SequencerMsg) { + enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) { + out_msg.PhysicalAddress := in_msg.PhysicalAddress; + out_msg.LineAddress := in_msg.LineAddress; + out_msg.Type := DMARequestType:WRITE; + out_msg.Requestor := machineID; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + + action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.ackCallback(); + } + } + + action(d_dataCallback, "d", desc="Write data to dma sequencer") { + peek (dmaResponseQueue_in, DMAResponseMsg) { + dma_sequencer.dataCallback(in_msg.DataBlk); + } + } + + action(p_popRequestQueue, "p", desc="Pop request queue") { + dmaRequestQueue_in.dequeue(); + } + + action(p_popResponseQueue, "\p", desc="Pop request queue") { + dmaResponseQueue_in.dequeue(); + } + + transition(READY, ReadRequest, BUSY_RD) { + s_sendReadRequest; + p_popRequestQueue; + } + + transition(READY, WriteRequest, BUSY_WR) { + s_sendWriteRequest; + p_popRequestQueue; + } + + transition(BUSY_RD, Data, READY) { + d_dataCallback; + p_popResponseQueue; + } + + transition(BUSY_WR, Ack, READY) { + a_ackCallback; + p_popResponseQueue; + } +} diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm index c9f146819..5d8226eb6 100644 --- a/src/mem/protocol/MOESI_hammer-msg.sm +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -85,3 +85,35 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") { int Acks, desc="How many messages this counts as"; MessageSizeType MessageSize, desc="size category of the message"; } + +enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") { + READ, desc="Memory Read"; + WRITE, desc="Memory Write"; + NULL, desc="Invalid"; +} + +enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") { + DATA, desc="DATA read"; + ACK, desc="ACK write"; + NULL, desc="Invalid"; +} + +structure(DMARequestMsg, desc="...", interface="NetworkMessage") { + DMARequestType Type, desc="Request type (read/write)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + MachineID Requestor, desc="Node who initiated the request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + int Len, desc="The length of the request"; + MessageSizeType MessageSize, desc="size category of the message"; +} + +structure(DMAResponseMsg, desc="...", interface="NetworkMessage") { + DMAResponseType Type, desc="Response type (DATA/ACK)"; + Address PhysicalAddress, desc="Physical address for this request"; + Address LineAddress, desc="Line address for this request"; + NetDest Destination, desc="Destination"; + DataBlock DataBlk, desc="DataBlk attached to this request"; + MessageSizeType MessageSize, desc="size category of the message"; +} diff --git a/src/mem/protocol/MOESI_hammer.slicc b/src/mem/protocol/MOESI_hammer.slicc index d49350e32..31ad47c2e 100644 --- a/src/mem/protocol/MOESI_hammer.slicc +++ b/src/mem/protocol/MOESI_hammer.slicc @@ -1,4 +1,5 @@ MOESI_hammer-msg.sm MOESI_hammer-cache.sm MOESI_hammer-dir.sm +MOESI_hammer-dma.sm standard_1level_CMP-protocol.sm diff --git a/src/mem/ruby/config/defaults.rb b/src/mem/ruby/config/defaults.rb index da7fa17c7..bb054ec4e 100644 --- a/src/mem/ruby/config/defaults.rb +++ b/src/mem/ruby/config/defaults.rb @@ -178,6 +178,10 @@ class MOESI_hammer_DirectoryController < DirectoryController default_param :memory_controller_latency, Integer, 12 end +class MOESI_hammer_DMAController < DMAController + default_param :request_latency, Integer, 6 +end + class RubySystem # Random seed used by the simulation. If set to "rand", the seed