Automated merge with ssh://hg@m5sim.org/m5

This commit is contained in:
Derek Hower 2010-01-22 17:23:21 -06:00
commit 589218168c
48 changed files with 4059 additions and 2170 deletions

View file

@ -18,3 +18,4 @@ dce5a8655829b7d2e24ce40cafc9c8873a71671f m5_2.0_beta5
1ac44b6c87ec71a8410c9a9c219269eca71f8077 m5_2.0_beta4
60a931b03fb165807f02bcccc4f7d0fd705a67a9 copyright_update
d8b246a665c160a31751b4091f097022cde16dd7 m5_2.0_beta6
5de565c4b7bdf46670611858b709c1eb50ad7c5c Calvin_Submission

View file

@ -27,12 +27,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* $Id: MSI_MOSI_CMP_directory-L1cache.sm 1.10 05/01/19 15:55:40-06:00 beckmann@s0-28.cs.wisc.edu $
*
*/
machine(L1Cache, "MSI Directory L1 Cache CMP")
: int l1_request_latency,
int l1_response_latency,
@ -47,15 +41,15 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// a local L1 -> this L2 bank, currently ordered with directory forwarded requests
MessageBuffer requestFromL1Cache, network="To", virtual_network="0", ordered="false";
// a local L1 -> this L2 bank
MessageBuffer responseFromL1Cache, network="To", virtual_network="3", ordered="false";
MessageBuffer unblockFromL1Cache, network="To", virtual_network="4", ordered="false";
MessageBuffer responseFromL1Cache, network="To", virtual_network="1", ordered="false";
MessageBuffer unblockFromL1Cache, network="To", virtual_network="2", ordered="false";
// To this node's L1 cache FROM the network
// a L2 bank -> this L1
MessageBuffer requestToL1Cache, network="From", virtual_network="1", ordered="false";
MessageBuffer requestToL1Cache, network="From", virtual_network="0", ordered="false";
// a L2 bank -> this L1
MessageBuffer responseToL1Cache, network="From", virtual_network="3", ordered="false";
MessageBuffer responseToL1Cache, network="From", virtual_network="1", ordered="false";
// STATES
enumeration(State, desc="Cache states", default="L1Cache_State_I") {
@ -244,7 +238,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// Response IntraChip L1 Network - response msg to this L1 cache
in_port(responseIntraChipL1Network_in, ResponseMsg, responseToL1Cache) {
if (responseIntraChipL1Network_in.isReady()) {
peek(responseIntraChipL1Network_in, ResponseMsg) {
peek(responseIntraChipL1Network_in, ResponseMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
if(in_msg.Type == CoherenceResponseType:DATA_EXCLUSIVE) {
trigger(Event:Data_Exclusive, in_msg.Address);
@ -277,7 +271,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// Request InterChip network - request from this L1 cache to the shared L2
in_port(requestIntraChipL1Network_in, RequestMsg, requestToL1Cache) {
if(requestIntraChipL1Network_in.isReady()) {
peek(requestIntraChipL1Network_in, RequestMsg) {
peek(requestIntraChipL1Network_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
if (in_msg.Type == CoherenceRequestType:INV) {
trigger(Event:Inv, in_msg.Address);
@ -298,7 +292,7 @@ machine(L1Cache, "MSI Directory L1 Cache CMP")
// Mandatory Queue between Node's CPU and its L1 caches
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, CacheMsg) {
peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
// Check for data access to blocks in I-cache and ifetchs to blocks in D-cache

View file

@ -40,14 +40,14 @@ machine(L2Cache, "MESI Directory L2 Cache CMP")
// L2 BANK QUEUES
// From local bank of L2 cache TO the network
MessageBuffer DirRequestFromL2Cache, network="To", virtual_network="2", ordered="false"; // this L2 bank -> Memory
MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> a local L1
MessageBuffer responseFromL2Cache, network="To", virtual_network="3", ordered="false"; // this L2 bank -> a local L1 || Memory
MessageBuffer DirRequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> Memory
MessageBuffer L1RequestFromL2Cache, network="To", virtual_network="0", ordered="false"; // this L2 bank -> a local L1
MessageBuffer responseFromL2Cache, network="To", virtual_network="1", ordered="false"; // this L2 bank -> a local L1 || Memory
// FROM the network to this local bank of L2 cache
MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank
MessageBuffer unblockToL2Cache, network="From", virtual_network="2", ordered="false"; // a local L1 || Memory -> this L2 bank
MessageBuffer L1RequestToL2Cache, network="From", virtual_network="0", ordered="false"; // a local L1 -> this L2 bank
MessageBuffer responseToL2Cache, network="From", virtual_network="3", ordered="false"; // a local L1 || Memory -> this L2 bank
MessageBuffer responseToL2Cache, network="From", virtual_network="1", ordered="false"; // a local L1 || Memory -> this L2 bank
// MessageBuffer unblockToL2Cache, network="From", virtual_network="4", ordered="false"; // a local L1 || Memory -> this L2 bank
// STATES

View file

@ -3,8 +3,8 @@ machine(DMA, "DMA Controller")
: int request_latency
{
MessageBuffer responseFromDir, network="From", virtual_network="6", ordered="true", no_vector="true";
MessageBuffer reqToDirectory, network="To", virtual_network="7", ordered="false", no_vector="true";
MessageBuffer responseFromDir, network="From", virtual_network="1", ordered="true", no_vector="true";
MessageBuffer reqToDirectory, network="To", virtual_network="0", ordered="false", no_vector="true";
enumeration(State, desc="DMA states", default="DMA_State_READY") {
READY, desc="Ready to accept a new request";
@ -51,13 +51,13 @@ machine(DMA, "DMA Controller")
}
}
in_port(dmaResponseQueue_in, DMAResponseMsg, responseFromDir, desc="...") {
in_port(dmaResponseQueue_in, ResponseMsg, responseFromDir, desc="...") {
if (dmaResponseQueue_in.isReady()) {
peek( dmaResponseQueue_in, DMAResponseMsg) {
if (in_msg.Type == DMAResponseType:ACK) {
trigger(Event:Ack, in_msg.LineAddress);
} else if (in_msg.Type == DMAResponseType:DATA) {
trigger(Event:Data, in_msg.LineAddress);
peek( dmaResponseQueue_in, ResponseMsg) {
if (in_msg.Type == CoherenceResponseType:ACK) {
trigger(Event:Ack, makeLineAddress(in_msg.Address));
} else if (in_msg.Type == CoherenceResponseType:DATA) {
trigger(Event:Data, makeLineAddress(in_msg.Address));
} else {
error("Invalid response type");
}
@ -67,10 +67,9 @@ machine(DMA, "DMA Controller")
action(s_sendReadRequest, "s", desc="Send a DMA read request to memory") {
peek(dmaRequestQueue_in, SequencerMsg) {
enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
out_msg.PhysicalAddress := in_msg.PhysicalAddress;
out_msg.LineAddress := in_msg.LineAddress;
out_msg.Type := DMARequestType:READ;
enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) {
out_msg.Address := in_msg.PhysicalAddress;
out_msg.Type := CoherenceRequestType:DMA_READ;
out_msg.DataBlk := in_msg.DataBlk;
out_msg.Len := in_msg.Len;
out_msg.Destination.add(map_Address_to_Directory(address));
@ -81,10 +80,9 @@ machine(DMA, "DMA Controller")
action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") {
peek(dmaRequestQueue_in, SequencerMsg) {
enqueue(reqToDirectory_out, DMARequestMsg, latency=request_latency) {
out_msg.PhysicalAddress := in_msg.PhysicalAddress;
out_msg.LineAddress := in_msg.LineAddress;
out_msg.Type := DMARequestType:WRITE;
enqueue(reqToDirectory_out, RequestMsg, latency=request_latency) {
out_msg.Address := in_msg.PhysicalAddress;
out_msg.Type := CoherenceRequestType:DMA_WRITE;
out_msg.DataBlk := in_msg.DataBlk;
out_msg.Len := in_msg.Len;
out_msg.Destination.add(map_Address_to_Directory(address));
@ -94,13 +92,11 @@ machine(DMA, "DMA Controller")
}
action(a_ackCallback, "a", desc="Notify dma controller that write request completed") {
peek (dmaResponseQueue_in, DMAResponseMsg) {
dma_sequencer.ackCallback();
}
}
action(d_dataCallback, "d", desc="Write data to dma sequencer") {
peek (dmaResponseQueue_in, DMAResponseMsg) {
peek (dmaResponseQueue_in, ResponseMsg) {
dma_sequencer.dataCallback(in_msg.DataBlk);
}
}

View file

@ -40,13 +40,11 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
int directory_latency
{
MessageBuffer requestToDir, network="From", virtual_network="2", ordered="false";
MessageBuffer responseToDir, network="From", virtual_network="3", ordered="false";
MessageBuffer responseFromDir, network="To", virtual_network="3", ordered="false";
MessageBuffer dmaRequestFromDir, network="To", virtual_network="6", ordered="true";
MessageBuffer dmaRequestToDir, network="From", virtual_network="7", ordered="true";
MessageBuffer requestToDir, network="From", virtual_network="0", ordered="false";
MessageBuffer responseToDir, network="From", virtual_network="1", ordered="false";
MessageBuffer requestFromDir, network="To", virtual_network="0", ordered="false";
MessageBuffer responseFromDir, network="To", virtual_network="1", ordered="false";
// STATES
enumeration(State, desc="Directory states", default="Directory_State_I") {
@ -118,9 +116,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
// DirectoryMemory directory, constructor_hack="i";
// MemoryControl memBuffer, constructor_hack="i";
DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])';
MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])';
TBETable TBEs, template_hack="<Directory_TBE>";
@ -167,32 +165,19 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
// ** OUT_PORTS **
out_port(responseNetwork_out, ResponseMsg, responseFromDir);
out_port(memQueue_out, MemoryMsg, memBuffer);
out_port(dmaResponseNetwork_out, DMAResponseMsg, dmaRequestFromDir);
// ** IN_PORTS **
//added by SS for dma
in_port(dmaRequestQueue_in, DMARequestMsg, dmaRequestToDir) {
if (dmaRequestQueue_in.isReady()) {
peek(dmaRequestQueue_in, DMARequestMsg) {
if (in_msg.Type == DMARequestType:READ) {
trigger(Event:DMA_READ, in_msg.LineAddress);
} else if (in_msg.Type == DMARequestType:WRITE) {
trigger(Event:DMA_WRITE, in_msg.LineAddress);
} else {
error("Invalid message");
}
}
}
}
in_port(requestNetwork_in, RequestMsg, requestToDir) {
if (requestNetwork_in.isReady()) {
peek(requestNetwork_in, RequestMsg) {
assert(in_msg.Destination.isElement(machineID));
if (isGETRequest(in_msg.Type)) {
trigger(Event:Fetch, in_msg.Address);
} else if (in_msg.Type == CoherenceRequestType:DMA_READ) {
trigger(Event:DMA_READ, makeLineAddress(in_msg.Address));
} else if (in_msg.Type == CoherenceRequestType:DMA_WRITE) {
trigger(Event:DMA_WRITE, makeLineAddress(in_msg.Address));
} else {
DEBUG_EXPR(in_msg);
error("Invalid message");
@ -328,7 +313,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
//added by SS for dma
action(qf_queueMemoryFetchRequestDMA, "qfd", desc="Queue off-chip fetch request") {
peek(dmaRequestQueue_in, DMARequestMsg) {
peek(requestNetwork_in, RequestMsg) {
enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_READ;
@ -342,14 +327,14 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
action(p_popIncomingDMARequestQueue, "p", desc="Pop incoming DMA queue") {
dmaRequestQueue_in.dequeue();
requestNetwork_in.dequeue();
}
action(dr_sendDMAData, "dr", desc="Send Data to DMA controller from directory") {
peek(memQueue_in, MemoryMsg) {
enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
out_msg.PhysicalAddress := address;
out_msg.Type := DMAResponseType:DATA;
enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be
out_msg.Destination.add(map_Address_to_DMA(address));
out_msg.MessageSize := MessageSizeType:Response_Data;
@ -358,15 +343,13 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
action(dw_writeDMAData, "dw", desc="DMA Write data to memory") {
peek(dmaRequestQueue_in, DMARequestMsg) {
//directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, in_msg.Offset, in_msg.Len);
directory[in_msg.PhysicalAddress].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.PhysicalAddress), in_msg.Len);
peek(requestNetwork_in, RequestMsg) {
directory[address].DataBlk.copyPartial(in_msg.DataBlk, addressOffset(in_msg.Address), in_msg.Len);
}
}
action(qw_queueMemoryWBRequest_partial, "qwp", desc="Queue off-chip writeback request") {
peek(dmaRequestQueue_in, DMARequestMsg) {
peek(requestNetwork_in, RequestMsg) {
enqueue(memQueue_out, MemoryMsg, latency=to_mem_ctrl_latency) {
out_msg.Address := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
@ -384,9 +367,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
action(da_sendDMAAck, "da", desc="Send Ack to DMA controller") {
enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
out_msg.PhysicalAddress := address;
out_msg.Type := DMAResponseType:ACK;
enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:ACK;
out_msg.Destination.add(map_Address_to_DMA(address));
out_msg.MessageSize := MessageSizeType:Writeback_Control;
}
@ -397,7 +380,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
action(zz_recycleDMAQueue, "zz", desc="recycle DMA queue") {
dmaRequestQueue_in.recycle();
requestNetwork_in.recycle();
}
@ -410,12 +393,12 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
action(inv_sendCacheInvalidate, "inv", desc="Invalidate a cache block") {
peek(dmaRequestQueue_in, DMARequestMsg) {
peek(requestNetwork_in, RequestMsg) {
enqueue(responseNetwork_out, ResponseMsg, latency=directory_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:INV;
out_msg.Sender := machineID;
out_msg.Destination := directory[in_msg.PhysicalAddress].Owner;
out_msg.Destination := directory[address].Owner;
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
@ -424,9 +407,9 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
action(drp_sendDMAData, "drp", desc="Send Data to DMA controller from incoming PUTX") {
peek(responseNetwork_in, ResponseMsg) {
enqueue(dmaResponseNetwork_out, DMAResponseMsg, latency=to_mem_ctrl_latency) {
out_msg.PhysicalAddress := address;
out_msg.Type := DMAResponseType:DATA;
enqueue(responseNetwork_out, ResponseMsg, latency=to_mem_ctrl_latency) {
out_msg.Address := address;
out_msg.Type := CoherenceResponseType:DATA;
out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be
out_msg.Destination.add(map_Address_to_DMA(address));
out_msg.MessageSize := MessageSizeType:Response_Data;
@ -439,10 +422,10 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
}
action(v_allocateTBE, "v", desc="Allocate TBE") {
peek(dmaRequestQueue_in, DMARequestMsg) {
peek(requestNetwork_in, RequestMsg) {
TBEs.allocate(address);
TBEs[address].DataBlk := in_msg.DataBlk;
TBEs[address].PhysicalAddress := in_msg.PhysicalAddress;
TBEs[address].PhysicalAddress := in_msg.Address;
TBEs[address].Len := in_msg.Len;
}
}
@ -514,7 +497,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
//added by SS for dma support
transition(I, DMA_READ, ID) {
qf_queueMemoryFetchRequestDMA;
p_popIncomingDMARequestQueue;
j_popIncomingRequestQueue;
}
transition(ID, Memory_Data, I) {
@ -525,7 +508,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
transition(I, DMA_WRITE, ID_W) {
dw_writeDMAData;
qw_queueMemoryWBRequest_partial;
p_popIncomingDMARequestQueue;
j_popIncomingRequestQueue;
}
transition(ID_W, Memory_Ack, I) {
@ -544,7 +527,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
transition(M, DMA_READ, M_DRD) {
inv_sendCacheInvalidate;
p_popIncomingDMARequestQueue;
j_popIncomingRequestQueue;
}
transition(M_DRD, Data, M_DRDI) {
@ -563,7 +546,7 @@ machine(Directory, "MESI_CMP_filter_directory protocol")
transition(M, DMA_WRITE, M_DWR) {
v_allocateTBE;
inv_sendCacheInvalidate;
p_popIncomingDMARequestQueue;
j_popIncomingRequestQueue;
}
transition(M_DWR, Data, M_DWRI) {

View file

@ -70,7 +70,8 @@ enumeration(CoherenceRequestType, desc="...") {
WB_NACK, desc="Writeback neg. ack";
FWD, desc="Generic FWD";
DMA_READ, desc="DMA Read";
DMA_WRITE, desc="DMA Write";
}
// CoherenceResponseType
@ -95,6 +96,7 @@ structure(RequestMsg, desc="...", interface="NetworkMessage") {
NetDest Destination, desc="What components receive the request, includes MachineType and num";
MessageSizeType MessageSize, desc="size category of the message";
DataBlock DataBlk, desc="Data for the cache line (if PUTX)";
int Len;
bool Dirty, default="false", desc="Dirty bit";
PrefetchBit Prefetch, desc="Is this a prefetch request";
}
@ -111,68 +113,4 @@ structure(ResponseMsg, desc="...", interface="NetworkMessage") {
MessageSizeType MessageSize, desc="size category of the message";
}
enumeration(DMARequestType, desc="...", default="DMARequestType_NULL") {
READ, desc="Memory Read";
WRITE, desc="Memory Write";
NULL, desc="Invalid";
}
enumeration(DMAResponseType, desc="...", default="DMAResponseType_NULL") {
DATA, desc="DATA read";
ACK, desc="ACK write";
NULL, desc="Invalid";
}
structure(DMARequestMsg, desc="...", interface="NetworkMessage") {
DMARequestType Type, desc="Request type (read/write)";
Address PhysicalAddress, desc="Physical address for this request";
Address LineAddress, desc="Line address for this request";
NetDest Destination, desc="Destination";
DataBlock DataBlk, desc="DataBlk attached to this request";
int Offset, desc="The offset into the datablock";
int Len, desc="The length of the request";
MessageSizeType MessageSize, desc="size category of the message";
}
structure(DMAResponseMsg, desc="...", interface="NetworkMessage") {
DMAResponseType Type, desc="Response type (DATA/ACK)";
Address PhysicalAddress, desc="Physical address for this request";
Address LineAddress, desc="Line address for this request";
NetDest Destination, desc="Destination";
DataBlock DataBlk, desc="DataBlk attached to this request";
MessageSizeType MessageSize, desc="size category of the message";
}
/*
GenericRequestType convertToGenericType(CoherenceRequestType type) {
if(type == CoherenceRequestType:PUTX) {
return GenericRequestType:PUTX;
} else if(type == CoherenceRequestType:GETS) {
return GenericRequestType:GETS;
} else if(type == CoherenceRequestType:GET_INSTR) {
return GenericRequestType:GET_INSTR;
} else if(type == CoherenceRequestType:GETX) {
return GenericRequestType:GETX;
} else if(type == CoherenceRequestType:UPGRADE) {
return GenericRequestType:UPGRADE;
} else if(type == CoherenceRequestType:PUTS) {
return GenericRequestType:PUTS;
} else if(type == CoherenceRequestType:INV) {
return GenericRequestType:INV;
} else if(type == CoherenceRequestType:INV_S) {
return GenericRequestType:INV_S;
} else if(type == CoherenceRequestType:L1_DG) {
return GenericRequestType:DOWNGRADE;
} else if(type == CoherenceRequestType:WB_ACK) {
return GenericRequestType:WB_ACK;
} else if(type == CoherenceRequestType:EXE_ACK) {
return GenericRequestType:EXE_ACK;
} else {
DEBUG_EXPR(type);
error("invalid CoherenceRequestType");
}
}
*/

View file

@ -17,6 +17,7 @@ machine(L1Cache, "MI Example L1 Cache")
II, desc="Not Present/Invalid, issued PUT";
M, desc="Modified";
MI, desc="Modified, issued PUT";
MII, desc="Modified, issued PUTX, received nack";
IS, desc="Issued request for LOAD/IFETCH";
IM, desc="Issued request for STORE/ATOMIC";
@ -137,7 +138,7 @@ machine(L1Cache, "MI Example L1 Cache")
in_port(forwardRequestNetwork_in, RequestMsg, forwardToCache) {
if (forwardRequestNetwork_in.isReady()) {
peek(forwardRequestNetwork_in, RequestMsg) {
peek(forwardRequestNetwork_in, RequestMsg, block_on="Address") {
if (in_msg.Type == CoherenceRequestType:GETX) {
trigger(Event:Fwd_GETX, in_msg.Address);
}
@ -159,7 +160,7 @@ machine(L1Cache, "MI Example L1 Cache")
in_port(responseNetwork_in, ResponseMsg, responseToCache) {
if (responseNetwork_in.isReady()) {
peek(responseNetwork_in, ResponseMsg) {
peek(responseNetwork_in, ResponseMsg, block_on="Address") {
if (in_msg.Type == CoherenceResponseType:DATA) {
trigger(Event:Data, in_msg.Address);
}
@ -173,7 +174,7 @@ machine(L1Cache, "MI Example L1 Cache")
// Mandatory Queue
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, CacheMsg) {
peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
if (cacheMemory.isTagPresent(in_msg.LineAddress) == false &&
@ -388,6 +389,16 @@ machine(L1Cache, "MI Example L1 Cache")
o_popForwardedRequestQueue;
}
transition(MI, Writeback_Nack, MII) {
o_popForwardedRequestQueue;
}
transition(MII, Fwd_GETX, I) {
ee_sendDataFromTBE;
w_deallocateTBE;
o_popForwardedRequestQueue;
}
transition(II, Writeback_Nack, I) {
w_deallocateTBE;
o_popForwardedRequestQueue;

View file

@ -83,9 +83,9 @@ machine(Directory, "Directory protocol")
}
// ** OBJECTS **
DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])';
MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])';
TBETable TBEs, template_hack="<Directory_TBE>";

View file

@ -306,7 +306,7 @@ machine(L1Cache, "Directory protocol")
// Request Network
in_port(requestNetwork_in, RequestMsg, requestToL1Cache) {
if (requestNetwork_in.isReady()) {
peek(requestNetwork_in, RequestMsg) {
peek(requestNetwork_in, RequestMsg, block_on="Address") {
assert(in_msg.Destination.isElement(machineID));
DEBUG_EXPR("MRM_DEBUG: L1 received");
DEBUG_EXPR(in_msg.Type);
@ -338,7 +338,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT
// Response Network
in_port(responseToL1Cache_in, ResponseMsg, responseToL1Cache) {
if (responseToL1Cache_in.isReady()) {
peek(responseToL1Cache_in, ResponseMsg) {
peek(responseToL1Cache_in, ResponseMsg, block_on="Address") {
if (in_msg.Type == CoherenceResponseType:ACK) {
trigger(Event:Ack, in_msg.Address);
} else if (in_msg.Type == CoherenceResponseType:DATA) {
@ -356,7 +356,7 @@ if (in_msg.Type == CoherenceRequestType:GETX || in_msg.Type == CoherenceRequestT
// Mandatory Queue between Node's CPU and its L1 caches
in_port(mandatoryQueue_in, CacheMsg, mandatoryQueue, desc="...") {
if (mandatoryQueue_in.isReady()) {
peek(mandatoryQueue_in, CacheMsg) {
peek(mandatoryQueue_in, CacheMsg, block_on="LineAddress") {
// Check for data access to blocks in I-cache and ifetchs to blocks in D-cache

View file

@ -127,8 +127,8 @@ machine(Directory, "Directory protocol")
// ** OBJECTS **
DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory_name"])';
MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_controller_name"])';
DirectoryMemory directory, factory='RubySystem::getDirectory(m_cfg["directory"])';
MemoryControl memBuffer, factory='RubySystem::getMemoryControl(m_cfg["memory_control"])';
TBETable TBEs, template_hack="<Directory_TBE>";
State getState(Address addr) {

View file

@ -33,6 +33,7 @@ int getNumberOfLastLevelCaches();
// NodeID map_address_to_node(Address addr);
MachineID mapAddressToRange(Address addr, MachineType type, int low, int high);
NetDest broadcast(MachineType type);
MachineID map_Address_to_DMA(Address addr);
MachineID map_Address_to_Directory(Address addr);
NodeID map_Address_to_DirectoryNode(Address addr);

View file

@ -50,7 +50,7 @@ all_protocols = [
'MOESI_hammer',
]
opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MI_example',
opt = EnumVariable('PROTOCOL', 'Coherence Protocol for Ruby', 'MOESI_CMP_directory',
all_protocols)
sticky_vars.AddVariables(opt)

View file

@ -64,6 +64,11 @@ public:
(m_prio_heap.peekMin().m_time <= g_eventQueue_ptr->getTime()));
}
void delayHead() {
MessageBufferNode node = m_prio_heap.extractMin();
enqueue(node.m_msgptr, 1);
}
bool areNSlotsAvailable(int n);
int getPriority() { return m_priority_rank; }
void setPriority(int rank) { m_priority_rank = rank; }

View file

@ -12,8 +12,8 @@ class MESI_CMP_directory_L2CacheController < CacheController
def argv()
vec = super()
vec += " cache " + cache.obj_name
vec += " l2_request_latency "+l2_request_latency.to_s
vec += " l2_response_latency "+l2_response_latency.to_s
vec += " l2_request_latency "+request_latency.to_s
vec += " l2_response_latency "+response_latency.to_s
vec += " to_l1_latency "+to_L1_latency.to_s
return vec
end

View file

@ -13,7 +13,7 @@ RubySystem.reset
# default values
num_cores = 2
l1_cache_size_kb = 32768
l1_cache_size_bytes = 32768
l1_cache_assoc = 8
l1_cache_latency = 1
num_memories = 2
@ -34,6 +34,13 @@ for i in 0..$*.size-1 do
elsif $*[i] == "-m"
num_memories = $*[i+1].to_i
i = i+1
elsif $*[i] == "-R"
if $*[i+1] == "rand"
RubySystem.random_seed = "rand"
else
RubySystem.random_seed = $*[i+1].to_i
end
i = i+ 1
elsif $*[i] == "-s"
memory_size_mb = $*[i+1].to_i
i = i + 1

View file

@ -12,13 +12,13 @@ RubySystem.reset
# default values
num_cores = 2
l1_icache_size_bytes = 32768
l1_icache_size_kb = 64
l1_icache_assoc = 8
l1_icache_latency = 1
l1_dcache_size_bytes = 32768
l1_dcache_size_kb = 32
l1_dcache_assoc = 8
l1_dcache_latency = 1
l2_cache_size_bytes = 2048 # total size (sum of all banks)
l2_cache_size_kb = 8192 # total size (sum of all banks)
l2_cache_assoc = 16
l2_cache_latency = 12
num_l2_banks = num_cores
@ -26,7 +26,8 @@ num_memories = 1
memory_size_mb = 1024
num_dma = 1
protocol = "MOESI_CMP_token"
#default protocol
protocol = "MOESI_CMP_directory"
# check for overrides
@ -34,59 +35,50 @@ for i in 0..$*.size-1 do
if $*[i] == "-c" or $*[i] == "--protocol"
i += 1
protocol = $*[i]
elsif $*[i] == "-A"
l1_dcache_size_kb = $*[i+1].to_i
i = i+1
elsif $*[i] == "-B"
num_l2_banks = $*[i+1].to_i
i = i+1
elsif $*[i] == "-m"
num_memories = $*[i+1].to_i
i = i+1
elsif $*[i] == "-p"
num_cores = $*[i+1].to_i
i = i+1
elsif $*[i] == "-R"
if $*[i+1] == "rand"
RubySystem.random_seed = "rand"
else
RubySystem.random_seed = $*[i+1].to_i
end
i = i+ 1
elsif $*[i] == "-s"
memory_size_mb = $*[i+1].to_i
i = i + 1
elsif $*[i] == "-C"
l1_dcache_size_bytes = $*[i+1].to_i
i = i + 1
elsif $*[i] == "-A"
l1_dcache_assoc = $*[i+1].to_i
i = i + 1
elsif $*[i] == "-D"
num_dma = $*[i+1].to_i
i = i + 1
end
end
n_tokens = num_cores + 1
net_ports = Array.new
iface_ports = Array.new
#assert(protocol == "MESI_CMP_directory", __FILE__+" cannot be used with protocol "+protocol);
assert((protocol == "MESI_CMP_directory" or protocol == "MOESI_CMP_directory"), __FILE__+" cannot be used with protocol '#{protocol}'");
require protocol+".rb"
num_cores.times { |n|
icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_bytes, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_bytes, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
icache = SetAssociativeCache.new("l1i_"+n.to_s, l1_icache_size_kb*1024, l1_icache_latency, l1_icache_assoc, "PSEUDO_LRU")
dcache = SetAssociativeCache.new("l1d_"+n.to_s, l1_dcache_size_kb*1024, l1_dcache_latency, l1_dcache_assoc, "PSEUDO_LRU")
sequencer = Sequencer.new("Sequencer_"+n.to_s, icache, dcache)
iface_ports << sequencer
if protocol == "MOESI_CMP_token"
net_ports << MOESI_CMP_token_L1CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
icache, dcache,
sequencer,
num_l2_banks,
n_tokens)
end
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
icache, dcache,
sequencer,
num_l2_banks)
end
if protocol == "MESI_CMP_directory"
elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_L1CacheController.new("L1CacheController_"+n.to_s,
"L1Cache",
icache, dcache,
@ -95,47 +87,29 @@ num_cores.times { |n|
end
}
num_l2_banks.times { |n|
cache = SetAssociativeCache.new("l2u_"+n.to_s, l2_cache_size_bytes/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
if protocol == "MOESI_CMP_token"
net_ports << MOESI_CMP_token_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
cache,
n_tokens)
end
cache = SetAssociativeCache.new("l2u_"+n.to_s, (l2_cache_size_kb*1024)/num_l2_banks, l2_cache_latency, l2_cache_assoc, "PSEUDO_LRU")
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
cache)
end
if protocol == "MESI_CMP_directory"
elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_L2CacheController.new("L2CacheController_"+n.to_s,
"L2Cache",
cache)
end
net_ports.last.request_latency = l2_cache_latency + 2
net_ports.last.response_latency = l2_cache_latency + 2
}
num_memories.times { |n|
directory = DirectoryMemory.new("DirectoryMemory_"+n.to_s, memory_size_mb/num_memories)
memory_control = MemoryControl.new("MemoryControl_"+n.to_s)
if protocol == "MOESI_CMP_token"
net_ports << MOESI_CMP_token_DirectoryController.new("DirectoryController_"+n.to_s,
"Directory",
directory,
memory_control,
num_l2_banks)
end
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
"Directory",
directory,
memory_control)
end
if protocol == "MESI_CMP_directory"
elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_DirectoryController.new("DirectoryController_"+n.to_s,
"Directory",
directory,
@ -146,19 +120,11 @@ num_memories.times { |n|
num_dma.times { |n|
dma_sequencer = DMASequencer.new("DMASequencer_"+n.to_s)
iface_ports << dma_sequencer
if protocol == "MOESI_CMP_token"
net_ports << MOESI_CMP_token_DMAController.new("DMAController_"+n.to_s,
"DMA",
dma_sequencer)
end
if protocol == "MOESI_CMP_directory"
net_ports << MOESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
"DMA",
dma_sequencer)
end
if protocol == "MESI_CMP_directory"
elsif protocol == "MESI_CMP_directory"
net_ports << MESI_CMP_directory_DMAController.new("DMAController_"+n.to_s,
"DMA",
dma_sequencer)

View file

@ -0,0 +1,18 @@
#!/usr/bin/env ruby
# Error raised by the assert helper when a checked condition does not hold.
#
# msg    - the caller-supplied description, exactly as passed to new
# output - optional extra payload supplied by the caller (nil by default)
class AssertionFailure < RuntimeError
  attr_reader :msg, :output

  # message - description of the failed check
  # out     - optional associated output; defaults to nil
  def initialize(message, out=nil)
    # Hand the text to RuntimeError so that #message / #to_s report it even
    # when the exception is built directly rather than through
    # `raise obj, text` (which overrides the message on a copy).
    super(message)
    @msg = message
    @output = out
  end
end
# Marker exception with no behaviour of its own. It derives directly from
# Exception (not StandardError), so a bare `rescue` clause does not catch it.
class NotImplementedException < Exception; end
# Raises AssertionFailure when +condition+ is false or nil; otherwise returns
# nil without doing anything else.
#
# condition - value tested for truthiness
# message   - text stored on the exception and embedded in the raised message
def assert(condition,message)
  return if condition
  failure = AssertionFailure.new(message)
  raise failure, "\n\nAssertion failed: \n\n #{message}\n\n"
end

View file

@ -1,7 +1,7 @@
#!/usr/bin/ruby
class AssertionFailure < RuntimeError
end
root = File.dirname(File.expand_path(__FILE__))
require root+'/assert.rb'
class Boolean
def self.is_a?(obj)
@ -9,22 +9,46 @@ class Boolean
end
end
def assert(condition,message)
unless condition
raise AssertionFailure, "\n\nAssertion failed: \n\n #{message}\n\n"
end
end
class LibRubyObject
@@all_objs = Array.new
attr_reader :obj_name
@@default_params = Hash.new
@@param_types = Hash.new
attr_reader :obj_name
# Registers a new object and gives it accessors for every declared parameter.
#
# obj_name - must be a String (asserted below); stored in @obj_name.
#
# The instance is appended to the class-wide @@all_objs list and starts with
# an empty @params hash. For each ancestor class that has an entry in
# @@default_params, every parameter's default is copied into @params and a
# getter / setter pair is defined on self.class, unless the object already
# responds to that method name.
def initialize(obj_name)
assert obj_name.is_a?(String), "Obj_Name must be a string"
@obj_name = obj_name
@@all_objs << self
@params = Hash.new
# add all parent parameter accessors if they don't exist
self.class.ancestors.each { |ancestor|
# Only ancestors that registered parameters have an entry, keyed by class name.
if @@default_params.key?(ancestor.name.to_sym)
@@default_params[ancestor.name.to_sym].each { |p, default|
p = p.to_sym
# Seed this instance with the registered default.
@params[p] = default
if ! respond_to?(p)
# Getter: re-reads the ancestor's default if @params has no entry for p.
self.class.send(:define_method, p) {
@params[p] = @@default_params[ancestor.name.to_sym][p] if ! @params.key?(p)
return @params[p]
}
end
setter_method_name = (p.to_s + "=").to_sym
if ! respond_to?(setter_method_name)
# Setter: checks the value against the type recorded in @@param_types
# before storing it in @params.
self.class.send(:define_method, setter_method_name) { |val|
type = @@param_types[ancestor.name.to_sym][p]
# true/false are checked through type.is_a?(Boolean); every other value
# must be an instance of the declared type.
# NOTE(review): both messages say "default value" although they fire on
# assignment -- confirm whether the wording is intentional.
if val.is_a?(FalseClass) || val.is_a?(TrueClass)
assert type.is_a?(Boolean), "default value of param \"#{p}\" must be either true or false"
else
assert val.is_a?(type), "default value of param \"#{p}\", which is of type #{val.class.name} does not match expected type #{type}"
end
@params[p] = val
}
end
}
end
}
end
def cppClassName()
@ -35,40 +59,24 @@ class LibRubyObject
idx = self.name.to_sym
@@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
@@default_params[idx][param_name] = nil
send :define_method, param_name do
@params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name)
@params[param_name]
end
method_name = (param_name.to_s + "=").to_sym
send :define_method, method_name do |val|
if val.is_a?(FalseClass) || val.is_a?(TrueClass)
assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
else
assert val.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
end
# assert val.is_a?(type), "#{param_name} must be of type #{type}"
@params[param_name] = val
end
@@param_types[idx] = Hash.new if ! @@param_types.key?(idx)
@@param_types[idx][param_name] = type
end
def self.default_param(param_name, type, default)
idx = self.name.to_sym
@@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
if default.is_a?(FalseClass) || default.is_a?(TrueClass)
assert type.is_a?(Boolean), "default value of param \"#{param_name}\" must be either true or false"
else
assert default.is_a?(type), "default value of param \"#{param_name}\" does not match type #{type}"
end
idx = self.name.to_sym
@@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
@@default_params[idx][param_name] = default
send :define_method, param_name do
@params[param_name] = @@default_params[idx][param_name] if ! @params.key?(param_name)
@params[param_name]
end
method_name = (param_name.to_s + "=").to_sym
send :define_method, method_name do |val|
assert val.is_a?(type), "#{param_name} must be of type #{type}"
@params[param_name] = val
end
@@param_types[idx] = Hash.new if ! @@param_types.key?(idx)
@@param_types[idx][param_name] = type
end
def applyDefaults()
@ -86,6 +94,7 @@ class LibRubyObject
@params.each { |key, val|
str += key.id2name + " "
assert(val != nil, "parameter #{key} is nil")
if val.is_a?(LibRubyObject)
str += val.obj_name + " "
else
@ -123,36 +132,32 @@ end
class NetPort < LibRubyObject
attr :mach_type
attr_reader :version
param :version, Integer
@@type_cnt = Hash.new
@type_id
def initialize(obj_name, mach_type)
super(obj_name)
@mach_type = mach_type
@@type_cnt[mach_type] ||= 0
@type_id = @@type_cnt[mach_type]
self.version= @@type_cnt[mach_type] # sets the version parameter
@@type_cnt[mach_type] += 1
idx = "NetPort".to_sym
@@default_params[idx] = Hash.new if ! @@default_params.key?(idx)
@@default_params[idx].each { |key, val|
@params[key] = val if ! @params.key?(key)
}
end
def port_name
mach_type
end
def port_num
@type_id
end
def cppClassName
"NetPort"
version
end
def self.totalOfType(mach_type)
return @@type_cnt[mach_type]
end
def cppClassName()
"generated:"+@mach_type
end
end
class MemoryVector < LibRubyObject
@ -161,7 +166,7 @@ class MemoryVector < LibRubyObject
end
def cppClassName
"MemoryController"
"MemoryVector"
end
end
@ -296,37 +301,13 @@ private
end
class CacheController < NetPort
@@total_cache_controllers = Hash.new
def initialize(obj_name, mach_type, caches)
super(obj_name, mach_type)
caches.each { |cache|
cache.controller = self
}
if !@@total_cache_controllers.key?(mach_type)
@@total_cache_controllers[mach_type] = 0
end
@version = @@total_cache_controllers[mach_type]
@@total_cache_controllers[mach_type] += 1
# call inhereted parameters
transitions_per_cycle
buffer_size
number_of_TBEs
recycle_latency
end
def argv()
vec = "version "+@version.to_s
vec += " transitions_per_cycle "+@params[:transitions_per_cycle].to_s
vec += " buffer_size "+@params[:buffer_size].to_s
vec += " number_of_TBEs "+@params[:number_of_TBEs].to_s
vec += " recycle_latency "+@params[:recycle_latency].to_s
end
def cppClassName()
@ -334,89 +315,92 @@ class CacheController < NetPort
end
end
class Sequencer < IfacePort
end
class L1CacheController < CacheController
attr :sequencer
param :sequencer, Sequencer
def initialize(obj_name, mach_type, caches, sequencer)
super(obj_name, mach_type, caches)
@sequencer = sequencer
@sequencer.controller = self
@sequencer.version = @version
sequencer.controller = self
sequencer.version = version
self.sequencer= sequencer
end
def argv()
vec = super()
vec += " sequencer "+@sequencer.obj_name
# def argv()
# vec = super()
# vec += " sequencer "+@sequencer.obj_name
# end
end
class DirectoryMemory < LibRubyObject
end
class MemoryControl < LibRubyObject
end
class DirectoryController < NetPort
@@total_directory_controllers = 0
attr :directory
attr :memory_control
param :directory, DirectoryMemory
param :memory_control, MemoryControl
def initialize(obj_name, mach_type, directory, memory_control)
super(obj_name, mach_type)
@directory = directory
directory.controller = self
@memory_control = memory_control
directory.version = @@total_directory_controllers
self.directory = directory
self.memory_control = memory_control
@version = @@total_directory_controllers
@@total_directory_controllers += 1
buffer_size()
end
def argv()
"version "+@version.to_s+" directory_name "+@directory.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " memory_controller_name "+@memory_control.obj_name + " recycle_latency "+@params[:recycle_latency].to_s
end
def cppClassName()
"generated:"+@mach_type
end
end
class DMASequencer < IfacePort
end
class DMAController < NetPort
@@total_dma_controllers = 0
attr :dma_sequencer
param :dma_sequencer, DMASequencer
param :version, Integer
def initialize(obj_name, mach_type, dma_sequencer)
super(obj_name, mach_type)
@dma_sequencer = dma_sequencer
@version = @@total_dma_controllers
@@total_dma_controllers += 1
dma_sequencer.controller = self
buffer_size
dma_sequencer.version = @@total_dma_controllers
self.dma_sequencer = dma_sequencer
self.version = @@total_dma_controllers
@@total_dma_controllers += 1
end
def argv()
"version "+@version.to_s+" dma_sequencer "+@dma_sequencer.obj_name+" transitions_per_cycle "+@params[:transitions_per_cycle].to_s + " buffer_size "+@params[:buffer_size].to_s + " number_of_TBEs "+@params[:number_of_TBEs].to_s + " recycle_latency "+@params[:recycle_latency].to_s
end
def cppClassName()
"generated:"+@mach_type
end
end
class Cache < LibRubyObject
attr :size, :latency
attr_writer :controller
param :size, Integer
param :latency, Integer
param :controller, NetPort
def initialize(obj_name, size, latency)
super(obj_name)
assert size.is_a?(Integer), "Cache size must be an integer"
@size = size
@latency = latency
self.size = size
self.latency = latency
# controller must be set manually by the configuration script
# because there is a cyclic dependence
end
def args
"controller "+@controller.obj_name+" size "+@size.to_s+" latency "+@latency.to_s
end
end
class SetAssociativeCache < Cache
attr :assoc, :replacement_policy
param :assoc, Integer
param :replacement_policy, String
# latency can be either an integer, a float, or the string "auto"
# when an integer, it represents the number of cycles for a hit
@ -424,52 +408,48 @@ class SetAssociativeCache < Cache
# when set to "auto", libruby will attempt to find a realistic latency by running CACTI
def initialize(obj_name, size, latency, assoc, replacement_policy)
super(obj_name, size, latency)
@assoc = assoc
@replacement_policy = replacement_policy
self.assoc = assoc
self.replacement_policy = replacement_policy
end
def calculateLatency()
if @latency == "auto"
if self.latency == "auto"
cacti_args = Array.new()
cacti_args << (@size) << RubySystem.block_size_bytes << @assoc
cacti_args << (self.size*1024) << RubySystem.block_size_bytes << self.assoc
cacti_args << 1 << 0 << 0 << 0 << 1
cacti_args << RubySystem.tech_nm << RubySystem.block_size_bytes*8
cacti_args << 0 << 0 << 0 << 1 << 0 << 0 << 0 << 0 << 1
cacti_args << 360 << 0 << 0 << 0 << 0 << 1 << 1 << 1 << 1 << 0 << 0
cacti_args << 50 << 10 << 10 << 0 << 1 << 1
# cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ")
cacti_cmd = File.dirname(__FILE__) + "/cacti/cacti " + cacti_args.join(" ")
# IO.popen(cacti_cmd) { |pipe|
# str1 = pipe.readline
# str2 = pipe.readline
# results = str2.split(", ")
# if results.size != 61
# print "CACTI ERROR: CACTI produced unexpected output.\n"
# print "Are you using the version shipped with libruby?\n"
# raise Exception
# end
# latency_ns = results[5].to_f
# if (latency_ns == "1e+39")
# print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n"
# print "Either change the cache parameters or manually set the latency values\n"
# raise Exception
# end
# clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
# latency_cycles = (latency_ns / clk_period_ns).ceil
# @latency = latency_cycles
# }
elsif @latency.is_a?(Float)
clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
latency_cycles = (@latency / clk_period_ns).ceil
@latency = latency_cycles
elsif ! @latency.is_a?(Integer)
IO.popen(cacti_cmd) { |pipe|
str1 = pipe.readline
str2 = pipe.readline
results = str2.split(", ")
if results.size != 61
print "CACTI ERROR: CACTI produced unexpected output.\n"
print "Are you using the version shipped with libruby?\n"
raise Exception
end
latency_ns = results[5].to_f
if (latency_ns == "1e+39")
print "CACTI ERROR: CACTI was unable to realistically model the cache ",@obj_name,"\n"
print "Either change the cache parameters or manually set the latency values\n"
raise Exception
end
clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
latency_cycles = (latency_ns / clk_period_ns).ceil
self.latency = latency_cycles
}
elsif self.latency.is_a?(Float)
clk_period_ns = 1e9 * (1.0 / (RubySystem.freq_mhz * 1e6))
latency_cycles = (self.latency / clk_period_ns).ceil
self.latency = latency_cycles
elsif ! self.latency.is_a?(Integer)
raise Exception
end
def argv()
args+" assoc "+@assoc.to_s+" replacement_policy "+@replacement_policy
end
def cppClassName()
@ -478,20 +458,18 @@ class SetAssociativeCache < Cache
end
class DirectoryMemory < LibRubyObject
attr :size_mb
attr_writer :controller
param :size_mb, Integer
param :controller, NetPort
param :version, Integer
@@total_size_mb = 0
def initialize(obj_name, size_mb)
super(obj_name)
@size_mb = size_mb
self.size_mb = size_mb
@@total_size_mb += size_mb
end
def argv()
"version "+@controller.version.to_s+" size_mb "+@size_mb.to_s+" controller "+@controller.obj_name
end
def cppClassName()
"DirectoryMemory"
end
@ -501,43 +479,17 @@ class DirectoryMemory < LibRubyObject
end
end
#added by SS
class MemoryControl < LibRubyObject
attr :name
def initialize(obj_name)
super(obj_name)
@name = obj_name
end
def argv()
vec = super()
vec += " mem_bus_cycle_multiplier "+mem_bus_cycle_multiplier.to_s
vec += " banks_per_rank "+banks_per_rank.to_s
vec += " ranks_per_dimm "+ranks_per_dimm.to_s
vec += " dimms_per_channel "+dimms_per_channel.to_s
vec += " bank_bit_0 "+bank_bit_0.to_s
vec += " rank_bit_0 "+rank_bit_0.to_s
vec += " dimm_bit_0 "+dimm_bit_0.to_s
vec += " bank_queue_size "+bank_queue_size.to_s
vec += " bank_busy_time "+bank_busy_time.to_s
vec += " rank_rank_delay "+rank_rank_delay.to_s
vec += " read_write_delay "+read_write_delay.to_s
vec += " basic_bus_busy_time "+basic_bus_busy_time.to_s
vec += " mem_ctl_latency "+mem_ctl_latency.to_s
vec += " refresh_period "+refresh_period.to_s
vec += " tFaw "+tFaw.to_s
vec += " mem_random_arbitrate "+mem_random_arbitrate.to_s
vec += " mem_fixed_delay "+mem_fixed_delay.to_s
vec += " memory_controller_name "+@name
end
def cppClassName()
"MemoryControl"
end
end
class Sequencer < IfacePort
def cppClassName()
@ -564,17 +516,11 @@ end
class DMASequencer < IfacePort
param :controller, NetPort
param :version, Integer
def initialize(obj_name)
super(obj_name)
@params = {
:controller => nil,
:version => nil
}
end
def controller=(controller)
@params[:controller] = controller.obj_name
@params[:version] = controller.version
end
def cppClassName()
@ -582,7 +528,7 @@ class DMASequencer < IfacePort
end
def bochsConnType()
return "dma"+@params[:version].to_s
return "dma"+self.version.to_s
end
end
@ -613,22 +559,8 @@ class Network < LibRubyObject
param :topology, Topology
def initialize(name, topo)
super(name)
@params[:topology] = topo
topo.network= self
end
def argv()
vec = super()
vec += " endpoint_bandwidth "+endpoint_bandwidth.to_s
vec += " adaptive_routing "+adaptive_routing.to_s
vec += " number_of_virtual_networks "+number_of_virtual_networks.to_s
vec += " fan_out_degree "+fan_out_degree.to_s
vec += " buffer_size "+buffer_size.to_s
vec += " link_latency "+adaptive_routing.to_s
vec += " on_chip_latency "+on_chip_latency.to_s
vec += " control_msg_size "+control_msg_size.to_s
self.topology = topo
end
def printTopology()
@ -689,7 +621,6 @@ class CrossbarTopology < Topology
end
end
#added by SS
class Tracer < LibRubyObject
def initialize(obj_name)
super(obj_name)
@ -712,20 +643,10 @@ class Profiler < LibRubyObject
end
#added by SS
class GarnetNetwork < Network
def initialize(name, topo)
super(name, topo)
end
def argv()
vec = super()
vec += " flit_size "+flit_size.to_s
vec += " number_of_pipe_stages "+number_of_pipe_stages.to_s
vec += " vcs_per_class "+vcs_per_class.to_s
vec += " buffer_size "+buffer_size.to_s
vec += " using_network_testing "+using_network_testing.to_s
end
end
class GarnetFixedPipeline < GarnetNetwork
@ -733,10 +654,6 @@ class GarnetFixedPipeline < GarnetNetwork
super(name, net_ports)
end
def argv()
super()
end
def cppClassName()
"GarnetNetwork_d"
end
@ -747,14 +664,9 @@ class GarnetFlexiblePipeline < GarnetNetwork
super(name, net_ports)
end
def argv()
super()
end
def cppClassName()
"GarnetNetwork"
end
end
#added by SS
require "defaults.rb"

View file

@ -1,236 +0,0 @@
// FOR MOESI_CMP_token
//PARAM_BOOL( FilteringEnabled, false, false );
//PARAM_BOOL( DistributedPersistentEnabled, true, false );
//PARAM_BOOL( DynamicTimeoutEnabled, true, false );
//PARAM( RetryThreshold, 1, false );
//PARAM( FixedTimeoutLatency, 300, false );
//PARAM( TraceWarmupLength, 1000000, false );
//PARAM( callback_counter, 0, false );
//PARAM( NUM_COMPLETIONS_BEFORE_PASS, 0, false );
//PARAM( tester_length, 0, false );
//PARAM( synthetic_locks, 2048, false );
//PARAM( think_time, 5, false );
//PARAM( wait_time, 5, false );
//PARAM( hold_time, 5, false );
//PARAM( deterministic_addrs, 1, false );
//PARAM_STRING( SpecifiedGenerator, "DetermInvGenerator", false );
// For debugging purposes, one can enable a trace of all the protocol
// state machine changes. Unfortunately, the code to generate the
// trace is protocol specific. To enable the code for some of the
// standard protocols,
// 1. change "PROTOCOL_DEBUG_TRACE = true"
// 2. enable debug in Makefile
// 3. use the "--start 1" command line parameter or call
// "g_debug_ptr->setDebugTime(1)" to set the
// debug begin time
//
// this used to be ruby/common/Global.hh
//PARAM_BOOL( ProtocolDebugTrace, true, false );
// a string for filtering debugging output (for all g_debug vars see Debug.hh)
//PARAM_STRING( DEBUG_FILTER_STRING, "", false );
// filters debugging messages based on priority (low, med, high)
//PARAM_STRING( DEBUG_VERBOSITY_STRING, "", false );
// filters debugging messages based on a ruby time
//PARAM_ULONG( DEBUG_START_TIME, 0, false );
// sends debugging messages to a output filename
//PARAM_STRING( DEBUG_OUTPUT_FILENAME, "", false );
//PARAM_BOOL( ProfileHotLines, false, false );
// PROFILE_ALL_INSTRUCTIONS is used if you want Ruby to profile all instructions executed
// The following need to be true for this to work correctly:
// 1. Disable istc and dstc for this simulation run
// 2. Add the following line to the object "sim" in the checkpoint you run from:
// instruction_profile_line_size: 4
// This is used to have simics report back all instruction requests
// For more details on how to find out how to interpret the output physical instruction
// address, please read the document in the simics-howto directory
//PARAM_BOOL( ProfileAllInstructions, false, false );
// Set the following variable to true if you want a complete trace of
// PCs (physical address of program counters, with executing processor IDs)
// to be printed to stdout. Make sure to direct the simics output to a file.
// Otherwise, the run will take a really long time!
// A long run may write a file that can exceed the OS limit on file length
//PARAM_BOOL( PRINT_INSTRUCTION_TRACE, false, false );
//PARAM( DEBUG_CYCLE, 0, false );
// Make the entire memory system perfect
//PARAM_BOOL( PERFECT_MEMORY_SYSTEM, false, false );
//PARAM( PERFECT_MEMORY_SYSTEM_LATENCY, 0, false );
// *********************************************
// SYSTEM PARAMETERS
// *********************************************
//PARAM( NumberOfChips, 1, false );
//PARAM( NumberOfCores, 2, false );
//PARAM_ARRAY( NumberOfCoresPerChip, int, m_NumberOfChips, 2, false);
// *********************************************
// CACHE PARAMETERS
// *********************************************
//PARAM( NumberOfCaches, m_NumberOfCores, false );
//PARAM( NumberOfCacheLevels, 1, false );
/* this returns the number of discrete CacheMemories per level (i.e. a split L1 counts for 2) */
//PARAM_ARRAY( NumberOfCachesPerLevel, int, m_NumberOfCacheLevels, m_NumberOfCores, false ); // this is the number of discrete caches if the level is private
// or the number of banks if the level is shared
//PARAM( CacheIDFromParams, 1, true ); // returns a unique CacheID from the parameters (level, num, split_type)
//PARAM_ARRAY( CacheLatency, int, m_NumberOfCaches, 1, false ); // returns the latency for cache, indexed by CacheID
//PARAM_ARRAY( CacheSplitType, string, m_NumberOfCaches, "unified", false ); // returns "data", "instruction", or "unified", indexed by CacheID
//PARAM_ARRAY( CacheType, string, m_NumberOfCaches, "SetAssociative", false ); // returns the type of a cache, indexed by CacheID
//PARAM_ARRAY( CacheAssoc, int, m_NumberOfCaches, 4, false ); // returns the cache associativity, indexed by CacheID
//PARAM_ARRAY( NumberOfCacheSets, int, m_NumberOfCaches, 256, false ); // returns the number of cache sets, indexed by CacheID
//PARAM_ARRAY( NumberOfCacheSetBits, int, m_NumberOfCaches, log_int(256), false ); // returns the number of cache set bits, indexed by CacheID
//PARAM_ARRAY( CacheReplacementPolicy, string, m_NumberOfCaches, "PSEUDO_LRU", false ); // other option is "LRU"
//PARAM( DataBlockBytes, 64, false );
//PARAM( DataBlockBits, log_int(m_DataBlockBytes), false);
// ********************************************
// MEMORY PARAMETERS
// ********************************************
//PARAM_ARRAY( NumberOfControllersPerType, int, m_NumberOfCacheLevels+2, m_NumberOfCores, false);
//PARAM_ARRAY2D( NumberOfControllersPerTypePerChip, int, m_NumberOfCacheLevels+2, m_NumberOfChips, m_NumberOfCores, false);
// ********************************************
// DMA CONTROLLER PARAMETERS
// ********************************************
//PARAM( NumberOfDMA, 1, false );
//PARAM_ARRAY( NumberOfDMAPerChip, int, m_NumberOfChips, 1, false);
//PARAM_ARRAY( ChipNumFromDMAVersion, int, m_NumberOfDMA, 0, false );
//PARAM_ULONG( MemorySizeBytes, 4294967296, false );
//PARAM_ULONG( MemorySizeBits, 32, false);
//PARAM( NUM_PROCESSORS, 0, false );
//PARAM( NUM_L2_BANKS, 0, false );
//PARAM( NUM_MEMORIES, 0, false );
//PARAM( ProcsPerChip, 1, false );
// The following group of parameters are calculated. They must
// _always_ be left at zero.
//PARAM( NUM_CHIPS, 0, false );
//PARAM( NUM_CHIP_BITS, 0, false );
//PARAM( MEMORY_SIZE_BITS, 0, false );
//PARAM( DATA_BLOCK_BITS, 0, false );
//PARAM( PAGE_SIZE_BITS, 0, false );
//PARAM( NUM_PROCESSORS_BITS, 0, false );
//PARAM( PROCS_PER_CHIP_BITS, 0, false );
//PARAM( NUM_L2_BANKS_BITS, 0, false );
//PARAM( NUM_L2_BANKS_PER_CHIP_BITS, 0, false );
//PARAM( NUM_L2_BANKS_PER_CHIP, 0, false );
//PARAM( NUM_MEMORIES_BITS, 0, false );
//PARAM( NUM_MEMORIES_PER_CHIP, 0, false );
//PARAM( MEMORY_MODULE_BITS, 0, false );
//PARAM_ULONG( MEMORY_MODULE_BLOCKS, 0, false );
// TIMING PARAMETERS
//PARAM( DIRECTORY_CACHE_LATENCY, 6, false );
//PARAM( NULL_LATENCY, 1, false );
//PARAM( ISSUE_LATENCY, 2, false );
//PARAM( CACHE_RESPONSE_LATENCY, 12, false );
//PARAM( L2_RESPONSE_LATENCY, 6, false );
//PARAM( L2_TAG_LATENCY, 6, false );
//PARAM( L1_RESPONSE_LATENCY, 3, false );
//PARAM( MEMORY_RESPONSE_LATENCY_MINUS_2, 158, false );
//PARAM( DirectoryLatency, 6, false );
//PARAM( NetworkLinkLatency, 1, false );
//PARAM( COPY_HEAD_LATENCY, 4, false );
//PARAM( OnChipLinkLatency, 1, false );
//PARAM( RecycleLatency, 10, false );
//PARAM( L2_RECYCLE_LATENCY, 5, false );
//PARAM( TIMER_LATENCY, 10000, false );
//PARAM( TBE_RESPONSE_LATENCY, 1, false );
//PARAM_BOOL( PERIODIC_TIMER_WAKEUPS, true, false );
// constants used by CMP protocols
//PARAM( L1_REQUEST_LATENCY, 2, false );
//PARAM( L2_REQUEST_LATENCY, 4, false );
//PARAM_BOOL( SINGLE_ACCESS_L2_BANKS, true, false ); // hack to simulate multi-cycle L2 bank accesses
// Ruby cycles between when a sequencer issues a miss and when it arrives at
// the L1 cache controller
//PARAM( SequencerToControllerLatency, 4, false );
// Number of transitions each controller state machines can complete per cycle
//PARAM( L1CacheTransitionsPerCycle, 32, false );
//PARAM( L2CACHE_TRANSITIONS_PER_RUBY_CYCLE, 32, false );
//PARAM( DirectoryTransitionsPerCycle, 32, false );
//PARAM( DMATransitionsPerCycle, 1, false );
// Number of TBEs available for demand misses, prefetches, and replacements
//PARAM( NumberOfTBEs, 128, false );
//PARAM( NumberOfL1TBEs, 32, false );
//PARAM( NumberOfL2TBEs, 32, false );
// NOTE: Finite buffering allows us to simulate a wormhole routed network
// with idealized flow control. All message buffers within the network (i.e.
// the switch's input and output buffers) are set to the size specified below
// by the PROTOCOL_BUFFER_SIZE
//PARAM_BOOL( FiniteBuffering, false, false );
//PARAM( FiniteBufferSize, 3, false ); // Zero is unbounded buffers
// Number of requests buffered between the sequencer and the L1 controller
// This can be more accurately simulated in Opal, therefore it's set to an
// infinite number
// Only affects the simulation when FINITE_BUFFERING is enabled
//PARAM( ProcessorBufferSize, 10, false );
// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
// Controllers. Controls the number of requests issued by the L2 HW Prefetcher
//PARAM( ProtocolBufferSize, 32, false );
// NETWORK PARAMETERS
// Network Topology: See TopologyType in external.sm for valid values
//PARAM_STRING( NetworkTopology, "PT_TO_PT", false );
// Cache Design specifies file prefix for topology
//PARAM_STRING( CacheDesign, "NUCA", false );
//PARAM( EndpointBandwidth, 10000, false );
//PARAM_BOOL( AdaptiveRouting, true, false );
//PARAM( NumberOfVirtualNetworks, 6, false );
//PARAM( FanOutDegree, 4, false );
//PARAM_BOOL( PrintTopology, true, false );
// Princeton Network (Garnet)
//PARAM_BOOL( UsingGarnetNetwork, true, false );
//PARAM_BOOL( UsingDetailNetwork, false, false );
//PARAM_BOOL( UsingNetworkTesting, false, false );
//PARAM( FlitSize, 16, false );
//PARAM( NumberOfPipeStages, 4, false );
//PARAM( VCSPerClass, 4, false );
//PARAM( BufferSize, 4, false );
// MemoryControl:
//PARAM( MEM_BUS_CYCLE_MULTIPLIER, 10, false );
//PARAM( BANKS_PER_RANK, 8, false );
//PARAM( RANKS_PER_DIMM, 2, false );
//PARAM( DIMMS_PER_CHANNEL, 2, false );
//PARAM( BANK_BIT_0, 8, false );
//PARAM( RANK_BIT_0, 11, false );
//PARAM( DIMM_BIT_0, 12, false );
//PARAM( BANK_QUEUE_SIZE, 12, false );
//PARAM( BankBusyTime, 11, false );
//PARAM( RANK_RANK_DELAY, 1, false );
//PARAM( READ_WRITE_DELAY, 2, false );
//PARAM( BASIC_BUS_BUSY_TIME, 2, false );
//PARAM( MEM_CTL_LATENCY, 12, false );
//PARAM( REFRESH_PERIOD, 1560, false );
//PARAM( TFAW, 0, false );
//PARAM( MEM_RANDOM_ARBITRATE, 0, false );
//PARAM( MEM_FIXED_DELAY, 0, false );

View file

@ -1,7 +1,5 @@
#!/usr/bin/ruby
class NetPort < LibRubyObject
# number of transitions a SLICC state machine can transition per
# cycle
@ -9,9 +7,8 @@ class NetPort < LibRubyObject
# buffer_size limits the size of all other buffers connecting to
# SLICC Controllers. When 0, infinite buffering is used.
default_param :buffer_size, Integer, 0
default_param :buffer_size, Integer, 32
# added by SS for TBE
default_param :number_of_TBEs, Integer, 256
default_param :recycle_latency, Integer, 10
@ -38,16 +35,36 @@ class Debug < LibRubyObject
# 3. set start_time = 1
default_param :protocol_trace, Boolean, false
# a string for filtering debugging output (for all g_debug vars see Debug.h)
# a string for filtering debugging output. Valid options (also see Debug.cc):
# {"System", 's' },
# {"Node", 'N' },
# {"Queue", 'q' },
# {"Event Queue", 'e' },
# {"Network", 'n' },
# {"Sequencer", 'S' },
# {"Tester", 't' },
# {"Generated", 'g' },
# {"SLICC", 'l' },
# {"Network Queues", 'Q' },
# {"Time", 'T' },
# {"Network Internals", 'i' },
# {"Store Buffer", 'b' },
# {"Cache", 'c' },
# {"Predictor", 'p' },
# {"Allocator", 'a' }
#
# e.g., "sq" will print system and queue debugging messages
# Set to "none" for no debugging output
default_param :filter_string, String, "none"
# filters debugging messages based on priority (low, med, high)
# filters debugging messages based on priority (none, low, med, high)
default_param :verbosity_string, String, "none"
# filters debugging messages based on a ruby time
default_param :start_time, Integer, 1
# sends debugging messages to a output filename
# set to "none" to print to stdout
default_param :output_filename, String, "none"
end
@ -65,23 +82,23 @@ class Topology < LibRubyObject
# indicates whether the topology config will be displayed in the
# stats file
default_param :print_config, Boolean, true
default_param :print_config, Boolean, false
end
class Network < LibRubyObject
default_param :endpoint_bandwidth, Integer, 10000
default_param :adaptive_routing, Boolean, true
default_param :number_of_virtual_networks, Integer, 10
default_param :fan_out_degree, Integer, 4
default_param :number_of_virtual_networks, Integer, 5
# default_param :fan_out_degree, Integer, 4
# default buffer size. Setting to 0 indicates infinite buffering
default_param :buffer_size, Integer, 0
# default_param :buffer_size, Integer, 0
# local memory latency ?? NetworkLinkLatency
default_param :link_latency, Integer, 1
# on chip latency
default_param :on_chip_latency, Integer, 1
# default_param :on_chip_latency, Integer, 1
default_param :control_msg_size, Integer, 8
end
@ -94,20 +111,15 @@ class GarnetNetwork < Network
default_param :using_network_testing, Boolean, false
end
#added by SS
# Tracer defaults: warmup_length is an Integer parameter defaulting to 1000000.
class Tracer < LibRubyObject
  default_param(:warmup_length, Integer, 1_000_000)
end
#added by SS
# Profiler defaults: both Boolean parameters default to false.
class Profiler < LibRubyObject
  default_param(:hot_lines, Boolean, false)
  default_param(:all_instructions, Boolean, false)
end
#added by SS
class MemoryControl < LibRubyObject
default_param :mem_bus_cycle_multiplier, Integer, 10
@ -125,7 +137,7 @@ class MemoryControl < LibRubyObject
default_param :mem_ctl_latency, Integer, 12
default_param :refresh_period, Integer, 1560
default_param :tFaw, Integer, 0
default_param :mem_random_arbitrate, Integer, 0
default_param :mem_random_arbitrate, Integer, 11
default_param :mem_fixed_delay, Integer, 0
end
@ -163,49 +175,33 @@ class MOESI_CMP_directory_DirectoryController < DirectoryController
end
class MOESI_CMP_directory_DMAController < DMAController
default_param :request_latency, Integer, 6
default_param :response_latency, Integer, 6
default_param :request_latency, Integer, 14
default_param :response_latency, Integer, 14
end
## MOESI_CMP_token protocol
class MESI_CMP_directory_L2CacheController < CacheController
default_param :request_latency, Integer, 2
default_param :response_latency, Integer, 2
default_param :to_L1_latency, Integer, 1
class MOESI_CMP_token_L1CacheController < L1CacheController
#if 0 then automatically calculated
default_param :lowest_bit, Integer, 0
default_param :highest_bit, Integer, 0
end
class MESI_CMP_directory_L1CacheController < L1CacheController
default_param :l1_request_latency, Integer, 2
default_param :l1_response_latency, Integer, 2
default_param :retry_threshold, Integer, 1
default_param :fixed_timeout_latency, Integer, 300
default_param :dynamic_timeout_enabled, Boolean, true
default_param :to_L2_latency, Integer, 1
end
class MOESI_CMP_token_L2CacheController < CacheController
default_param :l2_request_latency, Integer, 2
default_param :l2_response_latency, Integer, 2
default_param :filtering_enabled, Boolean, true
end
class MOESI_CMP_token_DirectoryController < DirectoryController
class MESI_CMP_directory_DirectoryController < DirectoryController
default_param :to_mem_ctrl_latency, Integer, 1
default_param :directory_latency, Integer, 6
default_param :distributed_persistent, Boolean, true
default_param :fixed_timeout_latency, Integer, 300
end
class MOESI_CMP_token_DMAController < DMAController
default_param :request_latency, Integer, 6
default_param :response_latency, Integer, 6
end
## MOESI_hammer protocol
class MOESI_hammer_CacheController < L1CacheController
default_param :issue_latency, Integer, 2
default_param :cache_response_latency, Integer, 12
end
class MOESI_hammer_DirectoryController < DirectoryController
default_param :memory_controller_latency, Integer, 12
end
class MOESI_hammer_DMAController < DMAController
class MESI_CMP_directory_DMAController < DMAController
default_param :request_latency, Integer, 6
end
@ -219,8 +215,9 @@ class RubySystem
# When set to true, the simulation will insert random delays on
# message enqueue times. Note that even if this is set to false,
# you can still have a non-deterministic simulation if random seed
# is set to "rand". This is because the Ruby switches use random
# link priority elevation
# is set to "rand". This is used mainly to debug protocols by forcing
# really strange interleavings and should not be used for
# performance runs.
default_param :randomization, Boolean, false
# tech_nm is the device size used to calculate latency and area
@ -246,31 +243,6 @@ class RubySystem
default_param :profiler, Profiler, Profiler.new("profiler0")
end
#added by SS
class MESI_CMP_directory_L2CacheController < CacheController
default_param :l2_request_latency, Integer, 2
default_param :l2_response_latency, Integer, 2
default_param :to_L1_latency, Integer, 1
#if 0 then automatically calculated
default_param :lowest_bit, Integer, 0
default_param :highest_bit, Integer, 0
end
class MESI_CMP_directory_L1CacheController < L1CacheController
default_param :l1_request_latency, Integer, 2
default_param :l1_response_latency, Integer, 2
default_param :to_L2_latency, Integer, 1
end
class MESI_CMP_directory_DirectoryController < DirectoryController
default_param :to_mem_ctrl_latency, Integer, 1
default_param :directory_latency, Integer, 6
end
class MESI_CMP_directory_DMAController < DMAController
default_param :request_latency, Integer, 6
end

View file

@ -1,405 +0,0 @@
//
// This file has been modified by Kevin Moore and Dan Nussbaum of the
// Scalable Systems Research Group at Sun Microsystems Laboratories
// (http://research.sun.com/scalable/) to support the Adaptive
// Transactional Memory Test Platform (ATMTP). For information about
// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
//
// Please send email to atmtp-interest@sun.com with feedback, questions, or
// to request future announcements about ATMTP.
//
// ----------------------------------------------------------------------
//
// File modification date: 2008-02-23
//
// ----------------------------------------------------------------------
//
// ATMTP is distributed as part of the GEMS software toolset and is
// available for use and modification under the terms of version 2 of the
// GNU General Public License. The GNU General Public License is contained
// in the file $GEMS/LICENSE.
//
// Multifacet GEMS is free software; you can redistribute it and/or modify
// it under the terms of version 2 of the GNU General Public License as
// published by the Free Software Foundation.
//
// Multifacet GEMS is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with the Multifacet GEMS; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
//
// ----------------------------------------------------------------------
//
g_RANDOM_SEED: 1
g_DEADLOCK_THRESHOLD: 500000
// determines how many Simics cycles advance for every Ruby cycle
// (does not apply when running Opal)
SIMICS_RUBY_MULTIPLIER: 4
// Ruby cycles between when a sequencer issues a request and it arrives at
// the L1 cache controller
//
// ** important ** this parameter determines the L2 hit latency when
// using the SMP protocols with a combined L1/L2 controller (-cache.sm)
//
SEQUENCER_TO_CONTROLLER_LATENCY: 4
// When set to false, the L1 cache structures are probed for a hit in Sequencer.C
// If a request hits, it is *not* issued to the cache controller
// When set to true, all processor data requests issue to cache controller
//
// ** important ** this parameter must be set to false for proper L1/L2 hit timing
// for the SMP protocols with combined L1/L2 controllers (-cache.sm)
//
REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false
// When running with Opal in SMT configurations, this indicates the number of threads per physical processor
g_NUM_SMT_THREADS: 1
// Maximum number of requests (including SW prefetches) outstanding from
// the sequencer (Note: this also includes items buffered in the store
// buffer)
g_SEQUENCER_OUTSTANDING_REQUESTS: 16
PROTOCOL_DEBUG_TRACE: true
DEBUG_FILTER_STRING: none
DEBUG_VERBOSITY_STRING: none
DEBUG_START_TIME: 0
DEBUG_OUTPUT_FILENAME: none
TRANSACTION_TRACE_ENABLED: false
USER_MODE_DATA_ONLY: false
PROFILE_HOT_LINES: false
PROFILE_ALL_INSTRUCTIONS: false
PRINT_INSTRUCTION_TRACE: false
g_DEBUG_CYCLE: 0
BLOCK_STC: false
PERFECT_MEMORY_SYSTEM: false
PERFECT_MEMORY_SYSTEM_LATENCY: 0
DATA_BLOCK: false
// *********************************************
// CACHE & MEMORY PARAMETERS
// *********************************************
L1_CACHE_ASSOC: 4
L1_CACHE_NUM_SETS_BITS: 8
L2_CACHE_ASSOC: 4
L2_CACHE_NUM_SETS_BITS: 16
// 30 bits = 1 GB address space (4294967296 would be 32 bits = 4 GB)
g_MEMORY_SIZE_BYTES: 1073741824 //4294967296
g_DATA_BLOCK_BYTES: 64
g_PAGE_SIZE_BYTES: 4096
g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU
g_PROCS_PER_CHIP: 1
// set automatically
g_NUM_PROCESSORS: 0
g_NUM_L2_BANKS: 0
g_NUM_MEMORIES: 0
// The following group of parameters are calculated. They must
// _always_ be left at zero.
g_NUM_CHIPS: 0
g_NUM_CHIP_BITS: 0
g_MEMORY_SIZE_BITS: 0
g_DATA_BLOCK_BITS: 0
g_PAGE_SIZE_BITS: 0
g_NUM_PROCESSORS_BITS: 0
g_PROCS_PER_CHIP_BITS: 0
g_NUM_L2_BANKS_BITS: 0
g_NUM_L2_BANKS_PER_CHIP: 0
g_NUM_L2_BANKS_PER_CHIP_BITS: 0
g_NUM_MEMORIES_BITS: 0
g_NUM_MEMORIES_PER_CHIP: 0
g_MEMORY_MODULE_BITS: 0
g_MEMORY_MODULE_BLOCKS: 0
// For certain CMP protocols, determines whether the lowest bits of a block address
// are used to index to a L2 cache bank or into the sets of a
// single bank
// lowest highest
// true: g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
// false: g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
MAP_L2BANKS_TO_LOWEST_BITS: false
// TIMING PARAMETERS -- many of these are protocol specific. See SLICC files
// to determine where they apply
MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency
DIRECTORY_CACHE_LATENCY: 6
NULL_LATENCY: 1
ISSUE_LATENCY: 2
CACHE_RESPONSE_LATENCY: 12
L1_RESPONSE_LATENCY: 3
L2_RESPONSE_LATENCY: 6
L2_TAG_LATENCY: 6
DIRECTORY_LATENCY: 80
NETWORK_LINK_LATENCY: 1
COPY_HEAD_LATENCY: 4
ON_CHIP_LINK_LATENCY: 1
RECYCLE_LATENCY: 10
L2_RECYCLE_LATENCY: 5
TIMER_LATENCY: 10000
TBE_RESPONSE_LATENCY: 1
PERIODIC_TIMER_WAKEUPS: true
// constants used by CMP protocols
// cache bank access times
L1_REQUEST_LATENCY: 2
L2_REQUEST_LATENCY: 4
// Number of transitions each controller state machine can complete per cycle
// i.e. the number of ports to each controller
// L1cache is the sum of the L1I and L1D cache ports
L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
// much greater constraint on the concurrency of a L2 cache bank
L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32
DMA_TRANSITIONS_PER_RUBY_CYCLE: 1
// Number of TBEs available for demand misses, ALL prefetches, and replacements
// used by one-level protocols
NUMBER_OF_TBES: 128
// two-level protocols
NUMBER_OF_L1_TBES: 32
NUMBER_OF_L2_TBES: 32
// ** INTERCONNECT PARAMETERS **
//
g_PRINT_TOPOLOGY: true
g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology
FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology
g_adaptive_routing: true
NUMBER_OF_VIRTUAL_NETWORKS: 6
// bandwidth unit is 1/1000 byte per cycle. the following parameter is multiplied by
// topology specific link weights
g_endpoint_bandwidth: 10000
// ** finite buffering parameters
//
// note: Finite buffering allows us to simulate a realistic virtual cut-through
// routed network with idealized flow control. this feature is NOT heavily tested
FINITE_BUFFERING: false
// All message buffers within the network (i.e. the switch's input and
// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE
FINITE_BUFFER_SIZE: 3
// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controls the number of demand requests
// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controls the
// number of requests in the mandatory queue.
// Only affects the simulation when FINITE_BUFFERING is enabled.
PROCESSOR_BUFFER_SIZE: 10
// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
// Controllers. Controls the number of requests issued by the L2 HW Prefetcher
PROTOCOL_BUFFER_SIZE: 32
// ** end finite buffering parameters
// (deprecated)
// Allows only a single access at a time to a multi-cycle L2 bank.
// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY
// number of cycles. However the TBE table can be accessed in parallel.
SINGLE_ACCESS_L2_BANKS: true
// MOESI_CMP_token parameters (some might be deprecated)
g_FILTERING_ENABLED: false
g_DISTRIBUTED_PERSISTENT_ENABLED: true
g_RETRY_THRESHOLD: 1
g_DYNAMIC_TIMEOUT_ENABLED: true
g_FIXED_TIMEOUT_LATENCY: 300
// tester parameters (overridden by testerconfig.defaults)
//
// injects random message delays to excite protocol races
RANDOMIZATION: false
g_SYNTHETIC_DRIVER: false
g_DETERMINISTIC_DRIVER: false
g_trace_warmup_length: 1000000
g_bash_bandwidth_adaptive_threshold: 0.75
g_tester_length: 0
// # of synthetic locks == 16 * 128
g_synthetic_locks: 2048
g_deterministic_addrs: 1
g_SpecifiedGenerator: DetermInvGenerator
g_callback_counter: 0
g_NUM_COMPLETIONS_BEFORE_PASS: 0
// parameters used by locking synthetic tester
g_think_time: 5
g_hold_time: 5
g_wait_time: 5
// Princeton Network (Garnet)
g_GARNET_NETWORK: true
g_DETAIL_NETWORK: false
g_NETWORK_TESTING: false
g_FLIT_SIZE: 16
g_NUM_PIPE_STAGES: 4
g_VCS_PER_CLASS: 4
g_BUFFER_SIZE: 4
///////////////////////////////////////////////////////////////////////////////
//
// MemoryControl:
// Basic cycle time of the memory controller. This defines the period which is
// used as the memory channel clock period, the address bus bit time, and the
// memory controller cycle time.
// Assuming a 200 MHz memory channel (DDR-400, which has 400 Mbits/sec per data pin),
// and a 2 GHz Ruby clock:
MEM_BUS_CYCLE_MULTIPLIER: 10
// How many internal banks in each DRAM chip:
BANKS_PER_RANK: 8
// How many sets of DRAM chips per DIMM.
RANKS_PER_DIMM: 2
// How many DIMMs per channel. (Currently the only thing that
// matters is the number of ranks per channel, i.e. the product
// of this parameter and RANKS_PER_DIMM. But if and when this is
// expanded to do FB-DIMMs, the distinction between the two
// will matter.)
DIMMS_PER_CHANNEL: 2
// Which bits to use to find the bank, rank, and DIMM numbers.
// You could choose to have the bank bits, rank bits, and DIMM bits
// in any order; here they are in that order.
// For these defaults, we assume this format for addresses:
// Offset within line: [5:0]
// Memory controller #: [7:6]
// Bank: [10:8]
// Rank: [11]
// DIMM: [12]
// Row addr / Col addr: [top:13]
// If you get these bits wrong, then some banks won't see any
// requests; you need to check for this in the .stats output.
BANK_BIT_0: 8
RANK_BIT_0: 11
DIMM_BIT_0: 12
// Maximum number of entries in each bank queue; set to whatever you want.
// If it is too small, you will see in the .stats file a lot of delay
// time spent in the common input queue.
BANK_QUEUE_SIZE: 12
// Bank cycle time (tRC) measured in memory cycles:
BANK_BUSY_TIME: 11
// This is how many memory address cycles to delay between reads to
// different ranks of DRAMs to allow for clock skew:
RANK_RANK_DELAY: 1
// This is how many memory address cycles to delay between a read
// and a write. This is based on two things: (1) the data bus is
// used one cycle earlier in the operation; (2) a round-trip wire
// delay from the controller to the DIMM that did the reading.
READ_WRITE_DELAY: 2
// Basic address and data bus occupancy. If you are assuming a
// 16-byte-wide data bus (pairs of DIMMs side-by-side), then
// the data bus occupancy matches the address bus occupancy at
// two cycles. But if the channel is only 8 bytes wide, you
// need to increase this bus occupancy time to 4 cycles.
BASIC_BUS_BUSY_TIME: 2
// Latency for returning a read request or writeback acknowledgement.
// Measured in memory address cycles.
// This equals tRCD + CL + AL + (four bit times)
// + (round trip on channel)
// + (memory control internal delays)
// It's going to be an approximation, so pick what you like.
// Note: The fact that latency is a constant, and does not depend on two
// low-order address bits, implies that our memory controller either:
// (a) tells the DRAM to read the critical word first, and sends the
// critical word first back to the CPU, or (b) waits until it has
// seen all four bit times on the data wires before sending anything
// back. Either is plausible. If (a), remove the "four bit times"
// term from the calculation above.
MEM_CTL_LATENCY: 12
// refresh_period is the number of memory cycles between refresh
// of row x in bank n and refresh of row x+1 in bank n. For DDR-400,
// this is typically 7.8 usec for commercial systems; after 8192 such
// refreshes, this will have refreshed the whole chip in 64 msec. If
// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory
// controller will divide this by the total number of banks, and kick
// off a refresh to *somebody* every time that amount is counted
// down to zero. (There will be some rounding error there, but it
// should have minimal effect.)
REFRESH_PERIOD: 1560
// tFAW is a DRAM chip parameter which restricts the number of
// activates that can be done within a certain window of time.
// The window is specified here in terms of number of memory
// controller cycles. At most four activates may be done during
// any such sliding window. If this number is set to be no more
// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
// It is typical in real systems for tFAW to have no effect, but
// it may be useful in throttling power. Set to zero to ignore.
TFAW: 0
// By default, the memory controller uses round-robin to arbitrate
// between ready bank queues for use of the address bus. If you
// wish to add randomness to the system, set this parameter to
// one instead, and it will restart the round-robin pointer at a
// random bank number each cycle. If you want additional
// nondeterminism, set the parameter to some integer n >= 2, and
// it will in addition add a n% chance each cycle that a ready bank
// will be delayed an additional cycle. Note that if you are
// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
MEM_RANDOM_ARBITRATE: 0
// The following parameter, if nonzero, will disable the memory
// controller and instead give every request a fixed latency. The
// nonzero value specified here is measured in memory cycles and is
// just added to MEM_CTL_LATENCY. It will also show up in the stats
// file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
MEM_FIXED_DELAY: 0
// If instead of DDR-400, you wanted DDR-800, the channel gets faster
// but the basic operation of the DRAM core is unchanged.
// Busy times appear to double just because they are measured
// in smaller clock cycles. The performance advantage comes because
// the bus busy times don't actually quite double.
// You would use something like these values:
//
// MEM_BUS_CYCLE_MULTIPLIER: 5
// BANK_BUSY_TIME: 22
// RANK_RANK_DELAY: 2
// READ_WRITE_DELAY: 3
// BASIC_BUS_BUSY_TIME: 3
// MEM_CTL_LATENCY: 20
// REFRESH_PERIOD: 3120

View file

@ -1,50 +0,0 @@
//
// This file contains tester specific changes to the rubyconfig.defaults
// parameter values.
//
// Please: - Add new variables only to rubyconfig.defaults file.
// - Change them here only when necessary.
g_SIMICS: false
DATA_BLOCK: true
RANDOMIZATION: true
g_SYNTHETIC_DRIVER: false
g_DETERMINISTIC_DRIVER: true
g_DEADLOCK_THRESHOLD: 500000
g_SpecifiedGenerator: DetermGETXGenerator
PROTOCOL_DEBUG_TRACE: true
//
// Generic cache parameters
//
// Cache sizes are smaller for the random tester to increase the amount
// of false sharing.
L1_CACHE_ASSOC: 2
L1_CACHE_NUM_SETS_BITS: 2
L2_CACHE_ASSOC: 2
L2_CACHE_NUM_SETS_BITS: 5
g_MEMORY_SIZE_BYTES: 1048576
//g_NETWORK_TOPOLOGY: FILE_SPECIFIED
RECYCLE_LATENCY: 1
//NUMBER_OF_VIRTUAL_NETWORKS: 5
//g_NUM_MEMORIES: 16
L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000
DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000
//g_PROCS_PER_CHIP: 2
//g_NUM_L2_BANKS: 16
//g_endpoint_bandwidth: 10000
//g_NUM_PROCESSORS: 16
//g_NUM_SMT_THREADS: 1
//g_GARNET_NETWORK: true
//g_DETAIL_NETWORK: true
//g_NETWORK_TESTING: false
//g_FLIT_SIZE: 32
//g_NUM_PIPE_STAGES: 5
//g_VCS_PER_CLASS: 2
//g_BUFFER_SIZE: 4

View file

@ -58,11 +58,8 @@ RubyRequestType string_to_RubyRequestType(std::string str)
// Stream insertion for RubyRequestType.
// Writes the textual name returned by RubyRequestType_to_string(obj) to
// `out`, flushes the stream, and returns `out` so insertions can be chained.
// No diagnostic text is written to cerr: a stream operator runs every time a
// request type is printed, so per-call debug traces would flood stderr.
ostream& operator<<(ostream& out, const RubyRequestType& obj)
{
  out << RubyRequestType_to_string(obj);
  // Keep the explicit flush so output ordering seen by callers is unchanged.
  out << flush;
  return out;
}

View file

@ -34,7 +34,7 @@ struct RubyRequest {
unsigned proc_id;
RubyRequest() {}
RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 0)
RubyRequest(uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, unsigned _proc_id = 100)
: paddr(_paddr), data(_data), len(_len), pc(_pc), type(_type), access_mode(_access_mode), proc_id(_proc_id)
{}
};
@ -71,6 +71,12 @@ RubyPortHandle libruby_get_port(const char* name, void (*hit_callback)(int64_t a
RubyPortHandle libruby_get_port_by_name(const char* name);
/**
* libruby_issue_request error return codes
*/
#define LIBRUBY_BUFFER_FULL -2
#define LIBRUBY_ALIASED_REQUEST -3
/**
* issue_request returns a unique access_id to identify the ruby
* transaction. This access_id is later returned to the caller via

View file

@ -184,7 +184,7 @@ void PerfectSwitch::wakeup()
assert(m_link_order.size() == m_routing_table.size());
assert(m_link_order.size() == m_out.size());
//changed by SS
if (m_network_ptr->getAdaptiveRouting()) {
if (m_network_ptr->isVNetOrdered(vnet)) {
// Don't adaptively route

View file

@ -79,7 +79,6 @@ void Topology::init(const vector<string> & argv)
m_connections = argv[i+1];
else if (argv[i] == "print_config") {
m_print_config = string_to_bool(argv[i+1]);
cerr << "print config: " << m_print_config << endl;
}
}
assert(m_network_ptr != NULL);

View file

@ -21,9 +21,8 @@ public:
virtual const string toString() const = 0; // returns text version of controller type
virtual const string getName() const = 0; // return instance name
virtual const MachineType getMachineType() const = 0;
virtual void set_atomic(Address addr) = 0;
virtual void started_writes() = 0;
virtual void clear_atomic() = 0;
virtual void blockOnQueue(Address, MessageBuffer*) = 0;
virtual void unblock(Address) = 0;
virtual void print(ostream & out) const = 0;
virtual void printStats(ostream & out) const = 0;

View file

@ -94,6 +94,17 @@ MachineID map_Address_to_DMA(const Address & addr)
return dma;
}
// Builds a NetDest holding every machine of the requested type, i.e. the
// machines numbered 0 .. MachineType_base_count(type) - 1.
inline
NetDest broadcast(MachineType type)
{
  NetDest everyone;
  int num = 0;
  while (num < MachineType_base_count(type)) {
    MachineID member = {type, num};
    everyone.add(member);
    ++num;
  }
  return everyone;
}
inline
MachineID mapAddressToRange(const Address & addr, MachineType type, int low_bit, int num_bits)
{

View file

@ -83,10 +83,8 @@ void CacheMemory::init(const vector<string> & argv)
}
}
assert(cache_size != -1);
m_cache_num_sets = (cache_size / m_cache_assoc) / RubySystem::getBlockSizeBytes();
assert(m_cache_num_sets > 1);
int num_lines = cache_size/RubySystem::getBlockSizeBytes();
m_cache_num_sets = num_lines / m_cache_assoc;
m_cache_num_set_bits = log_int(m_cache_num_sets);
assert(m_cache_num_set_bits > 0);
@ -165,13 +163,10 @@ int CacheMemory::findTagInSet(Index cacheSet, const Address& tag) const
{
assert(tag == line_address(tag));
// search the set for the tags
for (int i=0; i < m_cache_assoc; i++) {
if ((m_cache[cacheSet][i] != NULL) &&
(m_cache[cacheSet][i]->m_Address == tag) &&
(m_cache[cacheSet][i]->m_Permission != AccessPermission_NotPresent)) {
return i;
}
}
m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
if (it != m_tag_index.end())
if (m_cache[cacheSet][it->second]->m_Permission != AccessPermission_NotPresent)
return it->second;
return -1; // Not found
}
@ -181,10 +176,9 @@ int CacheMemory::findTagInSetIgnorePermissions(Index cacheSet, const Address& ta
{
assert(tag == line_address(tag));
// search the set for the tags
for (int i=0; i < m_cache_assoc; i++) {
if (m_cache[cacheSet][i] != NULL && m_cache[cacheSet][i]->m_Address == tag)
return i;
}
m5::hash_map<Address, int>::const_iterator it = m_tag_index.find(tag);
if (it != m_tag_index.end())
return it->second;
return -1; // Not found
}
@ -291,6 +285,7 @@ void CacheMemory::allocate(const Address& address, AbstractCacheEntry* entry)
m_cache[cacheSet][i]->m_Address = address;
m_cache[cacheSet][i]->m_Permission = AccessPermission_Invalid;
m_locked[cacheSet][i] = -1;
m_tag_index[address] = i;
m_replacementPolicy_ptr->touch(cacheSet, i, g_eventQueue_ptr->getTime());
@ -311,6 +306,7 @@ void CacheMemory::deallocate(const Address& address)
delete m_cache[cacheSet][location];
m_cache[cacheSet][location] = NULL;
m_locked[cacheSet][location] = -1;
m_tag_index.erase(address);
}
}

View file

@ -54,6 +54,7 @@
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/profiler/CacheProfiler.hh"
#include "mem/protocol/CacheMsg.hh"
#include "base/hashmap.hh"
#include <vector>
class CacheMemory {
@ -70,8 +71,6 @@ public:
// static CacheMemory* createCache(int level, int num, char split_type, AbstractCacheEntry* (*entry_factory)());
// static CacheMemory* getCache(int cache_id);
static int numberOfLastLevelCaches();
// Public Methods
void printConfig(ostream& out);
@ -106,6 +105,8 @@ public:
AccessPermission getPermission(const Address& address) const;
void changePermission(const Address& address, AccessPermission new_perm);
static int numberOfLastLevelCaches();
int getLatency() const { return m_latency; }
// Hook for checkpointing the contents of the cache
@ -158,6 +159,7 @@ private:
// The first index is the # of cache lines.
// The second index is the amount of associativity.
m5::hash_map<Address, int> m_tag_index;
Vector<Vector<AbstractCacheEntry*> > m_cache;
Vector<Vector<int> > m_locked;
@ -169,9 +171,11 @@ private:
int m_cache_num_set_bits;
int m_cache_assoc;
static Vector< CacheMemory* > m_all_caches;
static int m_num_last_level_caches;
static MachineType m_last_level_machine_type;
static Vector< CacheMemory* > m_all_caches;
};
#endif //CACHEMEMORY_H

View file

@ -25,6 +25,7 @@ public:
void init(const vector<string> & argv);
/* external interface */
int64_t makeRequest(const RubyRequest & request);
// Stub: not implemented here. Asserts unconditionally; the `return false` is
// only reached when assertions are compiled out. Both arguments are ignored.
bool isReady(const RubyRequest & request, bool dont_set = false) { assert(0); return false;};
// void issueRequest(uint64_t paddr, uint8* data, int len, bool rw);
bool busy() { return m_is_busy;}

View file

@ -44,7 +44,7 @@
int DirectoryMemory::m_num_directories = 0;
int DirectoryMemory::m_num_directories_bits = 0;
int DirectoryMemory::m_total_size_bytes = 0;
uint64_t DirectoryMemory::m_total_size_bytes = 0;
DirectoryMemory::DirectoryMemory(const string & name)
: m_name(name)

View file

@ -91,7 +91,7 @@ private:
static int m_num_directories;
static int m_num_directories_bits;
static int m_total_size_bytes;
static uint64_t m_total_size_bytes;
MemoryVector* m_ram;
};

View file

@ -22,60 +22,104 @@ class MemoryVector {
uint8* read(const Address & paddr, uint8* data, int len);
private:
uint8* getBlockPtr(const Address & paddr);
uint8* getBlockPtr(const PhysAddress & addr);
uint32 m_size;
uint8* m_vec;
uint8** m_pages;
uint32 m_num_pages;
const uint32 m_page_offset_mask;
};
inline
MemoryVector::MemoryVector()
: m_page_offset_mask(4095)
{
m_size = 0;
m_vec = NULL;
m_num_pages = 0;
m_pages = NULL;
}
inline
MemoryVector::MemoryVector(uint32 size)
: m_page_offset_mask(4095)
{
m_size = size;
m_vec = new uint8[size];
setSize(size);
}
inline
MemoryVector::~MemoryVector()
{
delete [] m_vec;
for (int i=0; i<m_num_pages; i++) {
if (m_pages[i] != 0) {
delete [] m_pages[i];
}
}
delete [] m_pages;
}
inline
void MemoryVector::setSize(uint32 size)
{
if (m_pages != NULL){
for (int i=0; i<m_num_pages; i++) {
if (m_pages[i] != 0) {
delete [] m_pages[i];
}
}
delete [] m_pages;
}
m_size = size;
if (m_vec != NULL)
delete [] m_vec;
m_vec = new uint8[size];
assert(size%4096 == 0);
m_num_pages = size >> 12;
m_pages = new uint8*[m_num_pages];
memset(m_pages, 0, m_num_pages * sizeof(uint8*));
}
inline
void MemoryVector::write(const Address & paddr, uint8* data, int len)
{
assert(paddr.getAddress() + len <= m_size);
memcpy(m_vec + paddr.getAddress(), data, len);
uint32 page_num = paddr.getAddress() >> 12;
if (m_pages[page_num] == 0) {
bool all_zeros = true;
for (int i=0;i<len;i++) {
if (data[i] != 0) {
all_zeros = false;
break;
}
}
if (all_zeros) return;
m_pages[page_num] = new uint8[4096];
memset(m_pages[page_num], 0, 4096);
uint32 offset = paddr.getAddress() & m_page_offset_mask;
memcpy(&m_pages[page_num][offset], data, len);
} else {
memcpy(&m_pages[page_num][paddr.getAddress()&m_page_offset_mask], data, len);
}
}
inline
uint8* MemoryVector::read(const Address & paddr, uint8* data, int len)
{
assert(paddr.getAddress() + len <= m_size);
memcpy(data, m_vec + paddr.getAddress(), len);
uint32 page_num = paddr.getAddress() >> 12;
if (m_pages[page_num] == 0) {
memset(data, 0, len);
} else {
memcpy(data, &m_pages[page_num][paddr.getAddress()&m_page_offset_mask], len);
}
return data;
}
inline
uint8* MemoryVector::getBlockPtr(const Address & paddr)
uint8* MemoryVector::getBlockPtr(const PhysAddress & paddr)
{
return m_vec + paddr.getAddress();
uint32 page_num = paddr.getAddress() >> 12;
if (m_pages[page_num] == 0) {
m_pages[page_num] = new uint8[4096];
memset(m_pages[page_num], 0, 4096);
}
return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
}
#endif // MEMORYVECTOR_H

View file

@ -27,6 +27,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mem/ruby/libruby.hh"
#include "mem/ruby/common/Global.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "mem/ruby/system/System.hh"
@ -44,14 +45,14 @@
//Sequencer::Sequencer(int core_id, MessageBuffer* mandatory_q)
#define LLSC_FAIL -2
// Stream insertion for SequencerRequest: prints the wrapped RubyRequest,
// flushes the stream, and returns it for chaining.
ostream& operator<<(std::ostream& out, const SequencerRequest& obj) {
  out << obj.ruby_request;
  out << flush;
  return out;
}
long int already = 0;
// Constructs a sequencer port with the given name and starts the four
// wait-cycle counters (store/load waiting on an outstanding store/load)
// at zero.
Sequencer::Sequencer(const string & name)
  : RubyPort(name),
    m_store_waiting_on_load_cycles(0),
    m_store_waiting_on_store_cycles(0),
    m_load_waiting_on_store_cycles(0),
    m_load_waiting_on_load_cycles(0)
{
}
void Sequencer::init(const vector<string> & argv)
@ -65,8 +66,6 @@ void Sequencer::init(const vector<string> & argv)
m_instCache_ptr = NULL;
m_dataCache_ptr = NULL;
m_controller = NULL;
m_servicing_atomic = -1;
m_atomics_counter = 0;
for (size_t i=0; i<argv.size(); i+=2) {
if ( argv[i] == "controller") {
m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
@ -110,8 +109,9 @@ void Sequencer::wakeup() {
SequencerRequest* request = m_readRequestTable.lookup(keys[i]);
if (current_time - request->issue_time >= m_deadlock_threshold) {
WARN_MSG("Possible Deadlock detected");
WARN_EXPR(request->ruby_request);
WARN_EXPR(request);
WARN_EXPR(m_version);
WARN_EXPR(request->ruby_request.paddr);
WARN_EXPR(keys.size());
WARN_EXPR(current_time);
WARN_EXPR(request->issue_time);
@ -125,7 +125,7 @@ void Sequencer::wakeup() {
SequencerRequest* request = m_writeRequestTable.lookup(keys[i]);
if (current_time - request->issue_time >= m_deadlock_threshold) {
WARN_MSG("Possible Deadlock detected");
WARN_EXPR(request->ruby_request);
WARN_EXPR(request);
WARN_EXPR(m_version);
WARN_EXPR(current_time);
WARN_EXPR(request->issue_time);
@ -145,6 +145,14 @@ void Sequencer::wakeup() {
}
}
// Writes this sequencer's name followed by its four wait-cycle counters,
// one per line, to `out`.
void Sequencer::printStats(ostream & out) const {
  out << "Sequencer: " << m_name << endl
      << " store_waiting_on_load_cycles: " << m_store_waiting_on_load_cycles << endl
      << " store_waiting_on_store_cycles: " << m_store_waiting_on_store_cycles << endl
      << " load_waiting_on_load_cycles: " << m_load_waiting_on_load_cycles << endl
      << " load_waiting_on_store_cycles: " << m_load_waiting_on_store_cycles << endl;
}
void Sequencer::printProgress(ostream& out) const{
/*
int total_demand = 0;
@ -267,6 +275,7 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
assert(m_writeRequestTable.exist(line_address(address)));
SequencerRequest* request = m_writeRequestTable.lookup(address);
removeRequest(request);
assert((request->ruby_request.type == RubyRequestType_ST) ||
@ -274,15 +283,15 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
(request->ruby_request.type == RubyRequestType_RMW_Write) ||
(request->ruby_request.type == RubyRequestType_Locked_Read) ||
(request->ruby_request.type == RubyRequestType_Locked_Write));
// POLINA: the assumption is that atomics are only on data cache and not instruction cache
if (request->ruby_request.type == RubyRequestType_Locked_Read) {
m_dataCache_ptr->setLocked(address, m_version);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Read) {
m_controller->set_atomic(address);
m_controller->blockOnQueue(address, m_mandatory_q_ptr);
}
else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
m_controller->clear_atomic();
m_controller->unblock(address);
}
hitCallback(request, data);
@ -354,47 +363,33 @@ void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) {
}
// Returns true if the sequencer already has a load or store outstanding
bool Sequencer::isReady(const RubyRequest& request) {
// POLINA: check if we are currently flushing the write buffer, if so Ruby is returned as not ready
// to simulate stalling of the front-end
// Do we stall all the sequencers? If it is atomic instruction - yes!
int Sequencer::isReady(const RubyRequest& request) {
bool is_outstanding_store = m_writeRequestTable.exist(line_address(Address(request.paddr)));
bool is_outstanding_load = m_readRequestTable.exist(line_address(Address(request.paddr)));
if ( is_outstanding_store ) {
if ((request.type == RubyRequestType_LD) ||
(request.type == RubyRequestType_IFETCH) ||
(request.type == RubyRequestType_RMW_Read)) {
m_store_waiting_on_load_cycles++;
} else {
m_store_waiting_on_store_cycles++;
}
return LIBRUBY_ALIASED_REQUEST;
} else if ( is_outstanding_load ) {
if ((request.type == RubyRequestType_ST) ||
(request.type == RubyRequestType_RMW_Write) ) {
m_load_waiting_on_store_cycles++;
} else {
m_load_waiting_on_load_cycles++;
}
return LIBRUBY_ALIASED_REQUEST;
}
if (m_outstanding_count >= m_max_outstanding_requests) {
return false;
return LIBRUBY_BUFFER_FULL;
}
if( m_writeRequestTable.exist(line_address(Address(request.paddr))) ||
m_readRequestTable.exist(line_address(Address(request.paddr))) ){
//cout << "OUTSTANDING REQUEST EXISTS " << p << " VER " << m_version << endl;
//printProgress(cout);
return false;
}
if (m_servicing_atomic != -1 && m_servicing_atomic != (int)request.proc_id) {
assert(m_atomics_counter > 0);
return false;
}
else {
if (request.type == RubyRequestType_RMW_Read) {
if (m_servicing_atomic == -1) {
assert(m_atomics_counter == 0);
m_servicing_atomic = (int)request.proc_id;
}
else {
assert(m_servicing_atomic == (int)request.proc_id);
}
m_atomics_counter++;
}
else if (request.type == RubyRequestType_RMW_Write) {
assert(m_servicing_atomic == (int)request.proc_id);
assert(m_atomics_counter > 0);
m_atomics_counter--;
if (m_atomics_counter == 0) {
m_servicing_atomic = -1;
}
}
}
return true;
return 1;
}
bool Sequencer::empty() const {
@ -405,11 +400,12 @@ bool Sequencer::empty() const {
int64_t Sequencer::makeRequest(const RubyRequest & request)
{
assert(Address(request.paddr).getOffset() + request.len <= RubySystem::getBlockSizeBytes());
if (isReady(request)) {
int ready = isReady(request);
if (ready > 0) {
int64_t id = makeUniqueRequestID();
SequencerRequest *srequest = new SequencerRequest(request, id, g_eventQueue_ptr->getTime());
bool found = insertRequest(srequest);
if (!found)
if (!found) {
if (request.type == RubyRequestType_Locked_Write) {
// NOTE: it is OK to check the locked flag here as the mandatory queue will be checked first
// ensuring that nothing comes between checking the flag and servicing the store
@ -420,16 +416,17 @@ int64_t Sequencer::makeRequest(const RubyRequest & request)
m_dataCache_ptr->clearLocked(line_address(Address(request.paddr)));
}
}
if (request.type == RubyRequestType_RMW_Write) {
m_controller->started_writes();
}
issueRequest(request);
// TODO: issue hardware prefetches here
return id;
}
else {
return -1;
assert(0);
return 0;
}
} else {
return ready;
}
}
@ -448,10 +445,8 @@ void Sequencer::issueRequest(const RubyRequest& request) {
ctype = CacheRequestType_ST;
break;
case RubyRequestType_Locked_Read:
ctype = CacheRequestType_ST;
break;
case RubyRequestType_Locked_Write:
ctype = CacheRequestType_ST;
ctype = CacheRequestType_ATOMIC;
break;
case RubyRequestType_RMW_Read:
ctype = CacheRequestType_ATOMIC;

View file

@ -86,10 +86,11 @@ public:
// called by Tester or Simics
int64_t makeRequest(const RubyRequest & request);
bool isReady(const RubyRequest& request);
int isReady(const RubyRequest& request);
bool empty() const;
void print(ostream& out) const;
void printStats(ostream & out) const;
void checkCoherence(const Address& address);
// bool getRubyMemoryValue(const Address& addr, char* value, unsigned int size_in_bytes);
@ -127,8 +128,11 @@ private:
// Global outstanding request count, across all request tables
int m_outstanding_count;
bool m_deadlock_check_scheduled;
int m_servicing_atomic;
int m_atomics_counter;
int m_store_waiting_on_load_cycles;
int m_store_waiting_on_store_cycles;
int m_load_waiting_on_store_cycles;
int m_load_waiting_on_load_cycles;
};
// Output operator declaration

View file

@ -335,6 +335,10 @@ void RubySystem::printStats(ostream& out)
m_profiler_ptr->printStats(out);
m_network_ptr->printStats(out);
for (map<string, Sequencer*>::const_iterator it = m_sequencers.begin();
it != m_sequencers.end(); it++) {
(*it).second->printStats(out);
}
for (map<string, CacheMemory*>::const_iterator it = m_caches.begin();
it != m_caches.end(); it++) {
(*it).second->printStats(out);

View file

@ -29,8 +29,8 @@ from slicc.ast.StatementAST import StatementAST
from slicc.symbols import Var
class PeekStatementAST(StatementAST):
def __init__(self, slicc, queue_name, type_ast, statements, method):
super(PeekStatementAST, self).__init__(slicc)
def __init__(self, slicc, queue_name, type_ast, pairs, statements, method):
super(PeekStatementAST, self).__init__(slicc, pairs)
self.queue_name = queue_name
self.type_ast = type_ast
@ -62,6 +62,17 @@ class PeekStatementAST(StatementAST):
const $mtid* in_msg_ptr;
in_msg_ptr = dynamic_cast<const $mtid *>(($qcode).${{self.method}}());
assert(in_msg_ptr != NULL);
''')
if self.pairs.has_key("block_on"):
address_field = self.pairs['block_on']
code('''
if ( (m_is_blocking == true) &&
(m_block_map.count(in_msg_ptr->m_$address_field) == 1) ) {
if (m_block_map[in_msg_ptr->m_$address_field] != &$qcode) {
$qcode.delayHead();
continue;
}
}
''')
# The other statements

View file

@ -514,8 +514,8 @@ class SLICC(Grammar):
p[0] = ast.EnqueueStatementAST(self, p[3], p[5], p[6], p[8])
def p_statement__peek(self, p):
"statement : PEEK '(' var ',' type ')' statements"
p[0] = ast.PeekStatementAST(self, p[3], p[5], p[7], "peek")
"statement : PEEK '(' var ',' type pairs ')' statements"
p[0] = ast.PeekStatementAST(self, p[3], p[5], p[6], p[8], "peek")
def p_statement__copy_head(self, p):
"statement : COPY_HEAD '(' var ',' var pairs ')' SEMI"

View file

@ -185,11 +185,10 @@ public:
void print(ostream& out) const;
void printConfig(ostream& out) const;
void wakeup();
void set_atomic(Address addr);
void started_writes();
void clear_atomic();
void printStats(ostream& out) const { s_profiler.dumpStats(out); }
void clearStats() { s_profiler.clearStats(); }
void blockOnQueue(Address addr, MessageBuffer* port);
void unblock(Address addr);
private:
''')
@ -198,17 +197,6 @@ private:
for param in self.config_parameters:
code('int m_${{param.ident}};')
if self.ident == "L1Cache":
code('''
int servicing_atomic;
bool started_receiving_writes;
Address locked_read_request1;
Address locked_read_request2;
Address locked_read_request3;
Address locked_read_request4;
int read_counter;
''')
code('''
int m_number_of_TBEs;
@ -222,6 +210,8 @@ map< string, string > m_cfg;
NodeID m_version;
Network* m_net_ptr;
MachineID m_machineID;
bool m_is_blocking;
map< Address, MessageBuffer* > m_block_map;
${ident}_Profiler s_profiler;
static int m_num_controllers;
// Internal functions
@ -298,16 +288,6 @@ $c_ident::$c_ident(const string &name)
{
''')
code.indent()
if self.ident == "L1Cache":
code('''
servicing_atomic = 0;
started_receiving_writes = false;
locked_read_request1 = Address(-1);
locked_read_request2 = Address(-1);
locked_read_request3 = Address(-1);
locked_read_request4 = Address(-1);
read_counter = 0;
''')
code('m_num_controllers++;')
for var in self.objects:
@ -517,6 +497,17 @@ const MachineType $c_ident::getMachineType() const{
return MachineType_${ident};
}
// Register port as the message buffer associated with addr and switch the
// controller into blocking mode. An existing entry for addr is overwritten.
void $c_ident::blockOnQueue(Address addr, MessageBuffer* port) {
    // Record the association first, then raise the flag; the two statements
    // are independent of each other.
    m_block_map[addr] = port;
    m_is_blocking = true;
}
// Remove the blocking entry for addr. Once no address remains in
// m_block_map, blocking mode is switched off.
void $c_ident::unblock(Address addr) {
    m_block_map.erase(addr);

    // Other addresses are still registered: stay in blocking mode.
    if (!m_block_map.empty()) {
        return;
    }

    m_is_blocking = false;
}
void $c_ident::print(ostream& out) const { out << "[$c_ident " << m_version << "]"; }
void $c_ident::printConfig(ostream& out) const {
@ -582,144 +573,12 @@ void ${ident}_Controller::wakeup()
# InPorts
#
# Find the position of the mandatory queue in the vector so
# that we can print it out first
mandatory_q = None
if self.ident == "L1Cache":
for i,port in enumerate(self.in_ports):
assert "c_code_in_port" in port
if str(port).find("mandatoryQueue_in") >= 0:
assert mandatory_q is None
mandatory_q = port
assert mandatory_q is not None
# print out the mandatory queue here
port = mandatory_q
code('// ${ident}InPort $port')
output = port["c_code_in_port"]
pos = output.find("TransitionResult result = doTransition((L1Cache_mandatory_request_type_to_event(((*in_msg_ptr)).m_Type)), L1Cache_getState(addr), addr);")
assert pos >= 0
atomics_string = '''
if ((((*in_msg_ptr)).m_Type) == CacheRequestType_ATOMIC) {
if (servicing_atomic == 0) {
if (locked_read_request1 == Address(-1)) {
assert(read_counter == 0);
locked_read_request1 = addr;
assert(read_counter == 0);
read_counter++;
}
else if (addr == locked_read_request1) {
; // do nothing
}
else {
assert(0); // should never be here if servicing one request at a time
}
}
else if (!started_receiving_writes) {
if (servicing_atomic == 1) {
if (locked_read_request2 == Address(-1)) {
assert(locked_read_request1 != Address(-1));
assert(read_counter == 1);
locked_read_request2 = addr;
assert(read_counter == 1);
read_counter++;
}
else if (addr == locked_read_request2) {
; // do nothing
}
else {
assert(0); // should never be here if servicing one request at a time
}
}
else if (servicing_atomic == 2) {
if (locked_read_request3 == Address(-1)) {
assert(locked_read_request1 != Address(-1));
assert(locked_read_request2 != Address(-1));
assert(read_counter == 1);
locked_read_request3 = addr;
assert(read_counter == 2);
read_counter++;
}
else if (addr == locked_read_request3) {
; // do nothing
}
else {
assert(0); // should never be here if servicing one request at a time
}
}
else if (servicing_atomic == 3) {
if (locked_read_request4 == Address(-1)) {
assert(locked_read_request1 != Address(-1));
assert(locked_read_request2 != Address(-1));
assert(locked_read_request3 != Address(-1));
assert(read_counter == 1);
locked_read_request4 = addr;
assert(read_counter == 3);
read_counter++;
}
else if (addr == locked_read_request4) {
; // do nothing
}
else {
assert(0); // should never be here if servicing one request at a time
}
}
else {
assert(0);
}
}
}
else {
if (servicing_atomic > 0) {
// reset
servicing_atomic = 0;
read_counter = 0;
started_receiving_writes = false;
locked_read_request1 = Address(-1);
locked_read_request2 = Address(-1);
locked_read_request3 = Address(-1);
locked_read_request4 = Address(-1);
}
}
'''
output = output[:pos] + atomics_string + output[pos:]
code('$output')
for port in self.in_ports:
# don't print out mandatory queue twice
if port == mandatory_q:
continue
if ident == "L1Cache":
if str(port).find("forwardRequestNetwork_in") >= 0:
code('''
bool postpone = false;
if ((((*m_L1Cache_forwardToCache_ptr)).isReady())) {
const RequestMsg* in_msg_ptr;
in_msg_ptr = dynamic_cast<const RequestMsg*>(((*m_L1Cache_forwardToCache_ptr)).peek());
if ((((servicing_atomic == 1) && (locked_read_request1 == ((*in_msg_ptr)).m_Address)) ||
((servicing_atomic == 2) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address)) ||
((servicing_atomic == 3) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address)) ||
((servicing_atomic == 4) && (locked_read_request1 == ((*in_msg_ptr)).m_Address || locked_read_request2 == ((*in_msg_ptr)).m_Address || locked_read_request3 == ((*in_msg_ptr)).m_Address || locked_read_request1 == ((*in_msg_ptr)).m_Address)))) {
postpone = true;
}
}
if (!postpone) {
''')
code.indent()
code('// ${ident}InPort $port')
code('${{port["c_code_in_port"]}}')
code.dedent()
if ident == "L1Cache":
if str(port).find("forwardRequestNetwork_in") >= 0:
code.dedent()
code('}')
code.indent()
code('')
code.dedent()
@ -730,52 +589,6 @@ if (!postpone) {
}
''')
if self.ident == "L1Cache":
code('''
void ${ident}_Controller::set_atomic(Address addr)
{
servicing_atomic++;
}
void ${ident}_Controller::started_writes()
{
started_receiving_writes = true;
}
void ${ident}_Controller::clear_atomic()
{
assert(servicing_atomic > 0);
read_counter--;
servicing_atomic--;
if (read_counter == 0) {
servicing_atomic = 0;
started_receiving_writes = false;
locked_read_request1 = Address(-1);
locked_read_request2 = Address(-1);
locked_read_request3 = Address(-1);
locked_read_request4 = Address(-1);
}
}
''')
else:
code('''
void ${ident}_Controller::started_writes()
{
assert(0);
}
void ${ident}_Controller::set_atomic(Address addr)
{
assert(0);
}
void ${ident}_Controller::clear_atomic()
{
assert(0);
}
''')
code.write(path, "%s_Wakeup.cc" % self.ident)
def printCSwitch(self, path):

View file

@ -430,8 +430,10 @@ enum ${{self.c_ident}} {
# For each field
for i,(ident,enum) in enumerate(self.enums.iteritems()):
desc = enum.get("desc", "No description avaliable")
init = ' = %s_FIRST' % self.c_ident if i == 0 else ''
if i == 0:
init = ' = %s_FIRST' % self.c_ident
else:
init = ''
code('${{self.c_ident}}_${{enum.ident}}$init, /**< $desc */')
code.dedent()
code('''

View file

@ -35,10 +35,7 @@ nb_cores = 8
cpus = [ MemTest() for i in xrange(nb_cores) ]
import ruby_config
ruby_memory = ruby_config.generate("MI_example-homogeneous.rb",
cores = nb_cores,
cache_size = 256,
cache_assoc = 2)
ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores)
# system simulated
system = System(cpu = cpus, funcmem = PhysicalMemory(),

View file

@ -8,10 +8,11 @@ from m5.params import *
def generate(config_file, cores=1, memories=1, memory_size=1024, \
cache_size=32768, cache_assoc=8, dmas=1,
ruby_tick='1t', ports_per_cpu=2):
ruby_tick='1t', ports_per_cpu=2, protocol='MOESI_CMP_directory'):
default = joinpath(dirname(__file__), '../../src/mem/ruby/config')
ruby_config = os.environ.get('RUBY_CONFIG', default)
args = [ "ruby", "-I", ruby_config, joinpath(ruby_config, "print_cfg.rb"),
"-c", str(protocol),
"-r", joinpath(ruby_config, config_file), "-p", str(cores),
"-m", str(memories), "-s", str(memory_size), "-C", str(cache_size),
"-A", str(cache_assoc), "-D", str(dmas)]

File diff suppressed because it is too large Load diff

View file

@ -1,76 +1,74 @@
["-r", "tests/configs/../../src/mem/ruby/config/MI_example-homogeneous.rb", "-p", "8", "-m", "1", "-s", "1024", "-C", "256", "-A", "2", "-D", "1"]
print config: 1
system.cpu7: completed 10000 read accesses @7023642
system.cpu5: completed 10000 read accesses @7028438
system.cpu3: completed 10000 read accesses @7034626
system.cpu1: completed 10000 read accesses @7035790
system.cpu2: completed 10000 read accesses @7062558
system.cpu6: completed 10000 read accesses @7078882
system.cpu0: completed 10000 read accesses @7080455
system.cpu4: completed 10000 read accesses @7095500
system.cpu1: completed 20000 read accesses @12915324
system.cpu3: completed 20000 read accesses @12958052
system.cpu5: completed 20000 read accesses @12993554
system.cpu2: completed 20000 read accesses @13010879
system.cpu4: completed 20000 read accesses @13014760
system.cpu6: completed 20000 read accesses @13031684
system.cpu7: completed 20000 read accesses @13051162
system.cpu0: completed 20000 read accesses @13128234
system.cpu3: completed 30000 read accesses @18784435
system.cpu1: completed 30000 read accesses @18859194
system.cpu5: completed 30000 read accesses @18903265
system.cpu7: completed 30000 read accesses @18952860
system.cpu4: completed 30000 read accesses @18981745
system.cpu6: completed 30000 read accesses @18987772
system.cpu0: completed 30000 read accesses @18993365
system.cpu2: completed 30000 read accesses @18994061
system.cpu3: completed 40000 read accesses @24748372
system.cpu2: completed 40000 read accesses @24758090
system.cpu1: completed 40000 read accesses @24768884
system.cpu7: completed 40000 read accesses @24891866
system.cpu0: completed 40000 read accesses @24907680
system.cpu6: completed 40000 read accesses @24933908
system.cpu5: completed 40000 read accesses @24949374
system.cpu4: completed 40000 read accesses @24963853
system.cpu3: completed 50000 read accesses @30655893
system.cpu2: completed 50000 read accesses @30705287
system.cpu1: completed 50000 read accesses @30752130
system.cpu0: completed 50000 read accesses @30795942
system.cpu5: completed 50000 read accesses @30809328
system.cpu7: completed 50000 read accesses @30857254
system.cpu6: completed 50000 read accesses @30935432
system.cpu4: completed 50000 read accesses @30960853
system.cpu3: completed 60000 read accesses @36647735
system.cpu2: completed 60000 read accesses @36648110
system.cpu1: completed 60000 read accesses @36690971
system.cpu7: completed 60000 read accesses @36746000
system.cpu5: completed 60000 read accesses @36746430
system.cpu0: completed 60000 read accesses @36840602
system.cpu6: completed 60000 read accesses @36900332
system.cpu4: completed 60000 read accesses @36954562
system.cpu2: completed 70000 read accesses @42614948
system.cpu1: completed 70000 read accesses @42616200
system.cpu5: completed 70000 read accesses @42679549
system.cpu7: completed 70000 read accesses @42707038
system.cpu3: completed 70000 read accesses @42725206
system.cpu0: completed 70000 read accesses @42774272
system.cpu6: completed 70000 read accesses @42850956
system.cpu4: completed 70000 read accesses @42872700
system.cpu5: completed 80000 read accesses @48577066
system.cpu7: completed 80000 read accesses @48608169
system.cpu2: completed 80000 read accesses @48616581
system.cpu1: completed 80000 read accesses @48637808
system.cpu0: completed 80000 read accesses @48726360
system.cpu3: completed 80000 read accesses @48754087
system.cpu4: completed 80000 read accesses @48848416
system.cpu6: completed 80000 read accesses @48849321
system.cpu5: completed 90000 read accesses @54536042
system.cpu0: completed 90000 read accesses @54536954
system.cpu7: completed 90000 read accesses @54554538
system.cpu1: completed 90000 read accesses @54575168
system.cpu2: completed 90000 read accesses @54648034
system.cpu3: completed 90000 read accesses @54719200
system.cpu6: completed 90000 read accesses @54807510
system.cpu4: completed 90000 read accesses @54840954
system.cpu1: completed 100000 read accesses @60455258
["-c", "MOESI_CMP_directory", "-r", "tests/configs/../../src/mem/ruby/config/TwoLevel_SplitL1UnifiedL2.rb", "-p", "8", "-m", "1", "-s", "1024", "-C", "32768", "-A", "8", "-D", "1"]
system.cpu0: completed 10000 read accesses @449430
system.cpu4: completed 10000 read accesses @459465
system.cpu7: completed 10000 read accesses @472231
system.cpu1: completed 10000 read accesses @477652
system.cpu2: completed 10000 read accesses @477942
system.cpu5: completed 10000 read accesses @490692
system.cpu6: completed 10000 read accesses @490860
system.cpu3: completed 10000 read accesses @498830
system.cpu0: completed 20000 read accesses @902454
system.cpu4: completed 20000 read accesses @921903
system.cpu7: completed 20000 read accesses @943132
system.cpu2: completed 20000 read accesses @963224
system.cpu1: completed 20000 read accesses @974292
system.cpu5: completed 20000 read accesses @979817
system.cpu6: completed 20000 read accesses @985156
system.cpu3: completed 20000 read accesses @1004197
system.cpu0: completed 30000 read accesses @1354388
system.cpu4: completed 30000 read accesses @1389321
system.cpu7: completed 30000 read accesses @1410714
system.cpu2: completed 30000 read accesses @1450104
system.cpu5: completed 30000 read accesses @1465068
system.cpu1: completed 30000 read accesses @1470940
system.cpu6: completed 30000 read accesses @1477854
system.cpu3: completed 30000 read accesses @1508078
system.cpu0: completed 40000 read accesses @1811799
system.cpu4: completed 40000 read accesses @1844299
system.cpu7: completed 40000 read accesses @1889814
system.cpu2: completed 40000 read accesses @1927073
system.cpu1: completed 40000 read accesses @1953874
system.cpu5: completed 40000 read accesses @1957168
system.cpu6: completed 40000 read accesses @1970748
system.cpu3: completed 40000 read accesses @2016002
system.cpu0: completed 50000 read accesses @2262162
system.cpu4: completed 50000 read accesses @2303172
system.cpu7: completed 50000 read accesses @2354840
system.cpu2: completed 50000 read accesses @2408362
system.cpu1: completed 50000 read accesses @2441228
system.cpu5: completed 50000 read accesses @2451414
system.cpu6: completed 50000 read accesses @2467657
system.cpu3: completed 50000 read accesses @2528058
system.cpu0: completed 60000 read accesses @2711396
system.cpu4: completed 60000 read accesses @2767668
system.cpu7: completed 60000 read accesses @2817212
system.cpu2: completed 60000 read accesses @2897042
system.cpu1: completed 60000 read accesses @2926178
system.cpu5: completed 60000 read accesses @2935676
system.cpu6: completed 60000 read accesses @2963110
system.cpu3: completed 60000 read accesses @3030360
system.cpu0: completed 70000 read accesses @3162444
system.cpu4: completed 70000 read accesses @3222154
system.cpu7: completed 70000 read accesses @3277574
system.cpu2: completed 70000 read accesses @3381865
system.cpu1: completed 70000 read accesses @3415612
system.cpu5: completed 70000 read accesses @3416504
system.cpu6: completed 70000 read accesses @3460152
system.cpu3: completed 70000 read accesses @3529552
system.cpu0: completed 80000 read accesses @3611998
system.cpu4: completed 80000 read accesses @3676108
system.cpu7: completed 80000 read accesses @3736694
system.cpu2: completed 80000 read accesses @3853296
system.cpu5: completed 80000 read accesses @3905416
system.cpu1: completed 80000 read accesses @3907045
system.cpu6: completed 80000 read accesses @3947118
system.cpu3: completed 80000 read accesses @4038186
system.cpu0: completed 90000 read accesses @4055478
system.cpu4: completed 90000 read accesses @4135882
system.cpu7: completed 90000 read accesses @4192986
system.cpu2: completed 90000 read accesses @4335194
system.cpu5: completed 90000 read accesses @4388557
system.cpu1: completed 90000 read accesses @4398614
system.cpu6: completed 90000 read accesses @4430678
system.cpu0: completed 100000 read accesses @4504972
hack: be nice to actually delete the event here

View file

@ -1,5 +1,3 @@
Redirecting stdout to build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby/simout
Redirecting stderr to build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby/simerr
M5 Simulator System
Copyright (c) 2001-2008
@ -7,11 +5,11 @@ The Regents of The University of Michigan
All Rights Reserved
M5 compiled Nov 18 2009 16:36:52
M5 revision c1d634e76817 6798 default qtip tip brad/ruby_memtest_refresh
M5 started Nov 18 2009 16:37:05
M5 executing on cabr0354
M5 compiled Jan 19 2010 17:11:57
M5 revision 21fbf0412e0d 6840 default tip
M5 started Jan 19 2010 17:12:00
M5 executing on bluedevil.cs.wisc.edu
command line: build/ALPHA_SE/m5.fast -d build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby -re tests/run.py build/ALPHA_SE/tests/fast/quick/50.memtest/alpha/linux/memtest-ruby
Global frequency set at 1000000000000 ticks per second
info: Entering event queue @ 0. Starting simulation...
Exiting @ tick 60455258 because maximum number of loads reached
Exiting @ tick 4504972 because maximum number of loads reached

View file

@ -1,34 +1,34 @@
---------- Begin Simulation Statistics ----------
host_mem_usage 2438776 # Number of bytes of host memory used
host_seconds 3924.24 # Real time elapsed on the host
host_tick_rate 15406 # Simulator tick rate (ticks/s)
host_mem_usage 1414304 # Number of bytes of host memory used
host_seconds 219.48 # Real time elapsed on the host
host_tick_rate 20525 # Simulator tick rate (ticks/s)
sim_freq 1000000000000 # Frequency of simulated ticks
sim_seconds 0.000060 # Number of seconds simulated
sim_ticks 60455258 # Number of ticks simulated
sim_seconds 0.000005 # Number of seconds simulated
sim_ticks 4504972 # Number of ticks simulated
system.cpu0.num_copies 0 # number of copy accesses completed
system.cpu0.num_reads 99982 # number of read accesses completed
system.cpu0.num_writes 53168 # number of write accesses completed
system.cpu0.num_reads 100000 # number of read accesses completed
system.cpu0.num_writes 54115 # number of write accesses completed
system.cpu1.num_copies 0 # number of copy accesses completed
system.cpu1.num_reads 100000 # number of read accesses completed
system.cpu1.num_writes 53657 # number of write accesses completed
system.cpu1.num_reads 92132 # number of read accesses completed
system.cpu1.num_writes 49991 # number of write accesses completed
system.cpu2.num_copies 0 # number of copy accesses completed
system.cpu2.num_reads 99758 # number of read accesses completed
system.cpu2.num_writes 53630 # number of write accesses completed
system.cpu2.num_reads 93521 # number of read accesses completed
system.cpu2.num_writes 50418 # number of write accesses completed
system.cpu3.num_copies 0 # number of copy accesses completed
system.cpu3.num_reads 99707 # number of read accesses completed
system.cpu3.num_writes 53628 # number of write accesses completed
system.cpu3.num_reads 89205 # number of read accesses completed
system.cpu3.num_writes 48106 # number of write accesses completed
system.cpu4.num_copies 0 # number of copy accesses completed
system.cpu4.num_reads 99425 # number of read accesses completed
system.cpu4.num_writes 53969 # number of write accesses completed
system.cpu4.num_reads 97961 # number of read accesses completed
system.cpu4.num_writes 52598 # number of write accesses completed
system.cpu5.num_copies 0 # number of copy accesses completed
system.cpu5.num_reads 99810 # number of read accesses completed
system.cpu5.num_writes 53444 # number of write accesses completed
system.cpu5.num_reads 92452 # number of read accesses completed
system.cpu5.num_writes 49744 # number of write accesses completed
system.cpu6.num_copies 0 # number of copy accesses completed
system.cpu6.num_reads 99532 # number of read accesses completed
system.cpu6.num_writes 53907 # number of write accesses completed
system.cpu6.num_reads 91570 # number of read accesses completed
system.cpu6.num_writes 49935 # number of write accesses completed
system.cpu7.num_copies 0 # number of copy accesses completed
system.cpu7.num_reads 99819 # number of read accesses completed
system.cpu7.num_writes 53668 # number of write accesses completed
system.cpu7.num_reads 96862 # number of read accesses completed
system.cpu7.num_writes 51935 # number of write accesses completed
---------- End Simulation Statistics ----------

View file

@ -65,23 +65,23 @@ def whitespace_file(filename):
if filename.startswith("SCons"):
return True
return False
return True
format_types = ( 'C', 'C++' )
def format_file(filename):
if file_type(filename) in format_types:
return True
return False
return True
def checkwhite_line(line):
match = lead.search(line)
if match and match.group(1).find('\t') != -1:
return False
return True
match = trail.search(line)
if match:
return False
return True
return True