ruby: removed obsolete configuration files
This commit is contained in:
parent
909bac6840
commit
22d9a53080
3 changed files with 0 additions and 691 deletions
|
@ -1,236 +0,0 @@
|
|||
|
||||
// FOR MOESI_CMP_token
|
||||
//PARAM_BOOL( FilteringEnabled, false, false );
|
||||
//PARAM_BOOL( DistributedPersistentEnabled, true, false );
|
||||
//PARAM_BOOL( DynamicTimeoutEnabled, true, false );
|
||||
//PARAM( RetryThreshold, 1, false );
|
||||
//PARAM( FixedTimeoutLatency, 300, false );
|
||||
|
||||
//PARAM( TraceWarmupLength, 1000000, false );
|
||||
|
||||
//PARAM( callback_counter, 0, false );
|
||||
//PARAM( NUM_COMPLETIONS_BEFORE_PASS, 0, false );
|
||||
|
||||
//PARAM( tester_length, 0, false );
|
||||
//PARAM( synthetic_locks, 2048, false );
|
||||
//PARAM( think_time, 5, false );
|
||||
//PARAM( wait_time, 5, false );
|
||||
//PARAM( hold_time, 5, false );
|
||||
//PARAM( deterministic_addrs, 1, false );
|
||||
//PARAM_STRING( SpecifiedGenerator, "DetermInvGenerator", false );
|
||||
|
||||
// For debugging purposes, one can enable a trace of all the protocol
|
||||
// state machine changes. Unfortunately, the code to generate the
|
||||
// trace is protocol specific. To enable the code for some of the
|
||||
// standard protocols,
|
||||
// 1. change "PROTOCOL_DEBUG_TRACE = true"
|
||||
// 2. enable debug in Makefile
|
||||
// 3. use the "--start 1" command line parameter or
|
||||
// "g_debug_ptr->setDebugTime(1)" to set the
|
||||
// debug begin time
|
||||
//
|
||||
// this used to be ruby/common/Global.hh
|
||||
|
||||
//PARAM_BOOL( ProtocolDebugTrace, true, false );
|
||||
// a string for filtering debugging output (for all g_debug vars see Debug.hh)
|
||||
//PARAM_STRING( DEBUG_FILTER_STRING, "", false );
|
||||
// filters debugging messages based on priority (low, med, high)
|
||||
//PARAM_STRING( DEBUG_VERBOSITY_STRING, "", false );
|
||||
// filters debugging messages based on a ruby time
|
||||
//PARAM_ULONG( DEBUG_START_TIME, 0, false );
|
||||
// sends debugging messages to an output filename
|
||||
//PARAM_STRING( DEBUG_OUTPUT_FILENAME, "", false );
|
||||
|
||||
//PARAM_BOOL( ProfileHotLines, false, false );
|
||||
|
||||
// PROFILE_ALL_INSTRUCTIONS is used if you want Ruby to profile all instructions executed
|
||||
// The following need to be true for this to work correctly:
|
||||
// 1. Disable istc and dstc for this simulation run
|
||||
// 2. Add the following line to the object "sim" in the checkpoint you run from:
|
||||
// instruction_profile_line_size: 4
|
||||
// This is used to have simics report back all instruction requests
|
||||
|
||||
// For more details on how to find out how to interpret the output physical instruction
|
||||
// address, please read the document in the simics-howto directory
|
||||
//PARAM_BOOL( ProfileAllInstructions, false, false );
|
||||
|
||||
// Set the following variable to true if you want a complete trace of
|
||||
// PCs (physical address of program counters, with executing processor IDs)
|
||||
// to be printed to stdout. Make sure to direct the simics output to a file.
|
||||
// Otherwise, the run will take a really long time!
|
||||
// A long run may write a file that can exceed the OS limit on file length
|
||||
//PARAM_BOOL( PRINT_INSTRUCTION_TRACE, false, false );
|
||||
//PARAM( DEBUG_CYCLE, 0, false );
|
||||
|
||||
// Make the entire memory system perfect
|
||||
//PARAM_BOOL( PERFECT_MEMORY_SYSTEM, false, false );
|
||||
//PARAM( PERFECT_MEMORY_SYSTEM_LATENCY, 0, false );
|
||||
|
||||
// *********************************************
|
||||
// SYSTEM PARAMETERS
|
||||
// *********************************************
|
||||
|
||||
//PARAM( NumberOfChips, 1, false );
|
||||
//PARAM( NumberOfCores, 2, false );
|
||||
//PARAM_ARRAY( NumberOfCoresPerChip, int, m_NumberOfChips, 2, false);
|
||||
|
||||
// *********************************************
|
||||
// CACHE PARAMETERS
|
||||
// *********************************************
|
||||
|
||||
//PARAM( NumberOfCaches, m_NumberOfCores, false );
|
||||
//PARAM( NumberOfCacheLevels, 1, false );
|
||||
/* this returns the number of discrete CacheMemories per level (i.e. a split L1 counts for 2) */
|
||||
//PARAM_ARRAY( NumberOfCachesPerLevel, int, m_NumberOfCacheLevels, m_NumberOfCores, false ); // this is the number of discrete caches if the level is private
|
||||
// or the number of banks if the level is shared
|
||||
//PARAM( CacheIDFromParams, 1, true ); // returns a unique CacheID from the parameters (level, num, split_type)
|
||||
//PARAM_ARRAY( CacheLatency, int, m_NumberOfCaches, 1, false ); // returns the latency for cache, indexed by CacheID
|
||||
//PARAM_ARRAY( CacheSplitType, string, m_NumberOfCaches, "unified", false ); // returns "data", "instruction", or "unified", indexed by CacheID
|
||||
//PARAM_ARRAY( CacheType, string, m_NumberOfCaches, "SetAssociative", false ); // returns the type of a cache, indexed by CacheID
|
||||
//PARAM_ARRAY( CacheAssoc, int, m_NumberOfCaches, 4, false ); // returns the cache associativity, indexed by CacheID
|
||||
//PARAM_ARRAY( NumberOfCacheSets, int, m_NumberOfCaches, 256, false ); // returns the number of cache sets, indexed by CacheID
|
||||
//PARAM_ARRAY( NumberOfCacheSetBits, int, m_NumberOfCaches, log_int(256), false ); // returns the number of cache set bits, indexed by CacheID
|
||||
//PARAM_ARRAY( CacheReplacementPolicy, string, m_NumberOfCaches, "PSEUDO_LRU", false ); // other option is "LRU"
|
||||
|
||||
//PARAM( DataBlockBytes, 64, false );
|
||||
//PARAM( DataBlockBits, log_int(m_DataBlockBytes), false);
|
||||
|
||||
// ********************************************
|
||||
// MEMORY PARAMETERS
|
||||
// ********************************************
|
||||
|
||||
//PARAM_ARRAY( NumberOfControllersPerType, int, m_NumberOfCacheLevels+2, m_NumberOfCores, false);
|
||||
//PARAM_ARRAY2D( NumberOfControllersPerTypePerChip, int, m_NumberOfCacheLevels+2, m_NumberOfChips, m_NumberOfCores, false);
|
||||
|
||||
// ********************************************
|
||||
// DMA CONTROLLER PARAMETERS
|
||||
// ********************************************
|
||||
|
||||
//PARAM( NumberOfDMA, 1, false );
|
||||
//PARAM_ARRAY( NumberOfDMAPerChip, int, m_NumberOfChips, 1, false);
|
||||
//PARAM_ARRAY( ChipNumFromDMAVersion, int, m_NumberOfDMA, 0, false );
|
||||
|
||||
//PARAM_ULONG( MemorySizeBytes, 4294967296, false );
|
||||
//PARAM_ULONG( MemorySizeBits, 32, false);
|
||||
|
||||
//PARAM( NUM_PROCESSORS, 0, false );
|
||||
//PARAM( NUM_L2_BANKS, 0, false );
|
||||
//PARAM( NUM_MEMORIES, 0, false );
|
||||
//PARAM( ProcsPerChip, 1, false );
|
||||
|
||||
// The following group of parameters are calculated. They must
|
||||
// _always_ be left at zero.
|
||||
//PARAM( NUM_CHIPS, 0, false );
|
||||
//PARAM( NUM_CHIP_BITS, 0, false );
|
||||
//PARAM( MEMORY_SIZE_BITS, 0, false );
|
||||
//PARAM( DATA_BLOCK_BITS, 0, false );
|
||||
//PARAM( PAGE_SIZE_BITS, 0, false );
|
||||
//PARAM( NUM_PROCESSORS_BITS, 0, false );
|
||||
//PARAM( PROCS_PER_CHIP_BITS, 0, false );
|
||||
//PARAM( NUM_L2_BANKS_BITS, 0, false );
|
||||
//PARAM( NUM_L2_BANKS_PER_CHIP_BITS, 0, false );
|
||||
//PARAM( NUM_L2_BANKS_PER_CHIP, 0, false );
|
||||
//PARAM( NUM_MEMORIES_BITS, 0, false );
|
||||
//PARAM( NUM_MEMORIES_PER_CHIP, 0, false );
|
||||
//PARAM( MEMORY_MODULE_BITS, 0, false );
|
||||
//PARAM_ULONG( MEMORY_MODULE_BLOCKS, 0, false );
|
||||
|
||||
// TIMING PARAMETERS
|
||||
//PARAM( DIRECTORY_CACHE_LATENCY, 6, false );
|
||||
|
||||
//PARAM( NULL_LATENCY, 1, false );
|
||||
//PARAM( ISSUE_LATENCY, 2, false );
|
||||
//PARAM( CACHE_RESPONSE_LATENCY, 12, false );
|
||||
//PARAM( L2_RESPONSE_LATENCY, 6, false );
|
||||
//PARAM( L2_TAG_LATENCY, 6, false );
|
||||
//PARAM( L1_RESPONSE_LATENCY, 3, false );
|
||||
|
||||
//PARAM( MEMORY_RESPONSE_LATENCY_MINUS_2, 158, false );
|
||||
//PARAM( DirectoryLatency, 6, false );
|
||||
|
||||
//PARAM( NetworkLinkLatency, 1, false );
|
||||
//PARAM( COPY_HEAD_LATENCY, 4, false );
|
||||
//PARAM( OnChipLinkLatency, 1, false );
|
||||
//PARAM( RecycleLatency, 10, false );
|
||||
//PARAM( L2_RECYCLE_LATENCY, 5, false );
|
||||
//PARAM( TIMER_LATENCY, 10000, false );
|
||||
//PARAM( TBE_RESPONSE_LATENCY, 1, false );
|
||||
//PARAM_BOOL( PERIODIC_TIMER_WAKEUPS, true, false );
|
||||
|
||||
// constants used by CMP protocols
|
||||
//PARAM( L1_REQUEST_LATENCY, 2, false );
|
||||
//PARAM( L2_REQUEST_LATENCY, 4, false );
|
||||
//PARAM_BOOL( SINGLE_ACCESS_L2_BANKS, true, false ); // hack to simulate multi-cycle L2 bank accesses
|
||||
|
||||
// Ruby cycles between when a sequencer issues a miss and it arrives at
|
||||
// the L1 cache controller
|
||||
//PARAM( SequencerToControllerLatency, 4, false );
|
||||
|
||||
// Number of transitions each controller state machine can complete per cycle
|
||||
//PARAM( L1CacheTransitionsPerCycle, 32, false );
|
||||
//PARAM( L2CACHE_TRANSITIONS_PER_RUBY_CYCLE, 32, false );
|
||||
//PARAM( DirectoryTransitionsPerCycle, 32, false );
|
||||
//PARAM( DMATransitionsPerCycle, 1, false );
|
||||
|
||||
// Number of TBEs available for demand misses, prefetches, and replacements
|
||||
//PARAM( NumberOfTBEs, 128, false );
|
||||
//PARAM( NumberOfL1TBEs, 32, false );
|
||||
//PARAM( NumberOfL2TBEs, 32, false );
|
||||
|
||||
// NOTE: Finite buffering allows us to simulate a wormhole routed network
|
||||
// with idealized flow control. All message buffers within the network (i.e.
|
||||
// the switch's input and output buffers) are set to the size specified below
|
||||
// by the PROTOCOL_BUFFER_SIZE
|
||||
//PARAM_BOOL( FiniteBuffering, false, false );
|
||||
//PARAM( FiniteBufferSize, 3, false ); // Zero is unbounded buffers
|
||||
// Number of requests buffered between the sequencer and the L1 controller
|
||||
// This can be more accurately simulated in Opal, therefore it's set to an
|
||||
// infinite number
|
||||
// Only affects the simulation when FINITE_BUFFERING is enabled
|
||||
//PARAM( ProcessorBufferSize, 10, false );
|
||||
// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
|
||||
// Controllers. Controls the number of requests issued by the L2 HW Prefetcher
|
||||
//PARAM( ProtocolBufferSize, 32, false );
|
||||
|
||||
// NETWORK PARAMETERS
|
||||
|
||||
// Network Topology: See TopologyType in external.sm for valid values
|
||||
//PARAM_STRING( NetworkTopology, "PT_TO_PT", false );
|
||||
|
||||
// Cache Design specifies file prefix for topology
|
||||
//PARAM_STRING( CacheDesign, "NUCA", false );
|
||||
|
||||
//PARAM( EndpointBandwidth, 10000, false );
|
||||
//PARAM_BOOL( AdaptiveRouting, true, false );
|
||||
//PARAM( NumberOfVirtualNetworks, 6, false );
|
||||
//PARAM( FanOutDegree, 4, false );
|
||||
//PARAM_BOOL( PrintTopology, true, false );
|
||||
|
||||
// Princeton Network (Garnet)
|
||||
//PARAM_BOOL( UsingGarnetNetwork, true, false );
|
||||
//PARAM_BOOL( UsingDetailNetwork, false, false );
|
||||
//PARAM_BOOL( UsingNetworkTesting, false, false );
|
||||
//PARAM( FlitSize, 16, false );
|
||||
//PARAM( NumberOfPipeStages, 4, false );
|
||||
//PARAM( VCSPerClass, 4, false );
|
||||
//PARAM( BufferSize, 4, false );
|
||||
|
||||
// MemoryControl:
|
||||
//PARAM( MEM_BUS_CYCLE_MULTIPLIER, 10, false );
|
||||
//PARAM( BANKS_PER_RANK, 8, false );
|
||||
//PARAM( RANKS_PER_DIMM, 2, false );
|
||||
//PARAM( DIMMS_PER_CHANNEL, 2, false );
|
||||
//PARAM( BANK_BIT_0, 8, false );
|
||||
//PARAM( RANK_BIT_0, 11, false );
|
||||
//PARAM( DIMM_BIT_0, 12, false );
|
||||
//PARAM( BANK_QUEUE_SIZE, 12, false );
|
||||
//PARAM( BankBusyTime, 11, false );
|
||||
//PARAM( RANK_RANK_DELAY, 1, false );
|
||||
//PARAM( READ_WRITE_DELAY, 2, false );
|
||||
//PARAM( BASIC_BUS_BUSY_TIME, 2, false );
|
||||
//PARAM( MEM_CTL_LATENCY, 12, false );
|
||||
//PARAM( REFRESH_PERIOD, 1560, false );
|
||||
//PARAM( TFAW, 0, false );
|
||||
//PARAM( MEM_RANDOM_ARBITRATE, 0, false );
|
||||
//PARAM( MEM_FIXED_DELAY, 0, false );
|
||||
|
|
@ -1,405 +0,0 @@
|
|||
//
|
||||
// This file has been modified by Kevin Moore and Dan Nussbaum of the
|
||||
// Scalable Systems Research Group at Sun Microsystems Laboratories
|
||||
// (http://research.sun.com/scalable/) to support the Adaptive
|
||||
// Transactional Memory Test Platform (ATMTP). For information about
|
||||
// ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
|
||||
//
|
||||
// Please send email to atmtp-interest@sun.com with feedback, questions, or
|
||||
// to request future announcements about ATMTP.
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// File modification date: 2008-02-23
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// ATMTP is distributed as part of the GEMS software toolset and is
|
||||
// available for use and modification under the terms of version 2 of the
|
||||
// GNU General Public License. The GNU General Public License is contained
|
||||
// in the file $GEMS/LICENSE.
|
||||
//
|
||||
// Multifacet GEMS is free software; you can redistribute it and/or modify
|
||||
// it under the terms of version 2 of the GNU General Public License as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
// Multifacet GEMS is distributed in the hope that it will be useful, but
|
||||
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with the Multifacet GEMS; if not, write to the Free Software Foundation,
|
||||
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
|
||||
g_RANDOM_SEED: 1
|
||||
|
||||
g_DEADLOCK_THRESHOLD: 500000
|
||||
|
||||
// determines how many Simics cycles advance for every Ruby cycle
|
||||
// (does not apply when running Opal)
|
||||
SIMICS_RUBY_MULTIPLIER: 4
|
||||
|
||||
// Ruby cycles between when a sequencer issues a request and it arrives at
|
||||
// the L1 cache controller
|
||||
//
|
||||
// ** important ** this parameter determines the L2 hit latency when
|
||||
// using the SMP protocols with a combined L1/L2 controller (-cache.sm)
|
||||
//
|
||||
SEQUENCER_TO_CONTROLLER_LATENCY: 4
|
||||
|
||||
|
||||
// When set to false, the L1 cache structures are probed for a hit in Sequencer.C
|
||||
// If a request hits, it is *not* issued to the cache controller
|
||||
// When set to true, all processor data requests issue to cache controller
|
||||
//
|
||||
// ** important ** this parameter must be set to false for proper L1/L2 hit timing
|
||||
// for the SMP protocols with combined L1/L2 controllers (-cache.sm)
|
||||
//
|
||||
REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false
|
||||
|
||||
|
||||
// When running with Opal in SMT configurations, this indicates the number of threads per physical processor
|
||||
g_NUM_SMT_THREADS: 1
|
||||
|
||||
|
||||
// Maximum number of requests (including SW prefetches) outstanding from
|
||||
// the sequencer (Note: this also includes items buffered in the store
|
||||
// buffer)
|
||||
g_SEQUENCER_OUTSTANDING_REQUESTS: 16
|
||||
|
||||
|
||||
PROTOCOL_DEBUG_TRACE: true
|
||||
DEBUG_FILTER_STRING: none
|
||||
DEBUG_VERBOSITY_STRING: none
|
||||
DEBUG_START_TIME: 0
|
||||
DEBUG_OUTPUT_FILENAME: none
|
||||
|
||||
|
||||
TRANSACTION_TRACE_ENABLED: false
|
||||
USER_MODE_DATA_ONLY: false
|
||||
PROFILE_HOT_LINES: false
|
||||
|
||||
PROFILE_ALL_INSTRUCTIONS: false
|
||||
PRINT_INSTRUCTION_TRACE: false
|
||||
g_DEBUG_CYCLE: 0
|
||||
BLOCK_STC: false
|
||||
PERFECT_MEMORY_SYSTEM: false
|
||||
PERFECT_MEMORY_SYSTEM_LATENCY: 0
|
||||
DATA_BLOCK: false
|
||||
|
||||
|
||||
// *********************************************
|
||||
// CACHE & MEMORY PARAMETERS
|
||||
// *********************************************
|
||||
|
||||
|
||||
L1_CACHE_ASSOC: 4
|
||||
L1_CACHE_NUM_SETS_BITS: 8
|
||||
L2_CACHE_ASSOC: 4
|
||||
L2_CACHE_NUM_SETS_BITS: 16
|
||||
|
||||
// 32 bits = 4 GB address space
|
||||
g_MEMORY_SIZE_BYTES: 1073741824 //4294967296
|
||||
g_DATA_BLOCK_BYTES: 64
|
||||
g_PAGE_SIZE_BYTES: 4096
|
||||
g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU
|
||||
|
||||
g_PROCS_PER_CHIP: 1
|
||||
|
||||
|
||||
// set automatically
|
||||
g_NUM_PROCESSORS: 0
|
||||
g_NUM_L2_BANKS: 0
|
||||
g_NUM_MEMORIES: 0
|
||||
|
||||
// The following group of parameters are calculated. They must
|
||||
// _always_ be left at zero.
|
||||
g_NUM_CHIPS: 0
|
||||
g_NUM_CHIP_BITS: 0
|
||||
g_MEMORY_SIZE_BITS: 0
|
||||
g_DATA_BLOCK_BITS: 0
|
||||
g_PAGE_SIZE_BITS: 0
|
||||
g_NUM_PROCESSORS_BITS: 0
|
||||
g_PROCS_PER_CHIP_BITS: 0
|
||||
g_NUM_L2_BANKS_BITS: 0
|
||||
g_NUM_L2_BANKS_PER_CHIP: 0
|
||||
g_NUM_L2_BANKS_PER_CHIP_BITS: 0
|
||||
g_NUM_MEMORIES_BITS: 0
|
||||
g_NUM_MEMORIES_PER_CHIP: 0
|
||||
g_MEMORY_MODULE_BITS: 0
|
||||
g_MEMORY_MODULE_BLOCKS: 0
|
||||
|
||||
|
||||
// For certain CMP protocols, determines whether the lowest bits of a block address
|
||||
// are used to index to a L2 cache bank or into the sets of a
|
||||
// single bank
|
||||
// lowest highest
|
||||
// true: g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
|
||||
// false: g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
|
||||
MAP_L2BANKS_TO_LOWEST_BITS: false
|
||||
|
||||
|
||||
|
||||
// TIMING PARAMETERS -- many of these are protocol specific. See SLICC files
|
||||
// to determine where they apply
|
||||
|
||||
MEMORY_RESPONSE_LATENCY_MINUS_2: 158 // determines memory response latency
|
||||
DIRECTORY_CACHE_LATENCY: 6
|
||||
NULL_LATENCY: 1
|
||||
ISSUE_LATENCY: 2
|
||||
CACHE_RESPONSE_LATENCY: 12
|
||||
L1_RESPONSE_LATENCY: 3
|
||||
L2_RESPONSE_LATENCY: 6
|
||||
L2_TAG_LATENCY: 6
|
||||
DIRECTORY_LATENCY: 80
|
||||
NETWORK_LINK_LATENCY: 1
|
||||
COPY_HEAD_LATENCY: 4
|
||||
ON_CHIP_LINK_LATENCY: 1
|
||||
RECYCLE_LATENCY: 10
|
||||
L2_RECYCLE_LATENCY: 5
|
||||
TIMER_LATENCY: 10000
|
||||
TBE_RESPONSE_LATENCY: 1
|
||||
PERIODIC_TIMER_WAKEUPS: true
|
||||
|
||||
|
||||
// constants used by CMP protocols
|
||||
// cache bank access times
|
||||
L1_REQUEST_LATENCY: 2
|
||||
L2_REQUEST_LATENCY: 4
|
||||
|
||||
|
||||
// Number of transitions each controller state machine can complete per cycle
|
||||
// i.e. the number of ports to each controller
|
||||
// L1cache is the sum of the L1I and L1D cache ports
|
||||
L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
|
||||
// Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
|
||||
// much greater constraint on the concurrency of a L2 cache bank
|
||||
L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
|
||||
DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32
|
||||
DMA_TRANSITIONS_PER_RUBY_CYCLE: 1
|
||||
|
||||
|
||||
// Number of TBEs available for demand misses, ALL prefetches, and replacements
|
||||
// used by one-level protocols
|
||||
NUMBER_OF_TBES: 128
|
||||
// two-level protocols
|
||||
NUMBER_OF_L1_TBES: 32
|
||||
NUMBER_OF_L2_TBES: 32
|
||||
|
||||
// ** INTERCONNECT PARAMETERS **
|
||||
//
|
||||
g_PRINT_TOPOLOGY: true
|
||||
g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
|
||||
g_CACHE_DESIGN: NUCA // specifies file prefix for FILE_SPECIFIED topology
|
||||
FAN_OUT_DEGREE: 4 // for HIERARCHICAL SWITCH topology
|
||||
|
||||
g_adaptive_routing: true
|
||||
NUMBER_OF_VIRTUAL_NETWORKS: 6
|
||||
|
||||
// bandwidth unit is 1/1000 byte per cycle. the following parameter is multiplied by
|
||||
// topology specific link weights
|
||||
g_endpoint_bandwidth: 10000
|
||||
|
||||
|
||||
// ** finite buffering parameters
|
||||
//
|
||||
// note: Finite buffering allows us to simulate a realistic virtual cut-through
|
||||
// routed network with idealized flow control. this feature is NOT heavily tested
|
||||
FINITE_BUFFERING: false
|
||||
// All message buffers within the network (i.e. the switch's input and
|
||||
// output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE
|
||||
FINITE_BUFFER_SIZE: 3
|
||||
// g_SEQUENCER_OUTSTANDING_REQUESTS (above) controls the number of demand requests
|
||||
// issued by the sequencer. The PROCESSOR_BUFFER_SIZE controls the
|
||||
// number of requests in the mandatory queue
|
||||
// Only affects the simulation when FINITE_BUFFERING is enabled
|
||||
PROCESSOR_BUFFER_SIZE: 10
|
||||
// The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
|
||||
// Controllers. Controls the number of requests issued by the L2 HW Prefetcher
|
||||
PROTOCOL_BUFFER_SIZE: 32
|
||||
// ** end finite buffering parameters
|
||||
|
||||
|
||||
// (deprecated)
|
||||
// Allows only a single access to a multi-cycle L2 bank.
|
||||
// Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY
|
||||
// number of cycles. However the TBE table can be accessed in parallel.
|
||||
SINGLE_ACCESS_L2_BANKS: true
|
||||
|
||||
|
||||
// MOESI_CMP_token parameters (some might be deprecated)
|
||||
g_FILTERING_ENABLED: false
|
||||
g_DISTRIBUTED_PERSISTENT_ENABLED: true
|
||||
g_RETRY_THRESHOLD: 1
|
||||
g_DYNAMIC_TIMEOUT_ENABLED: true
|
||||
g_FIXED_TIMEOUT_LATENCY: 300
|
||||
|
||||
|
||||
// tester parameters (overridden by testerconfig.defaults)
|
||||
//
|
||||
// injects random message delays to excite protocol races
|
||||
RANDOMIZATION: false
|
||||
g_SYNTHETIC_DRIVER: false
|
||||
g_DETERMINISTIC_DRIVER: false
|
||||
g_trace_warmup_length: 1000000
|
||||
g_bash_bandwidth_adaptive_threshold: 0.75
|
||||
|
||||
g_tester_length: 0
|
||||
// # of synthetic locks == 16 * 128
|
||||
g_synthetic_locks: 2048
|
||||
g_deterministic_addrs: 1
|
||||
g_SpecifiedGenerator: DetermInvGenerator
|
||||
g_callback_counter: 0
|
||||
g_NUM_COMPLETIONS_BEFORE_PASS: 0
|
||||
// parameters used by locking synthetic tester
|
||||
g_think_time: 5
|
||||
g_hold_time: 5
|
||||
g_wait_time: 5
|
||||
|
||||
// Princeton Network (Garnet)
|
||||
g_GARNET_NETWORK: true
|
||||
g_DETAIL_NETWORK: false
|
||||
g_NETWORK_TESTING: false
|
||||
g_FLIT_SIZE: 16
|
||||
g_NUM_PIPE_STAGES: 4
|
||||
g_VCS_PER_CLASS: 4
|
||||
g_BUFFER_SIZE: 4
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// MemoryControl:
|
||||
|
||||
// Basic cycle time of the memory controller. This defines the period which is
|
||||
// used as the memory channel clock period, the address bus bit time, and the
|
||||
// memory controller cycle time.
|
||||
// Assuming a 200 MHz memory channel (DDR-400, which has 400 Mbits/sec data),
|
||||
// and a 2 GHz Ruby clock:
|
||||
MEM_BUS_CYCLE_MULTIPLIER: 10
|
||||
|
||||
// How many internal banks in each DRAM chip:
|
||||
BANKS_PER_RANK: 8
|
||||
|
||||
// How many sets of DRAM chips per DIMM.
|
||||
RANKS_PER_DIMM: 2
|
||||
|
||||
// How many DIMMs per channel. (Currently the only thing that
|
||||
// matters is the number of ranks per channel, i.e. the product
|
||||
// of this parameter and RANKS_PER_DIMM. But if and when this is
|
||||
// expanded to do FB-DIMMs, the distinction between the two
|
||||
// will matter.)
|
||||
DIMMS_PER_CHANNEL: 2
|
||||
|
||||
// Which bits to use to find the bank, rank, and DIMM numbers.
|
||||
// You could choose to have the bank bits, rank bits, and DIMM bits
|
||||
// in any order; here they are in that order.
|
||||
// For these defaults, we assume this format for addresses:
|
||||
// Offset within line: [5:0]
|
||||
// Memory controller #: [7:6]
|
||||
// Bank: [10:8]
|
||||
// Rank: [11]
|
||||
// DIMM: [12]
|
||||
// Row addr / Col addr: [top:13]
|
||||
// If you get these bits wrong, then some banks won't see any
|
||||
// requests; you need to check for this in the .stats output.
|
||||
BANK_BIT_0: 8
|
||||
RANK_BIT_0: 11
|
||||
DIMM_BIT_0: 12
|
||||
|
||||
// Maximum number of entries in each bank queue; set to whatever you want.
|
||||
// If it is too small, you will see in the .stats file a lot of delay
|
||||
// time spent in the common input queue.
|
||||
BANK_QUEUE_SIZE: 12
|
||||
|
||||
// Bank cycle time (tRC) measured in memory cycles:
|
||||
BANK_BUSY_TIME: 11
|
||||
|
||||
// This is how many memory address cycles to delay between reads to
|
||||
// different ranks of DRAMs to allow for clock skew:
|
||||
RANK_RANK_DELAY: 1
|
||||
|
||||
// This is how many memory address cycles to delay between a read
|
||||
// and a write. This is based on two things: (1) the data bus is
|
||||
// used one cycle earlier in the operation; (2) a round-trip wire
|
||||
// delay from the controller to the DIMM that did the reading.
|
||||
READ_WRITE_DELAY: 2
|
||||
|
||||
// Basic address and data bus occupancy. If you are assuming a
|
||||
// 16-byte-wide data bus (pairs of DIMMs side-by-side), then
|
||||
// the data bus occupancy matches the address bus occupancy at
|
||||
// two cycles. But if the channel is only 8 bytes wide, you
|
||||
// need to increase this bus occupancy time to 4 cycles.
|
||||
BASIC_BUS_BUSY_TIME: 2
|
||||
|
||||
// Latency to return a read request or writeback acknowledgement.
|
||||
// Measured in memory address cycles.
|
||||
// This equals tRCD + CL + AL + (four bit times)
|
||||
// + (round trip on channel)
|
||||
// + (memory control internal delays)
|
||||
// It's going to be an approximation, so pick what you like.
|
||||
// Note: The fact that latency is a constant, and does not depend on two
|
||||
// low-order address bits, implies that our memory controller either:
|
||||
// (a) tells the DRAM to read the critical word first, and sends the
|
||||
// critical word first back to the CPU, or (b) waits until it has
|
||||
// seen all four bit times on the data wires before sending anything
|
||||
// back. Either is plausible. If (a), remove the "four bit times"
|
||||
// term from the calculation above.
|
||||
MEM_CTL_LATENCY: 12
|
||||
|
||||
// refresh_period is the number of memory cycles between refresh
|
||||
// of row x in bank n and refresh of row x+1 in bank n. For DDR-400,
|
||||
// this is typically 7.8 usec for commercial systems; after 8192 such
|
||||
// refreshes, this will have refreshed the whole chip in 64 msec. If
|
||||
// we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles. The memory
|
||||
// controller will divide this by the total number of banks, and kick
|
||||
// off a refresh to *somebody* every time that amount is counted
|
||||
// down to zero. (There will be some rounding error there, but it
|
||||
// should have minimal effect.)
|
||||
REFRESH_PERIOD: 1560
|
||||
|
||||
// tFAW is a DRAM chip parameter which restricts the number of
|
||||
// activates that can be done within a certain window of time.
|
||||
// The window is specified here in terms of number of memory
|
||||
// controller cycles. At most four activates may be done during
|
||||
// any such sliding window. If this number is set to be no more
|
||||
// than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
|
||||
// It is typical in real systems for tFAW to have no effect, but
|
||||
// it may be useful in throttling power. Set to zero to ignore.
|
||||
TFAW: 0
|
||||
|
||||
// By default, the memory controller uses round-robin to arbitrate
|
||||
// between ready bank queues for use of the address bus. If you
|
||||
// wish to add randomness to the system, set this parameter to
|
||||
// one instead, and it will restart the round-robin pointer at a
|
||||
// random bank number each cycle. If you want additional
|
||||
// nondeterminism, set the parameter to some integer n >= 2, and
|
||||
// it will in addition add a n% chance each cycle that a ready bank
|
||||
// will be delayed an additional cycle. Note that if you are
|
||||
// in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
|
||||
// have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
|
||||
MEM_RANDOM_ARBITRATE: 0
|
||||
|
||||
// The following parameter, if nonzero, will disable the memory
|
||||
// controller and instead give every request a fixed latency. The
|
||||
// nonzero value specified here is measured in memory cycles and is
|
||||
// just added to MEM_CTL_LATENCY. It will also show up in the stats
|
||||
// file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
|
||||
MEM_FIXED_DELAY: 0
|
||||
|
||||
// If instead of DDR-400, you wanted DDR-800, the channel gets faster
|
||||
// but the basic operation of the DRAM core is unchanged.
|
||||
// Busy times appear to double just because they are measured
|
||||
// in smaller clock cycles. The performance advantage comes because
|
||||
// the bus busy times don't actually quite double.
|
||||
// You would use something like these values:
|
||||
//
|
||||
// MEM_BUS_CYCLE_MULTIPLIER: 5
|
||||
// BANK_BUSY_TIME: 22
|
||||
// RANK_RANK_DELAY: 2
|
||||
// READ_WRITE_DELAY: 3
|
||||
// BASIC_BUS_BUSY_TIME: 3
|
||||
// MEM_CTL_LATENCY: 20
|
||||
// REFRESH_PERIOD: 3120
|
|
@ -1,50 +0,0 @@
|
|||
|
||||
//
|
||||
// This file contains tester specific changes to the rubyconfig.defaults
|
||||
// parameter values.
|
||||
//
|
||||
// Please: - Add new variables only to rubyconfig.defaults file.
|
||||
// - Change them here only when necessary.
|
||||
|
||||
g_SIMICS: false
|
||||
DATA_BLOCK: true
|
||||
RANDOMIZATION: true
|
||||
g_SYNTHETIC_DRIVER: false
|
||||
g_DETERMINISTIC_DRIVER: true
|
||||
g_DEADLOCK_THRESHOLD: 500000
|
||||
g_SpecifiedGenerator: DetermGETXGenerator
|
||||
|
||||
PROTOCOL_DEBUG_TRACE: true
|
||||
|
||||
//
|
||||
// Generic cache parameters
|
||||
//
|
||||
|
||||
// Cache sizes are smaller for the random tester to increase the amount
|
||||
// of false sharing.
|
||||
L1_CACHE_ASSOC: 2
|
||||
L1_CACHE_NUM_SETS_BITS: 2
|
||||
L2_CACHE_ASSOC: 2
|
||||
L2_CACHE_NUM_SETS_BITS: 5
|
||||
|
||||
g_MEMORY_SIZE_BYTES: 1048576
|
||||
|
||||
//g_NETWORK_TOPOLOGY: FILE_SPECIFIED
|
||||
RECYCLE_LATENCY: 1
|
||||
//NUMBER_OF_VIRTUAL_NETWORKS: 5
|
||||
//g_NUM_MEMORIES: 16
|
||||
L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 1000
|
||||
DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 1000
|
||||
//g_PROCS_PER_CHIP: 2
|
||||
//g_NUM_L2_BANKS: 16
|
||||
//g_endpoint_bandwidth: 10000
|
||||
//g_NUM_PROCESSORS: 16
|
||||
//g_NUM_SMT_THREADS: 1
|
||||
//g_GARNET_NETWORK: true
|
||||
//g_DETAIL_NETWORK: true
|
||||
//g_NETWORK_TESTING: false
|
||||
//g_FLIT_SIZE: 32
|
||||
//g_NUM_PIPE_STAGES: 5
|
||||
//g_VCS_PER_CLASS: 2
|
||||
//g_BUFFER_SIZE: 4
|
||||
|
Loading…
Reference in a new issue