2014-04-01 18:44:30 +02:00
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett - Packard Development Company , L . P .
2014-06-03 22:32:59 +02:00
* Copyright ( c ) 2010 - 2013 Advanced Micro Devices , Inc .
2014-04-01 18:44:30 +02:00
* All Rights Reserved
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are
* met : redistributions of source code must retain the above copyright
* notice , this list of conditions and the following disclaimer ;
* redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution ;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission .
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* " AS IS " AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT
* LIMITED TO , THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL ,
* SPECIAL , EXEMPLARY , OR CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT
* LIMITED TO , PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE ,
* DATA , OR PROFITS ; OR BUSINESS INTERRUPTION ) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY , WHETHER IN CONTRACT , STRICT LIABILITY , OR TORT
* ( INCLUDING NEGLIGENCE OR OTHERWISE ) ARISING IN ANY WAY OUT OF THE USE
2014-06-03 22:32:59 +02:00
* OF THIS SOFTWARE , EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE .
2014-04-01 18:44:30 +02:00
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2014-06-03 22:32:59 +02:00
2014-04-01 18:44:30 +02:00
# include <algorithm>
# include <cassert>
# include <cmath>
# include <iostream>
# include <string>
# include "basic_circuit.h"
# include "basic_components.h"
2014-06-03 22:32:59 +02:00
# include "common.h"
2014-04-01 18:44:30 +02:00
# include "const.h"
# include "io.h"
# include "logic.h"
# include "memoryctrl.h"
# include "parameter.h"
/* overview of MC models:
* McPAT memory controllers are modeled according to large number of industrial data points .
* The Basic memory controller architecture is base on the Synopsis designs
* ( DesignWare DDR2 / DDR3 - Lite memory controllers and DDR2 / DDR3 - Lite protocol controllers )
* as in Cadence ChipEstimator Tool
*
* An MC has 3 parts as shown in this design . McPAT models both high performance MC
* based on Niagara processor designs and curving and low power MC based on data points in
* Cadence ChipEstimator Tool .
*
* The frontend is modeled analytically , the backend is modeled empirically according to
* DDR2 / DDR3 - Lite protocol controllers in Cadence ChipEstimator Tool
* The PHY is modeled based on
* " A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces , " ISSCC 2006 ,
* and A 14 mW 6.25 Gb / s Transceiver in 90 nm CMOS for Serial Chip - to - Chip Communication , " ISSCC 2007
*
* In Cadence ChipEstimator Tool there are two types of memory controllers : the full memory controllers
* that includes the frontend as the DesignWare DDR2 / DDR3 - Lite memory controllers and the backend only
* memory controllers as the DDR2 / DDR3 - Lite protocol controllers ( except DesignWare DDR2 / DDR3 - Lite memory
* controllers , all memory controller IP in Cadence ChipEstimator Tool are backend memory controllers such as
* DDRC 1600 A and DDRC 800 A ) . Thus , to some extend the area and power difference between DesignWare
* DDR2 / DDR3 - Lite memory controllers and DDR2 / DDR3 - Lite protocol controllers can be an estimation to the
* frontend power and area , which is very close the analitically modeled results of the frontend for Niagara2 @ 65 nm
*
*/
2014-06-03 22:32:59 +02:00
MCBackend : : MCBackend ( XMLNode * _xml_data , InputParameter * interface_ip_ ,
const MCParameters & mcp_ , const MCStatistics & mcs_ )
: McPATComponent ( _xml_data ) , l_ip ( * interface_ip_ ) , mcp ( mcp_ ) , mcs ( mcs_ ) {
name = " Transaction Engine " ;
local_result = init_interface ( & l_ip , name ) ;
// Set up stats for the power calculations
tdp_stats . reset ( ) ;
tdp_stats . readAc . access = 0.5 * mcp . num_channels * mcp . clockRate ;
tdp_stats . writeAc . access = 0.5 * mcp . num_channels * mcp . clockRate ;
rtp_stats . reset ( ) ;
rtp_stats . readAc . access = mcs . reads ;
rtp_stats . writeAc . access = mcs . writes ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
void MCBackend : : computeArea ( ) {
// The area is in nm^2
if ( mcp . mc_type = = MC ) {
if ( mcp . type = = 0 ) {
output_data . area = ( 2.7927 * log ( mcp . peak_transfer_rate * 2 ) -
19.862 ) / 2.0 * mcp . dataBusWidth / 128.0 *
( l_ip . F_sz_um / 0.09 ) * mcp . num_channels ;
} else {
output_data . area = 0.15 * mcp . dataBusWidth / 72.0 *
( l_ip . F_sz_um / 0.065 ) * ( l_ip . F_sz_um / 0.065 ) *
mcp . num_channels ;
}
} else {
//skip old model
cout < < " Unknown memory controllers " < < endl ;
exit ( 0 ) ;
//area based on Cadence ChipEstimator for 8bit bus
output_data . area = 0.243 * mcp . dataBusWidth / 8 ;
}
}
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
void MCBackend : : computeEnergy ( ) {
double C_MCB , mc_power ;
double backend_dyn ;
double backend_gates ;
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio ( ) ;
double NMOS_sizing = g_tp . min_w_nmos_ ;
double PMOS_sizing = g_tp . min_w_nmos_ * pmos_to_nmos_sizing_r ;
double area_um2 = output_data . area * 1e6 ;
if ( mcp . mc_type = = MC ) {
if ( mcp . type = = 0 ) {
//assuming the approximately same scaling factor as seen in processors.
//C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process.
//mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et
mc_power = 4.32 * 0.1 ; //4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
C_MCB = mc_power / 1e9 / 72 / 1.1 / 1.1 * l_ip . F_sz_um / 0.065 ;
//per access energy in memory controller
power . readOp . dynamic = C_MCB * g_tp . peri_global . Vdd *
g_tp . peri_global . Vdd *
( mcp . dataBusWidth /*+mcp.addressBusWidth*/ ) ;
power . readOp . leakage = area_um2 / 2 *
( g_tp . scaling_factor . core_tx_density ) *
cmos_Isub_leakage ( NMOS_sizing , PMOS_sizing , 1 , inv ) *
g_tp . peri_global . Vdd ; //unit W
power . readOp . gate_leakage = area_um2 / 2 *
( g_tp . scaling_factor . core_tx_density ) *
cmos_Ig_leakage ( NMOS_sizing , PMOS_sizing , 1 , inv ) *
g_tp . peri_global . Vdd ; //unit W
} else {
//Average on DDR2/3 protocol controller and DDRC 1600/800A in
//Cadence ChipEstimate
backend_dyn = 0.9e-9 / 800e6 * mcp . clockRate / 12800 *
mcp . peak_transfer_rate * mcp . dataBusWidth / 72.0 *
g_tp . peri_global . Vdd / 1.1 * g_tp . peri_global . Vdd / 1.1 *
( l_ip . F_sz_nm / 65.0 ) ;
//Scaling to technology and DIMM feature. The base IP support
//DDR3-1600(PC3 12800)
//5000 is from Cadence ChipEstimator
backend_gates = 50000 * mcp . dataBusWidth / 64.0 ;
power . readOp . dynamic = backend_dyn ;
power . readOp . leakage = ( backend_gates ) *
cmos_Isub_leakage ( NMOS_sizing , PMOS_sizing , 2 , nand ) *
g_tp . peri_global . Vdd ; //unit W
power . readOp . gate_leakage = ( backend_gates ) *
cmos_Ig_leakage ( NMOS_sizing , PMOS_sizing , 2 , nand ) *
g_tp . peri_global . Vdd ; //unit W
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
} else {
//skip old model
cout < < " Unknown memory controllers " < < endl ; exit ( 0 ) ;
//mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend
C_MCB = mc_power / 1e9 / 72 / 1.1 / 1.1 * l_ip . F_sz_um / 0.065 ;
power . readOp . leakage = area_um2 / 2 *
( g_tp . scaling_factor . core_tx_density ) *
cmos_Isub_leakage ( NMOS_sizing , PMOS_sizing , 1 , inv ) *
g_tp . peri_global . Vdd ; //unit W
power . readOp . gate_leakage = area_um2 / 2 *
( g_tp . scaling_factor . core_tx_density ) *
cmos_Ig_leakage ( NMOS_sizing , PMOS_sizing , 1 , inv ) *
g_tp . peri_global . Vdd ; //unit W
power . readOp . dynamic * = 1.2 ;
power . readOp . leakage * = 1.2 ;
power . readOp . gate_leakage * = 1.2 ;
//flash controller has about 20% more backend power since BCH ECC in
//flash is complex and power hungry
}
double long_channel_device_reduction =
longer_channel_device_reduction ( Uncore_device ) ;
power . readOp . longer_channel_leakage = power . readOp . leakage *
long_channel_device_reduction ;
// Output leakage power calculations
output_data . subthreshold_leakage_power =
longer_channel_device ? power . readOp . longer_channel_leakage :
power . readOp . leakage ;
output_data . gate_leakage_power = power . readOp . gate_leakage ;
// Peak dynamic power calculation
output_data . peak_dynamic_power = power . readOp . dynamic *
( tdp_stats . readAc . access + tdp_stats . writeAc . access ) ;
// Runtime dynamic energy calculation
output_data . runtime_dynamic_energy =
power . readOp . dynamic *
( rtp_stats . readAc . access + rtp_stats . writeAc . access ) *
mcp . llcBlockSize * BITS_PER_BYTE / mcp . dataBusWidth +
// Original McPAT code: Assume 10% of peak power is consumed by routine
// job including memory refreshing and scrubbing
power . readOp . dynamic * 0.1 * execution_time ;
}
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
MCPHY : : MCPHY ( XMLNode * _xml_data , InputParameter * interface_ip_ ,
const MCParameters & mcp_ , const MCStatistics & mcs_ )
: McPATComponent ( _xml_data ) , l_ip ( * interface_ip_ ) , mcp ( mcp_ ) , mcs ( mcs_ ) {
name = " Physical Interface (PHY) " ;
local_result = init_interface ( & l_ip , name ) ;
// Set up stats for the power calculations
// TODO: Figure out why TDP stats aren't used
tdp_stats . reset ( ) ;
tdp_stats . readAc . access = 0.5 * mcp . num_channels ;
tdp_stats . writeAc . access = 0.5 * mcp . num_channels ;
rtp_stats . reset ( ) ;
rtp_stats . readAc . access = mcs . reads ;
rtp_stats . writeAc . access = mcs . writes ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
void MCPHY : : computeArea ( ) {
if ( mcp . mc_type = = MC ) {
if ( mcp . type = = 0 ) {
//Based on die photos from Niagara 1 and 2.
//TODO merge this into undifferentiated core.PHY only achieves
//square root of the ideal scaling.
output_data . area = ( 6.4323 * log ( mcp . peak_transfer_rate * 2 ) -
48.134 ) * mcp . dataBusWidth / 128.0 *
( l_ip . F_sz_um / 0.09 ) * mcp . num_channels / 2 ; //TODO:/2
} else {
//Designware/synopsis 16bit DDR3 PHY is 1.3mm (WITH IOs) at 40nm
//for upto DDR3 2133 (PC3 17066)
double non_IO_percentage = 0.2 ;
output_data . area = 1.3 * non_IO_percentage / 2133.0e6 *
mcp . clockRate / 17066 * mcp . peak_transfer_rate *
mcp . dataBusWidth / 16.0 * ( l_ip . F_sz_um / 0.040 ) *
( l_ip . F_sz_um / 0.040 ) * mcp . num_channels ; //um^2
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
} else {
//area based on Cadence ChipEstimator for 8bit bus
output_data . area = 0.4e6 / 2 * mcp . dataBusWidth / 8 / 1e6 ;
2014-04-01 18:44:30 +02:00
}
}
2014-06-03 22:32:59 +02:00
void MCPHY : : computeEnergy ( ) {
//PHY uses internal data buswidth but the actuall off-chip datawidth is 64bits + ecc
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio ( ) ;
/*
* according to " A 100mW 9.6Gb/s Transceiver in 90nm CMOS for next-generation memory interfaces , " ISSCC 2006 ;
* From Cadence ChipEstimator for normal I / O around 0.4 ~ 0.8 mW / Gb / s
*/
double power_per_gb_per_s , phy_dyn , phy_gates ;
double NMOS_sizing = g_tp . min_w_nmos_ ;
double PMOS_sizing = g_tp . min_w_nmos_ * pmos_to_nmos_sizing_r ;
double area_um2 = output_data . area * 1e6 ;
if ( mcp . mc_type = = MC ) {
if ( mcp . type = = 0 ) {
power_per_gb_per_s = mcp . LVDS ? 0.01 : 0.04 ;
//This is from curve fitting based on Niagara 1 and 2's PHY die photo.
//This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
//power.readOp.dynamic = 0.02*memAccesses*llcBlocksize*8;//change from Bytes to bits.
power . readOp . dynamic = power_per_gb_per_s *
sqrt ( l_ip . F_sz_um / 0.09 ) * g_tp . peri_global . Vdd / 1.2 *
g_tp . peri_global . Vdd / 1.2 ;
power . readOp . leakage = area_um2 / 2 *
( g_tp . scaling_factor . core_tx_density ) *
cmos_Isub_leakage ( NMOS_sizing , PMOS_sizing , 1 , inv ) *
g_tp . peri_global . Vdd ; //unit W
power . readOp . gate_leakage = area_um2 / 2 *
( g_tp . scaling_factor . core_tx_density ) *
cmos_Ig_leakage ( NMOS_sizing , PMOS_sizing , 1 , inv ) *
g_tp . peri_global . Vdd ; //unit W
} else {
phy_gates = 200000 * mcp . dataBusWidth / 64.0 ;
power_per_gb_per_s = 0.01 ;
//This is power not energy, 10mw/Gb/s @90nm for each channel and scaling down
power . readOp . dynamic = power_per_gb_per_s * ( l_ip . F_sz_um / 0.09 ) *
g_tp . peri_global . Vdd / 1.2 * g_tp . peri_global . Vdd / 1.2 ;
power . readOp . leakage = ( mcp . withPHY ? phy_gates : 0 ) *
cmos_Isub_leakage ( NMOS_sizing , PMOS_sizing , 2 , nand ) *
g_tp . peri_global . Vdd ; //unit W
power . readOp . gate_leakage = ( mcp . withPHY ? phy_gates : 0 ) *
cmos_Ig_leakage ( NMOS_sizing , PMOS_sizing , 2 , nand ) *
g_tp . peri_global . Vdd ; //unit W
}
}
2014-04-01 18:44:30 +02:00
// double phy_factor = (int)ceil(mcp.dataBusWidth/72.0);//Previous phy power numbers are based on 72 bit DIMM interface
// power_t.readOp.dynamic *= phy_factor;
// power_t.readOp.leakage *= phy_factor;
// power_t.readOp.gate_leakage *= phy_factor;
2014-06-03 22:32:59 +02:00
double long_channel_device_reduction =
longer_channel_device_reduction ( Uncore_device ) ;
power . readOp . longer_channel_leakage =
power . readOp . leakage * long_channel_device_reduction ;
// Leakage power calculations
output_data . subthreshold_leakage_power =
longer_channel_device ? power . readOp . longer_channel_leakage :
power . readOp . leakage ;
output_data . gate_leakage_power = power . readOp . gate_leakage ;
// Peak dynamic power calculation
double data_transfer_unit = ( mcp . mc_type = = MC ) ? 72 : 16 ; /*DIMM data width*/
output_data . peak_dynamic_power = power . readOp . dynamic *
( mcp . peak_transfer_rate * BITS_PER_BYTE / 1e3 ) * mcp . dataBusWidth /
data_transfer_unit * mcp . num_channels / mcp . clockRate ;
// Runtime dynamic energy calculation
output_data . runtime_dynamic_energy =
power . readOp . dynamic *
( rtp_stats . readAc . access + rtp_stats . writeAc . access ) *
mcp . llcBlockSize * BITS_PER_BYTE / 1e9 +
// Original McPAT code: Assume 10% of peak power is consumed by routine
// job including memory refreshing and scrubbing
power . readOp . dynamic * 0.1 * execution_time ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
MCFrontEnd : : MCFrontEnd ( XMLNode * _xml_data , InputParameter * interface_ip_ ,
const MCParameters & mcp_ , const MCStatistics & mcs_ )
: McPATComponent ( _xml_data ) , frontendBuffer ( NULL ) , readBuffer ( NULL ) ,
writeBuffer ( NULL ) , MC_arb ( NULL ) , interface_ip ( * interface_ip_ ) ,
mcp ( mcp_ ) , mcs ( mcs_ ) {
int tag , data ;
bool is_default = true ; //indication for default setup
/* MC frontend engine channels share the same engines but logically partitioned
* For all hardware inside MC . different channels do not share resources .
* TODO : add docodeing / mux stage to steer memory requests to different channels .
*/
name = " Front End " ;
// Memory Request Reorder Buffer
tag = mcp . addressbus_width + EXTRA_TAG_BITS + mcp . opcodeW ;
data = int ( ceil ( ( physical_address_width + mcp . opcodeW ) / BITS_PER_BYTE ) ) ;
interface_ip . cache_sz = data * mcp . req_window_size_per_channel ;
interface_ip . line_sz = data ;
interface_ip . assoc = mcp . reorder_buffer_assoc ;
interface_ip . nbanks = mcp . reorder_buffer_nbanks ;
interface_ip . out_w = interface_ip . line_sz * BITS_PER_BYTE ;
interface_ip . specific_tag = tag > 0 ;
interface_ip . tag_w = tag ;
interface_ip . access_mode = Normal ;
interface_ip . obj_func_dyn_energy = 0 ;
interface_ip . obj_func_dyn_power = 0 ;
interface_ip . obj_func_leak_power = 0 ;
interface_ip . obj_func_cycle_t = 1 ;
interface_ip . num_rw_ports = 0 ;
interface_ip . num_rd_ports = mcp . num_channels ;
interface_ip . num_wr_ports = interface_ip . num_rd_ports ;
interface_ip . num_se_rd_ports = 0 ;
interface_ip . num_search_ports = mcp . num_channels ;
interface_ip . is_cache = true ;
interface_ip . pure_cam = false ;
interface_ip . pure_ram = false ;
interface_ip . throughput = 1.0 / mcp . clockRate ;
interface_ip . latency = 1.0 / mcp . clockRate ;
frontendBuffer = new CacheArray ( xml_data , & interface_ip , " Reorder Buffer " ,
Uncore_device , mcp . clockRate ) ;
children . push_back ( frontendBuffer ) ;
frontendBuffer - > tdp_stats . reset ( ) ;
frontendBuffer - > tdp_stats . readAc . access =
frontendBuffer - > l_ip . num_search_ports +
frontendBuffer - > l_ip . num_wr_ports ;
frontendBuffer - > tdp_stats . writeAc . access =
frontendBuffer - > l_ip . num_search_ports ;
frontendBuffer - > tdp_stats . searchAc . access =
frontendBuffer - > l_ip . num_wr_ports ;
frontendBuffer - > rtp_stats . reset ( ) ;
// TODO: These stats assume that access power is calculated per buffer
// bit, which requires the stats to take into account the number of
// bits for each buffer slot. This should be revised...
//For each channel, each memory word need to check the address data to
//achieve best scheduling results.
//and this need to be done on all physical DIMMs in each logical memory
//DIMM *mcp.dataBusWidth/72
frontendBuffer - > rtp_stats . readAc . access = mcs . reads * mcp . llcBlockSize *
BITS_PER_BYTE / mcp . dataBusWidth * mcp . dataBusWidth / 72 ;
frontendBuffer - > rtp_stats . writeAc . access = mcs . writes * mcp . llcBlockSize *
BITS_PER_BYTE / mcp . dataBusWidth * mcp . dataBusWidth / 72 ;
frontendBuffer - > rtp_stats . searchAc . access =
frontendBuffer - > rtp_stats . readAc . access +
frontendBuffer - > rtp_stats . writeAc . access ;
// Read Buffers
//Support key words first operation
data = ( int ) ceil ( mcp . dataBusWidth / BITS_PER_BYTE ) ;
interface_ip . cache_sz = data * mcp . IO_buffer_size_per_channel ;
interface_ip . line_sz = data ;
interface_ip . assoc = mcp . read_buffer_assoc ;
interface_ip . nbanks = mcp . read_buffer_nbanks ;
interface_ip . out_w = interface_ip . line_sz * BITS_PER_BYTE ;
interface_ip . specific_tag = mcp . read_buffer_tag_width > 0 ;
interface_ip . tag_w = mcp . read_buffer_tag_width ;
interface_ip . access_mode = Sequential ;
interface_ip . obj_func_dyn_energy = 0 ;
interface_ip . obj_func_dyn_power = 0 ;
interface_ip . obj_func_leak_power = 0 ;
interface_ip . obj_func_cycle_t = 1 ;
interface_ip . num_rw_ports = 0 ;
interface_ip . num_rd_ports = mcp . num_channels ;
interface_ip . num_wr_ports = interface_ip . num_rd_ports ;
interface_ip . num_se_rd_ports = 0 ;
interface_ip . num_search_ports = 0 ;
interface_ip . is_cache = false ;
interface_ip . pure_cam = false ;
interface_ip . pure_ram = true ;
interface_ip . throughput = 1.0 / mcp . clockRate ;
interface_ip . latency = 1.0 / mcp . clockRate ;
readBuffer = new CacheArray ( xml_data , & interface_ip , " Read Buffer " ,
Uncore_device , mcp . clockRate ) ;
children . push_back ( readBuffer ) ;
readBuffer - > tdp_stats . reset ( ) ;
readBuffer - > tdp_stats . readAc . access = readBuffer - > l_ip . num_rd_ports *
mcs . duty_cycle ;
readBuffer - > tdp_stats . writeAc . access = readBuffer - > l_ip . num_wr_ports *
mcs . duty_cycle ;
readBuffer - > rtp_stats . reset ( ) ;
readBuffer - > rtp_stats . readAc . access = mcs . reads * mcp . llcBlockSize *
BITS_PER_BYTE / mcp . dataBusWidth ;
readBuffer - > rtp_stats . writeAc . access = mcs . reads * mcp . llcBlockSize *
BITS_PER_BYTE / mcp . dataBusWidth ;
// Write Buffer
//Support key words first operation
data = ( int ) ceil ( mcp . dataBusWidth / BITS_PER_BYTE ) ;
interface_ip . cache_sz = data * mcp . IO_buffer_size_per_channel ;
interface_ip . line_sz = data ;
interface_ip . assoc = mcp . write_buffer_assoc ;
interface_ip . nbanks = mcp . write_buffer_nbanks ;
interface_ip . out_w = interface_ip . line_sz * BITS_PER_BYTE ;
interface_ip . specific_tag = mcp . write_buffer_tag_width > 0 ;
interface_ip . tag_w = mcp . write_buffer_tag_width ;
interface_ip . access_mode = Normal ;
interface_ip . obj_func_dyn_energy = 0 ;
interface_ip . obj_func_dyn_power = 0 ;
interface_ip . obj_func_leak_power = 0 ;
interface_ip . obj_func_cycle_t = 1 ;
interface_ip . num_rw_ports = 0 ;
interface_ip . num_rd_ports = mcp . num_channels ;
interface_ip . num_wr_ports = interface_ip . num_rd_ports ;
interface_ip . num_se_rd_ports = 0 ;
interface_ip . num_search_ports = 0 ;
interface_ip . is_cache = false ;
interface_ip . pure_cam = false ;
interface_ip . pure_ram = true ;
interface_ip . throughput = 1.0 / mcp . clockRate ;
interface_ip . latency = 1.0 / mcp . clockRate ;
writeBuffer = new CacheArray ( xml_data , & interface_ip , " Write Buffer " ,
Uncore_device , mcp . clockRate ) ;
children . push_back ( writeBuffer ) ;
writeBuffer - > tdp_stats . reset ( ) ;
writeBuffer - > tdp_stats . readAc . access = writeBuffer - > l_ip . num_rd_ports *
mcs . duty_cycle ;
writeBuffer - > tdp_stats . writeAc . access = writeBuffer - > l_ip . num_wr_ports *
mcs . duty_cycle ;
writeBuffer - > rtp_stats . reset ( ) ;
writeBuffer - > rtp_stats . readAc . access = mcs . reads * mcp . llcBlockSize *
BITS_PER_BYTE / mcp . dataBusWidth ;
writeBuffer - > rtp_stats . writeAc . access = mcs . writes * mcp . llcBlockSize *
BITS_PER_BYTE / mcp . dataBusWidth ;
// TODO: Set up selection logic as a leaf node in tree
//selection and arbitration logic
MC_arb =
new selection_logic ( xml_data , is_default ,
mcp . req_window_size_per_channel , 1 , & interface_ip ,
" Arbitration Logic " , ( mcs . reads + mcs . writes ) ,
mcp . clockRate , Uncore_device ) ;
// MC_arb is not included in the roll-up due to the uninitialized area
//children.push_back(MC_arb);
}
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
MemoryController : : MemoryController ( XMLNode * _xml_data ,
InputParameter * interface_ip_ )
: McPATComponent ( _xml_data ) , interface_ip ( * interface_ip_ ) {
name = " Memory Controller " ;
set_mc_param ( ) ;
// TODO: Pass params and stats as pointers
children . push_back ( new MCFrontEnd ( xml_data , & interface_ip , mcp , mcs ) ) ;
children . push_back ( new MCBackend ( xml_data , & interface_ip , mcp , mcs ) ) ;
if ( mcp . type = = 0 | | ( mcp . type = = 1 & & mcp . withPHY ) ) {
children . push_back ( new MCPHY ( xml_data , & interface_ip , mcp , mcs ) ) ;
2014-04-01 18:44:30 +02:00
}
}
2014-06-03 22:32:59 +02:00
void MemoryController : : initialize_params ( ) {
memset ( & mcp , 0 , sizeof ( MCParameters ) ) ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
void MemoryController : : set_mc_param ( ) {
initialize_params ( ) ;
int num_children = xml_data - > nChildNode ( " param " ) ;
int tech_type ;
int mat_type ;
int i ;
for ( i = 0 ; i < num_children ; i + + ) {
XMLNode * paramNode = xml_data - > getChildNodePtr ( " param " , & i ) ;
XMLCSTR node_name = paramNode - > getAttribute ( " name " ) ;
XMLCSTR value = paramNode - > getAttribute ( " value " ) ;
if ( ! node_name )
warnMissingParamName ( paramNode - > getAttribute ( " id " ) ) ;
ASSIGN_FP_IF ( " mc_clock " , mcp . clockRate ) ;
ASSIGN_INT_IF ( " tech_type " , tech_type ) ;
ASSIGN_ENUM_IF ( " mc_type " , mcp . mc_type , MemoryCtrl_type ) ;
ASSIGN_FP_IF ( " num_mcs " , mcp . num_mcs ) ;
ASSIGN_INT_IF ( " llc_line_length " , mcp . llc_line_length ) ;
ASSIGN_INT_IF ( " databus_width " , mcp . databus_width ) ;
ASSIGN_INT_IF ( " memory_channels_per_mc " , mcp . num_channels ) ;
ASSIGN_INT_IF ( " req_window_size_per_channel " ,
mcp . req_window_size_per_channel ) ;
ASSIGN_INT_IF ( " IO_buffer_size_per_channel " ,
mcp . IO_buffer_size_per_channel ) ;
ASSIGN_INT_IF ( " addressbus_width " , mcp . addressbus_width ) ;
ASSIGN_INT_IF ( " opcode_width " , mcp . opcodeW ) ;
ASSIGN_INT_IF ( " type " , mcp . type ) ;
ASSIGN_ENUM_IF ( " LVDS " , mcp . LVDS , bool ) ;
ASSIGN_ENUM_IF ( " withPHY " , mcp . withPHY , bool ) ;
ASSIGN_INT_IF ( " peak_transfer_rate " , mcp . peak_transfer_rate ) ;
ASSIGN_INT_IF ( " number_ranks " , mcp . number_ranks ) ;
ASSIGN_INT_IF ( " reorder_buffer_assoc " , mcp . reorder_buffer_assoc ) ;
ASSIGN_INT_IF ( " reorder_buffer_nbanks " , mcp . reorder_buffer_nbanks ) ;
ASSIGN_INT_IF ( " read_buffer_assoc " , mcp . read_buffer_assoc ) ;
ASSIGN_INT_IF ( " read_buffer_nbanks " , mcp . read_buffer_nbanks ) ;
ASSIGN_INT_IF ( " read_buffer_tag_width " , mcp . read_buffer_tag_width ) ;
ASSIGN_INT_IF ( " write_buffer_assoc " , mcp . write_buffer_assoc ) ;
ASSIGN_INT_IF ( " write_buffer_nbanks " , mcp . write_buffer_nbanks ) ;
ASSIGN_INT_IF ( " write_buffer_tag_width " , mcp . write_buffer_tag_width ) ;
ASSIGN_INT_IF ( " wire_mat_type " , mat_type ) ;
ASSIGN_ENUM_IF ( " wire_type " , interface_ip . wt , Wire_type ) ;
else {
warnUnrecognizedParam ( node_name ) ;
}
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
if ( mcp . mc_type ! = MC ) {
cout < < " Unknown memory controller type: Only DRAM controller is "
< < " supported for now " < < endl ;
exit ( 0 ) ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
// Change from MHz to Hz
mcp . clockRate * = 1e6 ;
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
interface_ip . data_arr_ram_cell_tech_type = tech_type ;
interface_ip . data_arr_peri_global_tech_type = tech_type ;
interface_ip . tag_arr_ram_cell_tech_type = tech_type ;
interface_ip . tag_arr_peri_global_tech_type = tech_type ;
interface_ip . wire_is_mat_type = mat_type ;
interface_ip . wire_os_mat_type = mat_type ;
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
num_children = xml_data - > nChildNode ( " stat " ) ;
for ( i = 0 ; i < num_children ; i + + ) {
XMLNode * statNode = xml_data - > getChildNodePtr ( " stat " , & i ) ;
XMLCSTR node_name = statNode - > getAttribute ( " name " ) ;
XMLCSTR value = statNode - > getAttribute ( " value " ) ;
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
if ( ! node_name )
warnMissingStatName ( statNode - > getAttribute ( " id " ) ) ;
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
ASSIGN_FP_IF ( " duty_cycle " , mcs . duty_cycle ) ;
ASSIGN_FP_IF ( " perc_load " , mcs . perc_load ) ;
ASSIGN_FP_IF ( " memory_reads " , mcs . reads ) ;
ASSIGN_INT_IF ( " memory_writes " , mcs . writes ) ;
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
else {
warnUnrecognizedStat ( node_name ) ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
}
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
// Add ECC overhead
mcp . llcBlockSize = int ( ceil ( mcp . llc_line_length / BITS_PER_BYTE ) ) +
mcp . llc_line_length ;
mcp . dataBusWidth = int ( ceil ( mcp . databus_width / BITS_PER_BYTE ) ) +
mcp . databus_width ;
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
MCFrontEnd : : ~ MCFrontEnd ( ) {
2014-04-01 18:44:30 +02:00
2014-06-03 22:32:59 +02:00
if ( MC_arb ) {
delete MC_arb ;
MC_arb = NULL ;
}
if ( frontendBuffer ) {
delete frontendBuffer ;
frontendBuffer = NULL ;
}
if ( readBuffer ) {
delete readBuffer ;
readBuffer = NULL ;
}
if ( writeBuffer ) {
delete writeBuffer ;
writeBuffer = NULL ;
}
2014-04-01 18:44:30 +02:00
}
2014-06-03 22:32:59 +02:00
MemoryController : : ~ MemoryController ( ) {
// TODO: use default constructor to delete children
2014-04-01 18:44:30 +02:00
}