ext: McPAT interface changes and fixes

This patch includes software engineering changes and some generic bug fixes
that Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known
issues/concerns that we did not have a chance to address in this patch.

High-level changes in this patch include:
 1) Making XML parsing modular and hierarchical:
   - Shift parsing responsibility into the components
   - Read XML in a (mostly) context-free recursive manner so that McPAT input
     files can contain arbitrary component hierarchies
 2) Making power, energy, and area calculations a hierarchical and recursive
    process
   - Components track their subcomponents and recursively call compute
     functions in stages
   - Make C++ object hierarchy reflect inheritance of classes of components
     with similar structures
   - Simplify computeArea() and computeEnergy() functions to eliminate
     successive calls to calculate separate TDP vs. runtime energy
   - Remove Processor component (now unnecessary) and introduce a more abstract
     System component
 3) Standardizing McPAT output across all components
   - Use a single, common data structure for storing and printing McPAT output
   - Recursively call print functions through component hierarchy
 4) For caches, allow splitting data array and tag array reads and writes for
    better accuracy
 5) Improving the usability of CACTI by printing more helpful warning and error
    messages
 6) Minor: Impose more rigorous code style for clarity (more work still to be
    done)
Overall, these changes greatly reduce the amount of replicated code, and they
improve McPAT runtime and decrease memory footprint.
This commit is contained in:
Yasuko Eckert 2014-06-03 13:32:59 -07:00
parent 1104199115
commit 0deef376d9
71 changed files with 23147 additions and 28461 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,591 +0,0 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef XML_PARSE_H_
#define XML_PARSE_H_
//#ifdef WIN32
//#define _CRT_SECURE_NO_DEPRECATE
//#endif
#include <stdio.h>
#include <string.h>
#include <iostream>
#include "xmlParser.h"
using namespace std;
/*
void myfree(char *t); // {free(t);}
ToXMLStringTool tx,tx2;
*/
//all subnodes at the level of system.core(0-n)
//cache_policy is added into cache property arrays: 0 = no write or write-through with non-write allocate; 1 = write-back with write-allocate
// Predictor sub-node of one system.core entry (embedded by value as
// system_core::predictor). Plain data record; nothing here validates values.
typedef struct{
// params -- NOTE(review): the params/stats split below mirrors the sibling
// structs in this header; confirm against the parser.
int prediction_width;
// Fixed 20-byte character buffer.
char prediction_scheme[20];
int predictor_size;
int predictor_entries;
// Array of 20 ints; which slots are meaningful is not visible here.
int local_predictor_size[20];
int local_predictor_entries;
int global_predictor_entries;
int global_predictor_bits;
int chooser_predictor_entries;
int chooser_predictor_bits;
// stats
double predictor_accesses;
} predictor_systemcore;
// ITLB sub-node of one system.core entry (embedded by value as
// system_core::itlb). Plain data record.
typedef struct{
// params (same two fields that dtlb_systemcore labels as params)
int number_entries;
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
// stats (counters are stored as double, not as integers)
double total_hits;
double total_accesses;
double total_misses;
double conflicts;
} itlb_systemcore;
// Instruction-cache sub-node of one system.core entry (embedded by value as
// system_core::icache). Plain data record.
typedef struct{
//params
// Array of 20 doubles; the meaning of each slot is not defined in this header.
double icache_config[20];
// Array of 20 ints; the meaning of each slot is not defined in this header.
int buffer_sizes[20];
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double read_misses;
double replacements;
double read_hits;
double total_hits;
double total_misses;
// Spelled "access" (singular) here and in dcache_systemcore, but
// "miss_buffer_accesses" in system_L2 / system_L3.
double miss_buffer_access;
double fill_buffer_accesses;
double prefetch_buffer_accesses;
double prefetch_buffer_writes;
double prefetch_buffer_reads;
double prefetch_buffer_hits;
double conflicts;
} icache_systemcore;
// DTLB sub-node of one system.core entry (embedded by value as
// system_core::dtlb). Plain data record.
typedef struct{
//params
int number_entries;
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double write_hits;
double read_hits;
double read_misses;
double write_misses;
double total_hits;
double total_misses;
double conflicts;
} dtlb_systemcore;
// Data-cache sub-node of one system.core entry (embedded by value as
// system_core::dcache). Plain data record.
typedef struct{
//params
// Array of 20 doubles; the meaning of each slot is not defined in this header.
double dcache_config[20];
// Array of 20 ints; the meaning of each slot is not defined in this header.
int buffer_sizes[20];
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double total_hits;
double total_misses;
double read_hits;
double write_hits;
double read_misses;
double write_misses;
double replacements;
double write_backs;
// Spelled "access" (singular) here and in icache_systemcore, but
// "miss_buffer_accesses" in system_L2 / system_L3.
double miss_buffer_access;
double fill_buffer_accesses;
double prefetch_buffer_accesses;
double prefetch_buffer_writes;
double prefetch_buffer_reads;
double prefetch_buffer_hits;
// NOTE(review): "wbb" presumably abbreviates write-back buffer -- confirm.
double wbb_writes;
double wbb_reads;
double conflicts;
} dcache_systemcore;
// BTB sub-node of one system.core entry (embedded by value as
// system_core::BTB). Plain data record.
typedef struct{
//params
// Array of 20 ints; the meaning of each slot is not defined in this header.
int BTB_config[20];
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double total_hits;
double total_misses;
double read_hits;
double write_hits;
double read_misses;
double write_misses;
double replacements;
} BTB_systemcore;
// Per-core record: the element type of root_system::core[]. Holds the
// core-level parameters, the core-level statistics, and the six per-core
// sub-node structs, all stored by value. Plain data; nothing here validates
// or initializes the fields.
typedef struct{
//all params at the level of system.core(0-n)
int clock_rate;
bool opt_local;
bool x86;
// NOTE(review): machine_bits, virtual_address_width and
// physical_address_width also exist on root_system; which copy is
// authoritative is not visible in this header.
int machine_bits;
int virtual_address_width;
int physical_address_width;
int opcode_width;
int micro_opcode_width;
int instruction_length;
int machine_type;
int internal_datapath_width;
int number_hardware_threads;
int fetch_width;
int number_instruction_fetch_ports;
int decode_width;
int issue_width;
int peak_issue_width;
int commit_width;
// The [20] members below are fixed-size arrays / character buffers; the
// meaning of individual slots is not defined in this header.
int pipelines_per_core[20];
int pipeline_depth[20];
char FPU[20];
char divider_multiplier[20];
int ALU_per_core;
// Note: declared double, unlike the int ALU_per_core / MUL_per_core.
double FPU_per_core;
int MUL_per_core;
int instruction_buffer_size;
int decoded_stream_buffer_size;
int instruction_window_scheme;
int instruction_window_size;
int fp_instruction_window_size;
int ROB_size;
int archi_Regs_IRF_size;
int archi_Regs_FRF_size;
int phy_Regs_IRF_size;
int phy_Regs_FRF_size;
int rename_scheme;
int register_windows_size;
char LSU_order[20];
int store_buffer_size;
int load_buffer_size;
int memory_ports;
char Dcache_dual_pump[20];
int RAS_size;
int fp_issue_width;
// Also present as predictor_systemcore::prediction_width.
int prediction_width;
int number_of_BTB;
int number_of_BPT;
//all stats at the level of system.core(0-n)
// (counters are stored as double)
double total_instructions;
double int_instructions;
double fp_instructions;
double branch_instructions;
double branch_mispredictions;
double committed_instructions;
double committed_int_instructions;
double committed_fp_instructions;
double load_instructions;
double store_instructions;
double total_cycles;
double idle_cycles;
double busy_cycles;
double instruction_buffer_reads;
double instruction_buffer_write;
double ROB_reads;
double ROB_writes;
double rename_accesses;
double fp_rename_accesses;
double rename_reads;
double rename_writes;
double fp_rename_reads;
double fp_rename_writes;
double inst_window_reads;
double inst_window_writes;
double inst_window_wakeup_accesses;
double inst_window_selections;
double fp_inst_window_reads;
double fp_inst_window_writes;
double fp_inst_window_wakeup_accesses;
double fp_inst_window_selections;
double archi_int_regfile_reads;
double archi_float_regfile_reads;
double phy_int_regfile_reads;
double phy_float_regfile_reads;
double phy_int_regfile_writes;
double phy_float_regfile_writes;
double archi_int_regfile_writes;
double archi_float_regfile_writes;
double int_regfile_reads;
double float_regfile_reads;
double int_regfile_writes;
double float_regfile_writes;
double windowed_reg_accesses;
double windowed_reg_transports;
double function_calls;
double context_switches;
double ialu_accesses;
double fpu_accesses;
double mul_accesses;
double cdb_alu_accesses;
double cdb_mul_accesses;
double cdb_fpu_accesses;
double load_buffer_reads;
double load_buffer_writes;
double load_buffer_cams;
double store_buffer_reads;
double store_buffer_writes;
double store_buffer_cams;
double store_buffer_forwards;
double main_memory_access;
double main_memory_read;
double main_memory_write;
// Per-unit duty-cycle values; units and valid range are not stated here.
double pipeline_duty_cycle;
double IFU_duty_cycle ;
double BR_duty_cycle ;
double LSU_duty_cycle ;
double MemManU_I_duty_cycle;
double MemManU_D_duty_cycle ;
double ALU_duty_cycle ;
double MUL_duty_cycle ;
double FPU_duty_cycle ;
double ALU_cdb_duty_cycle ;
double MUL_cdb_duty_cycle ;
double FPU_cdb_duty_cycle ;
//all subnodes at the level of system.core(0-n)
predictor_systemcore predictor;
itlb_systemcore itlb;
icache_systemcore icache;
dtlb_systemcore dtlb;
dcache_systemcore dcache;
BTB_systemcore BTB;
} system_core;
// L1-directory record: the element type of root_system::L1Directory[].
// Its field list is identical to system_L2Directory. Plain data record.
typedef struct{
//params
int Directory_type;
// The [20] members are fixed-size arrays / a character buffer; the meaning
// of individual slots is not defined in this header.
double Dir_config[20];
int buffer_sizes[20];
int clockrate;
int ports[20];
int device_type;
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
char threeD_stack[20];
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double read_misses;
double write_misses;
double conflicts;
double duty_cycle;
} system_L1Directory;
// L2-directory record: the element type of root_system::L2Directory[].
// Its field list is identical to system_L1Directory. Plain data record.
typedef struct{
//params
int Directory_type;
// The [20] members are fixed-size arrays / a character buffer; the meaning
// of individual slots is not defined in this header.
double Dir_config[20];
int buffer_sizes[20];
int clockrate;
int ports[20];
int device_type;
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
char threeD_stack[20];
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double read_misses;
double write_misses;
double conflicts;
double duty_cycle;
} system_L2Directory;
// L2 cache record: the element type of root_system::L2[]. Apart from the
// name of the config array, its field list matches system_L3. Plain data.
typedef struct{
//params
// The [20] members are fixed-size arrays / a character buffer; the meaning
// of individual slots is not defined in this header.
double L2_config[20];
int clockrate;
int ports[20];
int device_type;
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
char threeD_stack[20];
int buffer_sizes[20];
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double total_hits;
double total_misses;
double read_hits;
double write_hits;
double read_misses;
double write_misses;
double replacements;
double write_backs;
double miss_buffer_accesses;
double fill_buffer_accesses;
double prefetch_buffer_accesses;
double prefetch_buffer_writes;
double prefetch_buffer_reads;
double prefetch_buffer_hits;
double wbb_writes;
double wbb_reads;
double conflicts;
double duty_cycle;
// NOTE(review): presumably flags that a directory is merged into this cache
// and that the homenode_* / dir_duty_cycle fields below then apply --
// confirm against the code that reads this struct.
bool merged_dir;
double homenode_read_accesses;
double homenode_write_accesses;
double homenode_read_hits;
double homenode_write_hits;
double homenode_read_misses;
double homenode_write_misses;
double dir_duty_cycle;
} system_L2;
// L3 cache record: the element type of root_system::L3[]. Apart from the
// name of the config array, its field list matches system_L2. Plain data.
typedef struct{
//params
// The [20] members are fixed-size arrays / a character buffer; the meaning
// of individual slots is not defined in this header.
double L3_config[20];
int clockrate;
int ports[20];
int device_type;
int cache_policy;// 0: no write or write-through with non-write allocate; 1: write-back with write-allocate
char threeD_stack[20];
int buffer_sizes[20];
//stats (counters are stored as double)
double total_accesses;
double read_accesses;
double write_accesses;
double total_hits;
double total_misses;
double read_hits;
double write_hits;
double read_misses;
double write_misses;
double replacements;
double write_backs;
double miss_buffer_accesses;
double fill_buffer_accesses;
double prefetch_buffer_accesses;
double prefetch_buffer_writes;
double prefetch_buffer_reads;
double prefetch_buffer_hits;
double wbb_writes;
double wbb_reads;
double conflicts;
double duty_cycle;
// NOTE(review): presumably flags that a directory is merged into this cache
// and that the homenode_* / dir_duty_cycle fields below then apply --
// confirm against the code that reads this struct.
bool merged_dir;
double homenode_read_accesses;
double homenode_write_accesses;
double homenode_read_hits;
double homenode_write_hits;
double homenode_read_misses;
double homenode_write_misses;
double dir_duty_cycle;
} system_L3;
// Crossbar sub-node of a NoC record (embedded by value as system_NoC::xbar0).
// Plain data record.
typedef struct{
//params
int number_of_inputs_of_crossbars;
int number_of_outputs_of_crossbars;
int flit_bits;
int input_buffer_entries_per_port;
// Array of 20 ints; the meaning of each slot is not defined in this header.
int ports_of_input_buffer[20];
//stats
double crossbar_accesses;
} xbar0_systemNoC;
// NoC record: the element type of root_system::NoC[]. Contains one embedded
// xbar0_systemNoC. Plain data record.
typedef struct{
//params
int clockrate;
// Two-valued (bool) selector; what true/false stand for is not stated here.
bool type;
bool has_global_link;
// The char[20] members are fixed 20-byte character buffers.
char topology[20];
int horizontal_nodes;
int vertical_nodes;
int link_throughput;
int link_latency;
int input_ports;
int output_ports;
int virtual_channel_per_port;
int flit_bits;
int input_buffer_entries_per_vc;
// Array of 20 ints; the meaning of each slot is not defined in this header.
int ports_of_input_buffer[20];
int dual_pump;
int number_of_crossbars;
char crossbar_type[20];
char crosspoint_type[20];
xbar0_systemNoC xbar0;
int arbiter_type;
double chip_coverage;
//stats
double total_accesses;
double duty_cycle;
double route_over_perc;
} system_NoC;
// Main-memory record (stored by value as root_system::mem). Plain data
// record; units of the integer parameters are not stated in this header.
typedef struct{
//params
int mem_tech_node;
int device_clock;
int peak_transfer_rate;
int internal_prefetch_of_DRAM_chip;
int capacity_per_channel;
int number_ranks;
int num_banks_of_DRAM_chip;
int Block_width_of_DRAM_chip;
int output_width_of_DRAM_chip;
int page_size_of_DRAM_chip;
int burstlength_of_DRAM_chip;
//stats (counters are stored as double)
double memory_accesses;
double memory_reads;
double memory_writes;
} system_mem;
// Controller record used for two root_system members: `mc` and `flashc`.
// The leading fields are shared by both uses; the "McParam" group applies to
// the memory-controller use only, per the section comments below.
typedef struct{
//params
//Common Param for mc and fc
// Note: double here, whereas system_mem::peak_transfer_rate is int.
double peak_transfer_rate;
int number_mcs;
bool withPHY;
int type;
//FCParam
//stats
double duty_cycle;
double total_load_perc;
//McParam
int mc_clock;
int llc_line_length;
int memory_channels_per_mc;
int number_ranks;
int req_window_size_per_channel;
int IO_buffer_size_per_channel;
int databus_width;
int addressbus_width;
bool LVDS;
//stats (counters are stored as double)
double memory_accesses;
double memory_reads;
double memory_writes;
} system_mc;
// NIU record (stored by value as root_system::niu). Plain data record.
typedef struct{
//params
int clockrate;
int number_units;
int type;
//stats
double duty_cycle;
double total_load_perc;
} system_niu;
// PCIe record (stored by value as root_system::pcie). Plain data record.
typedef struct{
//params
int clockrate;
int number_units;
int num_channels;
int type;
bool withPHY;
//stats
double duty_cycle;
double total_load_perc;
} system_pcie;
// Top-level 'system' record: system-wide counts and parameters plus every
// component record, all stored by value. ParseXML holds one instance as its
// `sys` member. Because the component arrays are embedded inline (64 entries
// each), an object of this type is large.
typedef struct{
//All number_of_* at the level of 'system' Ying 03/21/2009
// NOTE(review): presumably these give how many entries of the fixed
// 64-element arrays below are populated; no bound check is visible in this
// header -- confirm in the parser.
int number_of_cores;
int number_of_L1Directories;
int number_of_L2Directories;
int number_of_L2s;
bool Private_L2;
int number_of_L3s;
int number_of_NoCs;
int number_of_dir_levels;
int domain_size;
int first_level_dir;
// All params at the level of 'system'
int homogeneous_cores;
int homogeneous_L1Directories;
int homogeneous_L2Directories;
double core_tech_node;
int target_core_clockrate;
int target_chip_area;
int temperature;
int number_cache_levels;
int L1_property;
int L2_property;
int homogeneous_L2s;
int L3_property;
int homogeneous_L3s;
int homogeneous_NoCs;
int homogeneous_ccs;
int Max_area_deviation;
int Max_power_deviation;
int device_type;
bool longer_channel_device;
bool Embedded;
bool opt_dynamic_power;
// NOTE(review): "lakage" looks like a misspelling of "leakage"; the name is
// left as is because it is part of this struct's interface.
bool opt_lakage_power;
bool opt_clockrate;
bool opt_area;
int interconnect_projection_type;
// These three are also declared per core in system_core.
int machine_bits;
int virtual_address_width;
int physical_address_width;
int virtual_memory_page_size;
double total_cycles;
//system.core(0-n):3rd level
// Each component array has a fixed capacity of 64 entries.
system_core core[64];
system_L1Directory L1Directory[64];
system_L2Directory L2Directory[64];
system_L2 L2[64];
system_L3 L3[64];
system_NoC NoC[64];
system_mem mem;
// `mc` and `flashc` share the system_mc layout.
system_mc mc;
system_mc flashc;
system_niu niu;
system_pcie pcie;
} root_system;
// Holder for a McPAT system description. The entire root_system tree lives
// inline in `sys`; parse() and initialize() are only declared here, so their
// exact behavior is defined outside this header.
class ParseXML
{
public:
    // The system description, stored by value (not heap-allocated).
    root_system sys;

    // NOTE(review): presumably resets `sys` to default values -- confirm
    // against the definition.
    void initialize();

    // Takes a mutable C-string path.
    // NOTE(review): presumably reads the XML file at `filepath` into `sys`
    // -- confirm against the definition.
    void parse(char* filepath);
};
#endif /* XML_PARSE_H_ */

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,232 +26,242 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#define GLOBALVAR
#include <cassert>
#include <cmath>
#include <iostream>
#include <math.h>
#include "area.h"
#include "array.h"
#include "common.h"
#include "decoder.h"
#include "globalvar.h"
#include "parameter.h"
using namespace std;
ArrayST::ArrayST(const InputParameter *configure_interface,
string _name,
enum Device_ty device_ty_,
bool opt_local_,
enum Core_type core_ty_,
bool _is_default)
:l_ip(*configure_interface),
name(_name),
device_ty(device_ty_),
opt_local(opt_local_),
core_ty(core_ty_),
is_default(_is_default)
{
if (l_ip.cache_sz<64) l_ip.cache_sz=64;
l_ip.error_checking();//not only do the error checking but also fill some missing parameters
optimize_array();
}
double ArrayST::area_efficiency_threshold = 20.0;
int ArrayST::ed = 0;
//Fixed number, make sure timing can be satisfied.
int ArrayST::delay_wt = 100;
int ArrayST::cycle_time_wt = 1000;
//Fixed number, This is used to exhaustive search for individual components.
int ArrayST::area_wt = 10;
//Fixed number, This is used to exhaustive search for individual components.
int ArrayST::dynamic_power_wt = 10;
int ArrayST::leakage_power_wt = 10;
//Fixed number, make sure timing can be satisfied.
int ArrayST::delay_dev = 1000000;
int ArrayST::cycle_time_dev = 100;
//Fixed number, This is used to exhaustive search for individual components.
int ArrayST::area_dev = 1000000;
//Fixed number, This is used to exhaustive search for individual components.
int ArrayST::dynamic_power_dev = 1000000;
int ArrayST::leakage_power_dev = 1000000;
int ArrayST::cycle_time_dev_threshold = 10;
void ArrayST::compute_base_power()
{
//l_ip.out_w =l_ip.line_sz*8;
local_result=cacti_interface(&l_ip);
ArrayST::ArrayST(XMLNode* _xml_data,
const InputParameter *configure_interface, string _name,
enum Device_ty device_ty_, double _clockRate,
bool opt_local_, enum Core_type core_ty_, bool _is_default)
: McPATComponent(_xml_data), l_ip(*configure_interface),
device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
is_default(_is_default) {
name = _name;
clockRate = _clockRate;
if (l_ip.cache_sz < MIN_BUFFER_SIZE)
l_ip.cache_sz = MIN_BUFFER_SIZE;
if (!l_ip.error_checking(name)) {
exit(1);
}
void ArrayST::optimize_array()
{
list<uca_org_t > candidate_solutions(0);
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
output_data.reset();
uca_org_t * temp_res = 0;
local_result.valid=false;
computeEnergy();
computeArea();
}
double throughput=l_ip.throughput, latency=l_ip.latency;
double area_efficiency_threshold = 20.0;
bool throughput_overflow=true, latency_overflow=true;
compute_base_power();
void ArrayST::compute_base_power() {
local_result = cacti_interface(&l_ip);
}
if ((local_result.cycle_time - throughput) <= 1e-10 )
throughput_overflow=false;
if ((local_result.access_time - latency)<= 1e-10)
latency_overflow=false;
void ArrayST::computeArea() {
area.set_area(local_result.area);
output_data.area = local_result.area / 1e6;
}
if (opt_for_clk && opt_local)
{
if (throughput_overflow || latency_overflow)
{
l_ip.ed=0;
void ArrayST::computeEnergy() {
list<uca_org_t > candidate_solutions(0);
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
l_ip.cycle_time_wt = 1000;
uca_org_t* temp_res = NULL;
local_result.valid = false;
l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
l_ip.leakage_power_wt = 10;
double throughput = l_ip.throughput;
double latency = l_ip.latency;
bool throughput_overflow = true;
bool latency_overflow = true;
compute_base_power();
l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied.
l_ip.cycle_time_dev = 100;
if ((local_result.cycle_time - throughput) <= 1e-10 )
throughput_overflow = false;
if ((local_result.access_time - latency) <= 1e-10)
latency_overflow = false;
l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
l_ip.leakage_power_dev = 1000000;
if (opt_for_clk && opt_local) {
if (throughput_overflow || latency_overflow) {
l_ip.ed = ed;
throughput_overflow=true; //Reset overflow flag before start optimization iterations
latency_overflow=true;
l_ip.delay_wt = delay_wt;
l_ip.cycle_time_wt = cycle_time_wt;
temp_res = &local_result; //Clean up the result for optimized for ED^2P
temp_res->cleanup();
}
l_ip.area_wt = area_wt;
l_ip.dynamic_power_wt = dynamic_power_wt;
l_ip.leakage_power_wt = leakage_power_wt;
l_ip.delay_dev = delay_dev;
l_ip.cycle_time_dev = cycle_time_dev;
while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10
{
compute_base_power();
l_ip.area_dev = area_dev;
l_ip.dynamic_power_dev = dynamic_power_dev;
l_ip.leakage_power_dev = leakage_power_dev;
l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration
//Reset overflow flag before start optimization iterations
throughput_overflow = true;
latency_overflow = true;
// from best area to worst area -->worst timing to best timing
if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)||
(local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0))
{ //if no satisfiable solution is found,the most aggressive one is left
candidate_solutions.push_back(local_result);
//output_data_csv(candidate_solutions.back());
if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10))
//ensure stop opt not because of cam
{
throughput_overflow=false;
latency_overflow=false;
}
}
else
{
//TODO: whether checking the partial satisfied results too, or just change the mark???
if ((local_result.cycle_time - throughput) <= 1e-10)
throughput_overflow=false;
if ((local_result.access_time - latency)<= 1e-10)
latency_overflow=false;
if (l_ip.cycle_time_dev > 10)
{ //if not >10 local_result is the last result, it cannot be cleaned up
temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up
temp_res->cleanup();
}
}
// l_ip.cycle_time_dev-=10;
// l_ip.delay_dev-=10;
}
if (l_ip.assoc > 0)
{
//For array structures except CAM and FA, Give warning but still provide a result with best timing found
if (throughput_overflow==true)
cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl;
if (latency_overflow==true)
cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl;
//Clean up the result for optimized for ED^2P
temp_res = &local_result;
temp_res->cleanup();
}
// else
// {
// /*According to "Content-Addressable Memory (CAM) Circuits and
// Architectures": A Tutorial and Survey
// by Kostas Pagiamtzis et al.
// CAM structures can be heavily pipelined and use look-ahead techniques,
// therefore timing can be relaxed. But McPAT does not model the advanced
// techniques. If continue optimizing, the area efficiency will be too low
// */
// //For CAM and FA, stop opt if area efficiency is too low
// if (throughput_overflow==true)
// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
// if (latency_overflow==true)
// cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name
// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
// }
//double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time;
double min_dynamic_energy=BIGNUM;
if (candidate_solutions.empty()==false)
{
local_result.valid=true;
for (candidate_iter = candidate_solutions.begin(); candidate_iter != candidate_solutions.end(); ++candidate_iter)
{
if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic)
{
min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
min_dynamic_energy_iter = candidate_iter;
local_result = *(min_dynamic_energy_iter);
//TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
}
else
{
candidate_iter->cleanup() ;
}
}
while ((throughput_overflow || latency_overflow) &&
l_ip.cycle_time_dev > cycle_time_dev_threshold) {
compute_base_power();
//This is the time_dev to be used for next iteration
l_ip.cycle_time_dev -= cycle_time_dev_threshold;
// from best area to worst area -->worst timing to best timing
if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
(local_result.access_time - latency) <= 1e-10) ||
(local_result.data_array2->area_efficiency <
area_efficiency_threshold && l_ip.assoc == 0)) {
//if no satisfiable solution is found,the most aggressive one
//is left
candidate_solutions.push_back(local_result);
if (((local_result.cycle_time - throughput) <= 1e-10) &&
((local_result.access_time - latency) <= 1e-10)) {
//ensure stop opt not because of cam
throughput_overflow = false;
latency_overflow = false;
}
} else {
if ((local_result.cycle_time - throughput) <= 1e-10)
throughput_overflow = false;
if ((local_result.access_time - latency) <= 1e-10)
latency_overflow = false;
//if not >10 local_result is the last result, it cannot be
//cleaned up
if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
//Only solutions not saved in the list need to be
//cleaned up
temp_res = &local_result;
temp_res->cleanup();
}
}
}
if (l_ip.assoc > 0) {
//For array structures except CAM and FA, Give warning but still
//provide a result with best timing found
if (throughput_overflow == true)
cout << "Warning: " << name
<< " array structure cannot satisfy throughput constraint."
<< endl;
if (latency_overflow == true)
cout << "Warning: " << name
<< " array structure cannot satisfy latency constraint."
<< endl;
}
double min_dynamic_energy = BIGNUM;
if (candidate_solutions.empty() == false) {
local_result.valid = true;
for (candidate_iter = candidate_solutions.begin();
candidate_iter != candidate_solutions.end();
++candidate_iter) {
if (min_dynamic_energy >
(candidate_iter)->power.readOp.dynamic) {
min_dynamic_energy =
(candidate_iter)->power.readOp.dynamic;
min_dynamic_energy_iter = candidate_iter;
local_result = *(min_dynamic_energy_iter);
} else {
candidate_iter->cleanup() ;
}
}
}
candidate_solutions.clear();
}
}
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
double long_channel_device_reduction =
longer_channel_device_reduction(device_ty, core_ty);
double macro_layout_overhead = g_tp.macro_layout_overhead;
double chip_PR_overhead = g_tp.chip_layout_overhead;
double total_overhead = macro_layout_overhead*chip_PR_overhead;
local_result.area *= total_overhead;
double macro_layout_overhead = g_tp.macro_layout_overhead;
double chip_PR_overhead = g_tp.chip_layout_overhead;
double total_overhead = macro_layout_overhead * chip_PR_overhead;
local_result.area *= total_overhead;
//maintain constant power density
double pppm_t[4] = {total_overhead,1,1,total_overhead};
//maintain constant power density
double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
double sckRation = g_tp.sckt_co_eff;
local_result.power.readOp.dynamic *= sckRation;
local_result.power.writeOp.dynamic *= sckRation;
local_result.power.searchOp.dynamic *= sckRation;
local_result.power.readOp.leakage *= l_ip.nbanks;
local_result.power.readOp.longer_channel_leakage =
local_result.power.readOp.leakage*long_channel_device_reduction;
local_result.power = local_result.power* pppm_t;
double sckRation = g_tp.sckt_co_eff;
local_result.power.readOp.dynamic *= sckRation;
local_result.power.writeOp.dynamic *= sckRation;
local_result.power.searchOp.dynamic *= sckRation;
local_result.power.readOp.leakage *= l_ip.nbanks;
local_result.power.readOp.longer_channel_leakage =
local_result.power.readOp.leakage * long_channel_device_reduction;
local_result.power = local_result.power * pppm_t;
local_result.data_array2->power.readOp.dynamic *= sckRation;
local_result.data_array2->power.writeOp.dynamic *= sckRation;
local_result.data_array2->power.searchOp.dynamic *= sckRation;
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
local_result.data_array2->power.readOp.longer_channel_leakage =
local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
local_result.data_array2->power = local_result.data_array2->power* pppm_t;
local_result.data_array2->power.readOp.dynamic *= sckRation;
local_result.data_array2->power.writeOp.dynamic *= sckRation;
local_result.data_array2->power.searchOp.dynamic *= sckRation;
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
local_result.data_array2->power.readOp.longer_channel_leakage =
local_result.data_array2->power.readOp.leakage *
long_channel_device_reduction;
local_result.data_array2->power = local_result.data_array2->power * pppm_t;
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
{
local_result.tag_array2->power.readOp.dynamic *= sckRation;
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
local_result.tag_array2->power.readOp.longer_channel_leakage =
local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
}
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
local_result.tag_array2->power.readOp.dynamic *= sckRation;
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
local_result.tag_array2->power.readOp.longer_channel_leakage =
local_result.tag_array2->power.readOp.leakage *
long_channel_device_reduction;
local_result.tag_array2->power =
local_result.tag_array2->power * pppm_t;
}
power = local_result.power;
output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
output_data.subthreshold_leakage_power = power.readOp.leakage;
output_data.gate_leakage_power = power.readOp.gate_leakage;
}
void ArrayST::leakage_feedback(double temperature)
@ -296,7 +307,6 @@ void ArrayST::leakage_feedback(double temperature)
}
}
ArrayST:: ~ArrayST()
{
local_result.cleanup();
ArrayST::~ArrayST() {
local_result.cleanup();
}

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -43,59 +44,42 @@
using namespace std;
class ArrayST :public Component{
public:
ArrayST(){};
ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true);
InputParameter l_ip;
string name;
enum Device_ty device_ty;
bool opt_local;
enum Core_type core_ty;
bool is_default;
uca_org_t local_result;
statsDef tdp_stats;
statsDef rtp_stats;
statsDef stats_t;
powerDef power_t;
virtual void optimize_array();
virtual void compute_base_power();
virtual ~ArrayST();
void leakage_feedback(double temperature);
};
class InstCache :public Component{
class ArrayST : public McPATComponent {
public:
ArrayST* caches;
ArrayST* missb;
ArrayST* ifb;
ArrayST* prefetchb;
powerDef power_t;//temp value holder for both (max) power and runtime power
InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;};
~InstCache(){
if (caches) {//caches->local_result.cleanup();
delete caches; caches=0;}
if (missb) {//missb->local_result.cleanup();
delete missb; missb=0;}
if (ifb) {//ifb->local_result.cleanup();
delete ifb; ifb=0;}
if (prefetchb) {//prefetchb->local_result.cleanup();
delete prefetchb; prefetchb=0;}
};
static double area_efficiency_threshold;
// These are used for the CACTI interface.
static int ed;
static int delay_wt;
static int cycle_time_wt;
static int area_wt;
static int dynamic_power_wt;
static int leakage_power_wt;
static int delay_dev;
static int cycle_time_dev;
static int area_dev;
static int dynamic_power_dev;
static int leakage_power_dev;
static int cycle_time_dev_threshold;
InputParameter l_ip;
enum Device_ty device_ty;
bool opt_local;
enum Core_type core_ty;
bool is_default;
uca_org_t local_result;
statsDef stats_t;
ArrayST(XMLNode* _xml_data, const InputParameter *configure_interface,
string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
bool opt_local_ = true,
enum Core_type core_ty_ = Inorder, bool _is_default = true);
void computeArea();
void computeEnergy();
void compute_base_power();
~ArrayST();
void leakage_feedback(double temperature);
};
class DataCache :public InstCache{
public:
ArrayST* wbb;
DataCache(){wbb=0;};
~DataCache(){
if (wbb) {//wbb->local_result.cleanup();
delete wbb; wbb=0;}
};
};
#endif /* TLB_H_ */
#endif /* ARRAY_H_ */

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -34,94 +35,327 @@
#include <iostream>
#include "basic_components.h"
#include "cacheunit.h"
#include "common.h"
// Definitions (with initial values) of the static data members declared in
// class McPATComponent. Being static, each has one copy shared by every
// component instance.
// Turn this to true to get debugging messages
bool McPATComponent::debug = false;
bool McPATComponent::opt_for_clk = true;
int McPATComponent::longer_channel_device = 0;
// Number of cycles per second, 2GHz = 2e9
double McPATComponent::target_core_clockrate = 2e9;
// total_cycles and execution_time start at zero. displayData() divides by
// execution_time, so it presumably has to be set from the input statistics
// before any output is printed -- TODO confirm where it is assigned.
double McPATComponent::total_cycles = 0.0f;
double McPATComponent::execution_time = 0.0f;
// Address/data-path geometry; zero until assigned elsewhere.
int McPATComponent::physical_address_width = 0;
int McPATComponent::virtual_address_width = 0;
int McPATComponent::virtual_memory_page_size = 0;
int McPATComponent::data_path_width = 0;
/**
 * Clear every output field (storage, area, the three power figures and the
 * runtime dynamic energy) back to zero.
 */
void McPATOutput::reset() {
    // All six fields are doubles, so one chained assignment clears them all.
    storage = area = peak_dynamic_power = subthreshold_leakage_power =
        gate_leakage_power = runtime_dynamic_energy = 0.0;
}
/**
 * Field-wise sum of two McPATOutput records.
 *
 * @param lhs left operand
 * @param rhs right operand
 * @return a new record in which every field is lhs.field + rhs.field
 */
McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs) {
    // Start from a copy of the left operand and accumulate the right one
    // into it; McPATOutput::operator+= adds the same six fields.
    McPATOutput sum(lhs);
    sum += rhs;
    return sum;
}
/**
 * Accumulate another output record into this one, field by field.
 *
 * @param rhs the record whose values are added to this object
 */
void McPATOutput::operator+=(const McPATOutput &rhs) {
    // Energy and power figures
    runtime_dynamic_energy += rhs.runtime_dynamic_energy;
    peak_dynamic_power += rhs.peak_dynamic_power;
    subthreshold_leakage_power += rhs.subthreshold_leakage_power;
    gate_leakage_power += rhs.gate_leakage_power;

    // Physical footprint
    area += rhs.area;
    storage += rhs.storage;
}
/**
 * Default constructor: no XML node attached and an empty name.
 *
 * clockRate and output_data are given defined (zero) values so that reading
 * or printing them before a compute pass does not touch uninitialized
 * memory.
 */
McPATComponent::McPATComponent()
        : xml_data(NULL), name(""), clockRate(0.0) {
    output_data.reset();
}

/**
 * Construct a component bound to the XML node it reads its input from.
 *
 * @param _xml_data XML node describing this component (stored, not copied)
 */
McPATComponent::McPATComponent(XMLNode* _xml_data)
        : xml_data(_xml_data), name(""), clockRate(0.0) {
    output_data.reset();
}

/**
 * Construct a component bound to an XML node and seeded with a copy of the
 * given interface parameters.
 *
 * @param _xml_data     XML node describing this component (stored, not
 *                      copied)
 * @param _interface_ip parameters copied into interface_ip; it is
 *                      dereferenced unconditionally, so it must not be NULL
 */
McPATComponent::McPATComponent(XMLNode* _xml_data,
                               InputParameter* _interface_ip)
        : xml_data(_xml_data), interface_ip(*_interface_ip), name(""),
          clockRate(0.0) {
    output_data.reset();
}
// Destructor. The body is empty: the pointers stored in `children` (for
// example the CacheUnit allocated with `new` in recursiveInstantiate()) are
// not deleted here.
// NOTE(review): presumably each owner releases its children elsewhere --
// confirm, otherwise those allocations leak.
McPATComponent::~McPATComponent() {
}
void McPATComponent::recursiveInstantiate() {
if (debug) {
fprintf(stderr, "WARNING: Called recursiveInstantiate from %s, with ",
"'type' %s\n", name.c_str(), xml_data->getAttribute("type"));
}
int i;
int numChildren = xml_data->nChildNode("component");
for (i = 0; i < numChildren; i++ ) {
// For each child node of the system,
XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
XMLCSTR type = childXML->getAttribute("type");
if (!type)
warnMissingComponentType(childXML->getAttribute("id"));
STRCMP(type, "Core")
warnIncompleteComponentType(type);
STRCMP(type, "CacheUnit")
children.push_back(new CacheUnit(childXML, &interface_ip));
STRCMP(type, "CacheController")
warnIncompleteComponentType(type);
STRCMP(type, "MemoryController")
warnIncompleteComponentType(type);
STRCMP(type, "Memory")
warnIncompleteComponentType(type);
STRCMP(type, "OnChipNetwork")
warnIncompleteComponentType(type);
STRCMP(type, "BusInterconnect")
warnIncompleteComponentType(type);
STRCMP(type, "Directory")
warnIncompleteComponentType(type);
else
warnUnrecognizedComponent(type);
}
}
void McPATComponent::computeArea() {
if (debug) {
fprintf(stderr, "WARNING: Called computeArea from %s, with 'type' ",
"%s\n", name.c_str(), xml_data->getAttribute("type"));
}
// TODO: This calculation is incorrect and is overwritten by computeEnergy
// Fix it up so that the values are available at the correct times
int i;
int numChildren = children.size();
area.set_area(0.0);
output_data.area = 0.0;
for (i = 0; i < numChildren; i++) {
children[i]->computeArea();
output_data.area += area.get_area();
}
}
void McPATComponent::computeEnergy() {
if (debug) {
fprintf(stderr, "WARNING: Called computeEnergy from %s, with 'type' ",
"%s\n", name.c_str(), xml_data->getAttribute("type"));
}
power.reset();
rt_power.reset();
memset(&output_data, 0, sizeof(McPATOutput));
int i;
int numChildren = children.size();
for (i = 0; i < numChildren; i++) {
children[i]->computeEnergy();
output_data += children[i]->output_data;
}
}
void McPATComponent::displayData(uint32_t indent, int plevel) {
if (debug) {
fprintf(stderr, "WARNING: Called displayData from %s, with 'type' ",
"%s\n", name.c_str(), xml_data->getAttribute("type"));
}
string indent_str(indent, ' ');
string indent_str_next(indent + 2, ' ');
double leakage_power = output_data.subthreshold_leakage_power +
output_data.gate_leakage_power;
double total_runtime_energy = output_data.runtime_dynamic_energy +
leakage_power * execution_time;
cout << indent_str << name << ":" << endl;
cout << indent_str_next << "Area = " << output_data.area << " mm^2"
<< endl;
cout << indent_str_next << "Peak Dynamic Power = "
<< output_data.peak_dynamic_power << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage Power = "
<< output_data.subthreshold_leakage_power << " W" << endl;
cout << indent_str_next << "Gate Leakage Power = "
<< output_data.gate_leakage_power << " W" << endl;
cout << indent_str_next << "Runtime Dynamic Power = "
<< (output_data.runtime_dynamic_energy / execution_time) << " W"
<< endl;
cout << indent_str_next << "Runtime Dynamic Energy = "
<< output_data.runtime_dynamic_energy << " J" << endl;
cout << indent_str_next << "Total Runtime Energy = "
<< total_runtime_energy << " J" << endl;
cout << endl;
// Recursively print children
int i;
int numChildren = children.size();
for (i = 0; i < numChildren; i++) {
children[i]->displayData(indent + 4, plevel);
}
}
/**
 * Report a required parameter that was not specified and terminate the
 * program with exit status 1.
 *
 * @param param name of the missing parameter
 */
void McPATComponent::errorUnspecifiedParam(string param) {
    const char* component_name = name.c_str();
    const char* param_name = param.c_str();
    fprintf(stderr, "ERROR: Parameter must be specified in %s: %s\n",
            component_name, param_name);
    exit(1);
}
/**
 * Report a parameter that must be positive but is not, and terminate the
 * program with exit status 1.
 *
 * @param param name of the offending parameter
 */
void McPATComponent::errorNonPositiveParam(string param) {
    const char* component_name = name.c_str();
    const char* param_name = param.c_str();
    fprintf(stderr, "ERROR: Parameter must be positive in %s: %s\n",
            component_name, param_name);
    exit(1);
}
/**
 * Warn on stderr that a child component type is not recognized.
 *
 * @param component the unrecognized type string; it is printed with %s, so
 *                  callers must not pass NULL
 */
void McPATComponent::warnUnrecognizedComponent(XMLCSTR component) {
    const char* owner = name.c_str();
    fprintf(stderr, "WARNING: Component type not recognized in %s: %s\n",
            owner, component);
}
/**
 * Warn on stderr that a parameter name is not recognized by this component.
 *
 * @param param the unrecognized parameter name; it is printed with %s, so
 *              callers must not pass NULL
 */
void McPATComponent::warnUnrecognizedParam(XMLCSTR param) {
    const char* owner = name.c_str();
    fprintf(stderr, "WARNING: Parameter not recognized in %s: %s\n",
            owner, param);
}
/**
 * Warn on stderr that a statistic name is not recognized by this component.
 *
 * @param stat the unrecognized statistic name; it is printed with %s, so
 *             callers must not pass NULL
 */
void McPATComponent::warnUnrecognizedStat(XMLCSTR stat) {
    const char* owner = name.c_str();
    fprintf(stderr, "WARNING: Statistic not recognized in %s: %s\n",
            owner, stat);
}
/**
 * Warn on stderr that a component type is recognized but its handling has
 * not been completed yet.
 *
 * @param type the component type string; it is printed with %s, so callers
 *             must not pass NULL
 */
void McPATComponent::warnIncompleteComponentType(XMLCSTR type) {
    const char* const message = " WARNING: %s handling not yet complete\n";
    fprintf(stderr, message, type);
}
/**
 * Warn on stderr that a child component is being ignored because it has no
 * "type".
 *
 * @param id the "id" of the ignored component, or NULL when it has none, in
 *           which case the name of this component is reported instead
 */
void McPATComponent::warnMissingComponentType(XMLCSTR id) {
    // No id available: identify the offender by its parent component.
    if (!id) {
        fprintf(stderr,
                "WARNING: Ignoring a component in %s due to the missing type\n",
                name.c_str());
        return;
    }

    fprintf(stderr,
            "WARNING: Ignoring a component due to the missing type: %s\n",
            id);
}
/**
 * Warn on stderr that a parameter is being ignored because it has no
 * "name".
 *
 * @param id the "id" of the ignored parameter, or NULL when it has none, in
 *           which case the name of this component is reported instead
 */
void McPATComponent::warnMissingParamName(XMLCSTR id) {
    // No id available: identify the offender by its enclosing component.
    if (!id) {
        fprintf(stderr,
                "WARNING: Ignoring a parameter in %s due to the missing name\n",
                name.c_str());
        return;
    }

    fprintf(stderr,
            "WARNING: Ignoring a parameter due to the missing name: %s\n",
            id);
}
/**
 * Warn on stderr that a statistic is being ignored because it has no
 * "name".
 *
 * @param id the "id" of the ignored statistic, or NULL when it has none, in
 *           which case the name of this component is reported instead
 */
void McPATComponent::warnMissingStatName(XMLCSTR id) {
    // No id available: identify the offender by its enclosing component.
    if (!id) {
        fprintf(stderr,
                "WARNING: Ignoring a statistic in %s due to the missing name\n",
                name.c_str());
        return;
    }

    fprintf(stderr,
            "WARNING: Ignoring a statistic due to the missing name: %s\n",
            id);
}
double longer_channel_device_reduction(
enum Device_ty device_ty,
enum Core_type core_ty)
{
enum Device_ty device_ty,
enum Core_type core_ty) {
double longer_channel_device_percentage_core;
double longer_channel_device_percentage_uncore;
double longer_channel_device_percentage_llc;
double longer_channel_device_percentage_core;
double longer_channel_device_percentage_uncore;
double longer_channel_device_percentage_llc;
double long_channel_device_reduction;
double long_channel_device_reduction;
longer_channel_device_percentage_llc = 1.0;
longer_channel_device_percentage_uncore = 0.82;
if (core_ty==OOO)
{
longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam
//longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam
longer_channel_device_percentage_llc = 1.0;
longer_channel_device_percentage_uncore = 0.82;
if (core_ty == OOO) {
//0.54 Xeon Tulsa //0.58 Nehelam
longer_channel_device_percentage_core = 0.56;
} else {
//0.8;//Niagara
longer_channel_device_percentage_core = 0.8;
}
}
else
{
longer_channel_device_percentage_core = 0.8;//0.8;//Niagara
//longer_channel_device_percentage_uncore = 0.9;//Niagara
}
if (device_ty == Core_device) {
long_channel_device_reduction =
(1 - longer_channel_device_percentage_core) +
longer_channel_device_percentage_core *
g_tp.peri_global.long_channel_leakage_reduction;
} else if (device_ty == Uncore_device) {
long_channel_device_reduction =
(1 - longer_channel_device_percentage_uncore) +
longer_channel_device_percentage_uncore *
g_tp.peri_global.long_channel_leakage_reduction;
} else if (device_ty == LLC_device) {
long_channel_device_reduction =
(1 - longer_channel_device_percentage_llc) +
longer_channel_device_percentage_llc *
g_tp.peri_global.long_channel_leakage_reduction;
} else {
cout << "ERROR: Unknown device category: " << device_ty << endl;
exit(0);
}
if (device_ty==Core_device)
{
long_channel_device_reduction = (1- longer_channel_device_percentage_core)
+ longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction;
}
else if (device_ty==Uncore_device)
{
long_channel_device_reduction = (1- longer_channel_device_percentage_uncore)
+ longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction;
}
else if (device_ty==LLC_device)
{
long_channel_device_reduction = (1- longer_channel_device_percentage_llc)
+ longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction;
}
else
{
cout<<"unknown device category"<<endl;
exit(0);
}
return long_channel_device_reduction;
return long_channel_device_reduction;
}
statsComponents operator+(const statsComponents & x, const statsComponents & y)
{
statsComponents z;
statsComponents operator+(const statsComponents & x, const statsComponents & y) {
statsComponents z;
z.access = x.access + y.access;
z.hit = x.hit + y.hit;
z.miss = x.miss + y.miss;
z.access = x.access + y.access;
z.hit = x.hit + y.hit;
z.miss = x.miss + y.miss;
return z;
return z;
}
statsComponents operator*(const statsComponents & x, double const * const y)
{
statsComponents z;
statsComponents operator*(const statsComponents & x, double const * const y) {
statsComponents z;
z.access = x.access*y[0];
z.hit = x.hit*y[1];
z.miss = x.miss*y[2];
z.access = x.access * y[0];
z.hit = x.hit * y[1];
z.miss = x.miss * y[2];
return z;
return z;
}
statsDef operator+(const statsDef & x, const statsDef & y)
{
statsDef z;
statsDef operator+(const statsDef & x, const statsDef & y) {
statsDef z;
z.readAc = x.readAc + y.readAc;
z.writeAc = x.writeAc + y.writeAc;
z.searchAc = x.searchAc + y.searchAc;
return z;
z.readAc = x.readAc + y.readAc;
z.writeAc = x.writeAc + y.writeAc;
z.searchAc = x.searchAc + y.searchAc;
return z;
}
statsDef operator*(const statsDef & x, double const * const y)
{
statsDef z;
statsDef operator*(const statsDef & x, double const * const y) {
statsDef z;
z.readAc = x.readAc*y;
z.writeAc = x.writeAc*y;
z.searchAc = x.searchAc*y;
return z;
z.readAc = x.readAc * y;
z.writeAc = x.writeAc * y;
z.searchAc = x.searchAc * y;
return z;
}

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -34,9 +35,15 @@
#include <vector>
#include "XML_Parse.h"
#include "component.h"
#include "parameter.h"
#include "xmlParser.h"
/**
* TODO: Since revisions to McPAT aim to make the component hierarchy more
* modular, many of the parameter and statistics classes/structs included in
* this file should be moved to the files for their respective components.
*/
const double cdb_overhead = 1.1;
enum FU_type {
@ -46,21 +53,28 @@ enum FU_type {
};
enum Core_type {
OOO,
Inorder
OOO,
Inorder
};
enum Renaming_type {
RAMbased,
CAMbased
CAMbased
};
enum Scheduler_type {
PhysicalRegFile,
ReservationStation
ReservationStation
};
enum cache_level {
enum Cache_type {
DATA_CACHE,
INSTRUCTION_CACHE,
MIXED
};
enum CacheLevel {
L1,
L2,
L3,
L1Directory,
@ -68,198 +82,408 @@ enum cache_level {
};
enum MemoryCtrl_type {
MC, //memory controller
FLASHC //flash controller
MC, //memory controller
FLASHC //flash controller
};
enum Dir_type {
ST,//shadowed tag
DC,//directory cache
SBT,//static bank tag
NonDir
ST,//shadowed tag
DC,//directory cache
SBT,//static bank tag
NonDir
};
enum Cache_policy {
Write_through,
Write_back
Write_through,
Write_back
};
enum Device_ty {
Core_device,
Uncore_device,
LLC_device
Core_device,
Uncore_device,
LLC_device
};
class statsComponents
{
public:
enum Access_mode {
Normal,
Sequential,
Fast
};
class statsComponents {
public:
double access;
double hit;
double miss;
statsComponents() : access(0), hit(0), miss(0) {}
statsComponents(const statsComponents & obj) { *this = obj; }
statsComponents & operator=(const statsComponents & rhs)
{
access = rhs.access;
hit = rhs.hit;
miss = rhs.miss;
return *this;
statsComponents(const statsComponents & obj) {
*this = obj;
}
statsComponents & operator=(const statsComponents & rhs) {
access = rhs.access;
hit = rhs.hit;
miss = rhs.miss;
return *this;
}
void reset() {
access = 0;
hit = 0;
miss = 0;
}
void reset() { access = 0; hit = 0; miss = 0;}
friend statsComponents operator+(const statsComponents & x, const statsComponents & y);
friend statsComponents operator*(const statsComponents & x, double const * const y);
friend statsComponents operator+(const statsComponents & x,
const statsComponents & y);
friend statsComponents operator*(const statsComponents & x,
double const * const y);
};
class statsDef
{
public:
class statsDef {
public:
statsComponents readAc;
statsComponents writeAc;
statsComponents searchAc;
statsComponents dataReadAc;
statsComponents dataWriteAc;
statsComponents tagReadAc;
statsComponents tagWriteAc;
statsDef() : readAc(), writeAc(),searchAc() { }
void reset() { readAc.reset(); writeAc.reset();searchAc.reset();}
statsDef() : readAc(), writeAc(), searchAc() { }
void reset() {
readAc.reset();
writeAc.reset();
searchAc.reset();
}
friend statsDef operator+(const statsDef & x, const statsDef & y);
friend statsDef operator*(const statsDef & x, double const * const y);
};
/**
 * An object to store the computed data that will be output from McPAT on a
 * per-component-instance basis. Currently, this includes the amount of storage
 * that the component comprises, its chip area, and power and energy
 * calculations.
 *
 * No constructor is declared, so the fields of a default-constructed object
 * are not initialized here; call reset() to zero them before accumulating.
 */
class McPATOutput {
public:
// Storage is in bytes (B)
double storage;
// Area is in mm^2
double area;
// Peak Dynamic Power is in W
double peak_dynamic_power;
// Subthreshold Leakage Power is in W
double subthreshold_leakage_power;
// Gate Leakage Power is in W
double gate_leakage_power;
// Runtime Dynamic Energy is in J
double runtime_dynamic_energy;
// Set every field above to zero
void reset();
// Field-wise sum of two records, returned as a new record
friend McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs);
// Field-wise accumulation of rhs into this record
void operator+=(const McPATOutput &rhs);
};
/**
 * A McPATComponent encompasses all the parts that are common to any component
 * for which McPAT may compute and print power, area, and timing data. It
 * includes a pointer to the XML data from which the component gathers its
 * input parameters, it stores the variables that are commonly used in all
 * components, and it maintains the hierarchical structure to recursively
 * compute and print output. This is a base class from which all components
 * should inherit this functionality (possibly through other descended
 * classes).
 */
class McPATComponent : public Component {
  public:
    // Set to true to get debugging messages on stderr
    static bool debug;

    // Variables shared across the system by all McPATComponents
    static bool opt_for_clk;
    static int longer_channel_device;
    static double execution_time;
    static int physical_address_width;
    static int virtual_address_width;
    static int virtual_memory_page_size;
    static int data_path_width;

    // Although these two variables are static right now, they need to be
    // modulated on a per-frequency-domain basis eventually.
    static double target_core_clockrate;
    static double total_cycles;

    // XML node this component reads its parameters and statistics from
    XMLNode* xml_data;
    InputParameter interface_ip;
    string name;
    // Number of cycles per second (consider changing name)
    double clockRate;
    // Subcomponents; the compute and display functions recurse over these
    vector<McPATComponent*> children;
    // The data structure that is printed in displayData
    McPATOutput output_data;
    // Set this to contain the stats to calculate peak dynamic power
    statsDef tdp_stats;
    // Set this to contain the stats to calculate runtime dynamic energy/power
    statsDef rtp_stats;
    // Holds the peak dynamic power calculation
    powerDef power_t;
    // Holds the runtime dynamic power calculation
    powerDef rt_power;

    McPATComponent();
    // Which of these is a better way of doing things?!
    McPATComponent(XMLNode* _xml_data);
    McPATComponent(XMLNode* _xml_data, InputParameter* _interface_ip);

    // Create child components from the "component" nodes of xml_data
    virtual void recursiveInstantiate();
    // Recursively compute the area of the children
    virtual void computeArea();
    // This function should probably be pure virtual, but it's too early in
    // the modifying process to know for sure. Note that each component has
    // to calculate its own power consumption
    virtual void computeEnergy();
    // Print output_data for this component, then for each child
    virtual void displayData(uint32_t indent, int plevel);

    // Virtual because components are stored and handled through
    // McPATComponent pointers (see children); deleting a derived object
    // through such a pointer requires a virtual destructor.
    virtual ~McPATComponent();

  protected:
    // Fatal input errors: print a message and exit
    void errorUnspecifiedParam(string param);
    void errorNonPositiveParam(string param);

    // Non-fatal input problems: print a warning and continue
    void warnUnrecognizedComponent(XMLCSTR component);
    void warnUnrecognizedParam(XMLCSTR param);
    void warnUnrecognizedStat(XMLCSTR stat);
    void warnIncompleteComponentType(XMLCSTR type);
    void warnMissingComponentType(XMLCSTR id);
    void warnMissingParamName(XMLCSTR id);
    void warnMissingStatName(XMLCSTR id);
};
double longer_channel_device_reduction(
enum Device_ty device_ty=Core_device,
enum Core_type core_ty=Inorder);
enum Device_ty device_ty = Core_device,
enum Core_type core_ty = Inorder);
class CoreDynParam {
class CoreParameters {
public:
CoreDynParam(){};
CoreDynParam(ParseXML *XML_interface, int ithCore_);
// :XML(XML_interface),
// ithCore(ithCore_)
// core_ty(inorder),
// rm_ty(CAMbased),
// scheu_ty(PhysicalRegFile),
// clockRate(1e9),//1GHz
// arch_ireg_width(32),
// arch_freg_width(32),
// phy_ireg_width(128),
// phy_freg_width(128),
// perThreadState(8),
// globalCheckpoint(32),
// instructionLength(32){};
//ParseXML * XML;
bool opt_local;
bool x86;
bool Embedded;
enum Core_type core_ty;
enum Renaming_type rm_ty;
bool opt_local;
bool x86;
bool Embedded;
enum Core_type core_ty;
enum Renaming_type rm_ty;
enum Scheduler_type scheu_ty;
double clockRate,executionTime;
int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width;
int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries;
int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW;
int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length;
int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines;
int num_alus, num_muls;
double clockRate;
int arch_ireg_width;
int arch_freg_width;
int archi_Regs_IRF_size;
int archi_Regs_FRF_size;
int phy_ireg_width;
int phy_freg_width;
int num_IRF_entry;
int num_FRF_entry;
int num_ifreelist_entries;
int num_ffreelist_entries;
int fetchW;
int decodeW;
int issueW;
int peak_issueW;
int commitW;
int peak_commitW;
int predictionW;
int fp_issueW;
int fp_decodeW;
int perThreadState;
int globalCheckpoint;
int instruction_length;
int pc_width;
int opcode_width;
int micro_opcode_length;
int num_hthreads;
int pipeline_stages;
int fp_pipeline_stages;
int num_pipelines;
int num_fp_pipelines;
int num_alus;
int num_muls;
double num_fpus;
int int_data_width, fp_data_width,v_address_width, p_address_width;
double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles;
bool regWindowing,multithreaded;
int int_data_width;
int fp_data_width;
int v_address_width;
int p_address_width;
bool regWindowing;
bool multithreaded;
double pppm_lkg_multhread[4];
double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle,
MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle,
FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle,
FPU_cdb_duty_cycle;
~CoreDynParam(){};
int ROB_size;
int ROB_assoc;
int ROB_nbanks;
int ROB_tag_width;
int scheduler_assoc;
int scheduler_nbanks;
int register_window_size;
double register_window_throughput;
double register_window_latency;
int register_window_assoc;
int register_window_nbanks;
int register_window_tag_width;
int register_window_rw_ports;
int phy_Regs_IRF_size;
int phy_Regs_IRF_assoc;
int phy_Regs_IRF_nbanks;
int phy_Regs_IRF_tag_width;
int phy_Regs_IRF_rd_ports;
int phy_Regs_IRF_wr_ports;
int phy_Regs_FRF_size;
int phy_Regs_FRF_assoc;
int phy_Regs_FRF_nbanks;
int phy_Regs_FRF_tag_width;
int phy_Regs_FRF_rd_ports;
int phy_Regs_FRF_wr_ports;
int front_rat_nbanks;
int front_rat_rw_ports;
int retire_rat_nbanks;
int retire_rat_rw_ports;
int freelist_nbanks;
int freelist_rw_ports;
int memory_ports;
int load_buffer_size;
int load_buffer_assoc;
int load_buffer_nbanks;
int store_buffer_size;
int store_buffer_assoc;
int store_buffer_nbanks;
int instruction_window_size;
int fp_instruction_window_size;
int instruction_buffer_size;
int instruction_buffer_assoc;
int instruction_buffer_nbanks;
int instruction_buffer_tag_width;
int number_instruction_fetch_ports;
int RAS_size;
int execu_int_bypass_ports;
int execu_mul_bypass_ports;
int execu_fp_bypass_ports;
Wire_type execu_bypass_wire_type;
Wire_type execu_broadcast_wt;
int execu_wire_mat_type;
double execu_bypass_base_width;
double execu_bypass_base_height;
int execu_bypass_start_wiring_level;
double execu_bypass_route_over_perc;
double broadcast_numerator;
};
class CacheDynParam {
class CoreStatistics {
public:
CacheDynParam(){};
CacheDynParam(ParseXML *XML_interface, int ithCache_);
string name;
enum Dir_type dir_ty;
double clockRate,executionTime;
double capacity, blockW, assoc, nbanks;
double throughput, latency;
double duty_cycle, dir_duty_cycle;
//double duty_cycle;
int missb_size, fu_size, prefetchb_size, wbb_size;
~CacheDynParam(){};
double pipeline_duty_cycle;
double total_cycles;
double busy_cycles;
double idle_cycles;
double IFU_duty_cycle;
double BR_duty_cycle;
double LSU_duty_cycle;
double MemManU_I_duty_cycle;
double MemManU_D_duty_cycle;
double ALU_duty_cycle;
double MUL_duty_cycle;
double FPU_duty_cycle;
double ALU_cdb_duty_cycle;
double MUL_cdb_duty_cycle;
double FPU_cdb_duty_cycle;
double ROB_reads;
double ROB_writes;
double total_instructions;
double int_instructions;
double fp_instructions;
double branch_instructions;
double branch_mispredictions;
double load_instructions;
double store_instructions;
double committed_instructions;
double committed_int_instructions;
double committed_fp_instructions;
double rename_reads;
double rename_writes;
double fp_rename_reads;
double fp_rename_writes;
double inst_window_reads;
double inst_window_writes;
double inst_window_wakeup_accesses;
double fp_inst_window_reads;
double fp_inst_window_writes;
double fp_inst_window_wakeup_accesses;
double int_regfile_reads;
double float_regfile_reads;
double int_regfile_writes;
double float_regfile_writes;
double context_switches;
double ialu_accesses;
double fpu_accesses;
double mul_accesses;
double cdb_alu_accesses;
double cdb_fpu_accesses;
double cdb_mul_accesses;
double function_calls;
};
class MCParam {
class MCParameters {
public:
MCParam(){};
MCParam(ParseXML *XML_interface, int ithCache_);
string name;
double clockRate,num_mcs, peakDataTransferRate, num_channels;
// double mcTEPowerperGhz;
// double mcPHYperGbit;
// double area;
int llcBlockSize, dataBusWidth, addressBusWidth;
int opcodeW;
int memAccesses;
int memRank;
int type;
double frontend_duty_cycle, duty_cycle, perc_load;
double executionTime, reads, writes;
bool LVDS, withPHY;
~MCParam(){};
double clockRate;
enum MemoryCtrl_type mc_type;
double num_mcs;
int num_channels;
int llcBlockSize;
int dataBusWidth;
int databus_width;
int llc_line_length;
int req_window_size_per_channel;
int IO_buffer_size_per_channel;
int addressbus_width;
int opcodeW;
int type;
bool LVDS;
bool withPHY;
int peak_transfer_rate;
int number_ranks;
int reorder_buffer_assoc;
int reorder_buffer_nbanks;
int read_buffer_assoc;
int read_buffer_nbanks;
int read_buffer_tag_width;
int write_buffer_assoc;
int write_buffer_nbanks;
int write_buffer_tag_width;
};
class NoCParam {
class MCStatistics {
public:
NoCParam(){};
NoCParam(ParseXML *XML_interface, int ithCache_);
string name;
double clockRate;
int flit_size;
int input_ports, output_ports, min_ports, global_linked_ports;
int virtual_channel_per_port,input_buffer_entries_per_vc;
int horizontal_nodes,vertical_nodes, total_nodes;
double executionTime, total_access, link_throughput,link_latency,
duty_cycle, chip_coverage, route_over_perc;
bool has_global_link, type;
~NoCParam(){};
double duty_cycle;
double perc_load;
double reads;
double writes;
};
class ProcParam {
public:
ProcParam(){};
ProcParam(ParseXML *XML_interface, int ithCache_);
string name;
int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel;
bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir;
~ProcParam(){};
class NIUParameters {
public:
double clockRate;
int num_units;
int type;
};
class NIUParam {
public:
NIUParam(){};
NIUParam(ParseXML *XML_interface, int ithCache_);
string name;
double clockRate;
int num_units;
int type;
double duty_cycle, perc_load;
~NIUParam(){};
class NIUStatistics {
public:
double duty_cycle;
double perc_load;
};
class PCIeParam {
public:
PCIeParam(){};
PCIeParam(ParseXML *XML_interface, int ithCache_);
string name;
double clockRate;
int num_channels, num_units;
bool withPHY;
int type;
double duty_cycle, perc_load;
~PCIeParam(){};
class PCIeParameters {
public:
double clockRate;
int num_channels;
int num_units;
bool withPHY;
int type;
};
class PCIeStatistics {
public:
double duty_cycle;
double perc_load;
};
#endif /* BASIC_COMPONENTS_H_ */

View file

@ -0,0 +1,179 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Joel Hestness
*
***************************************************************************/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <string>
#include "basic_circuit.h"
#include "bus_interconnect.h"
#include "common.h"
#include "const.h"
#include "io.h"
#include "parameter.h"
// Build a bus interconnect component from its XML description: parse the
// parameters, initialize the interface, derive the per-link length and
// create the "Link" Interconnect child that models the bus wires.
//
// _xml_data     XML node for this bus; passed to the McPATComponent base
// interface_ip_ interface parameters; copied into interface_ip and
//               dereferenced unconditionally, so it must not be NULL
BusInterconnect::BusInterconnect(XMLNode* _xml_data,
InputParameter* interface_ip_)
: McPATComponent(_xml_data), link_bus(NULL), interface_ip(*interface_ip_) {
name = "Bus Interconnect";
// Fills bus_params from the <param> children of the XML node
set_param_stats();
local_result = init_interface(&interface_ip, name);
scktRatio = g_tp.sckt_co_eff;
// NOTE(review): set_param_stats() zeroes bus_params before parsing, so if
// "clockrate" or "total_nodes" is absent from the XML the divisions below
// divide by zero -- confirm whether these parameters are validated.
interface_ip.throughput = bus_params.link_throughput / bus_params.clockRate;
interface_ip.latency = bus_params.link_latency / bus_params.clockRate;
// link_len is presumably computed in set_param_stats() (not visible here);
// it is split evenly across the nodes to get the length of one link.
link_len /= bus_params.total_nodes;
if (bus_params.total_nodes > 1) {
//All links are shared by neighbors
link_len /= 2;
}
link_bus = new Interconnect(xml_data, "Link", Uncore_device,
bus_params.link_base_width,
bus_params.link_base_height,
bus_params.flit_size, link_len, &interface_ip,
bus_params.link_start_wiring_level,
bus_params.clockRate,
bus_params.pipelinable,
bus_params.route_over_perc);
// Register the link as a child so the recursive compute/display passes
// of McPATComponent visit it.
children.push_back(link_bus);
}
/**
 * Seed the TDP and runtime access statistics of this bus and of its link,
 * then delegate to McPATComponent::computeEnergy().
 */
void BusInterconnect::computeEnergy() {
    const double duty = bus_stats.duty_cycle;
    const double accesses = bus_stats.total_access;

    // Peak (TDP) activity
    tdp_stats.reset();
    tdp_stats.readAc.access = duty;
    link_bus->int_params.active_ports = bus_params.min_ports - 1;
    link_bus->int_stats.duty_cycle = bus_params.M_traffic_pattern * duty;

    // Runtime activity
    rtp_stats.reset();
    rtp_stats.readAc.access = accesses;
    link_bus->int_stats.accesses = accesses;

    // Recursively compute energy
    McPATComponent::computeEnergy();
}
/**
 * Parse this component's "param" and "stat" XML children.
 *
 * Params are stored in bus_params, link_len and the wire settings of
 * interface_ip; stats are stored in bus_stats. Everything the parser might
 * not assign is given a defined value first, so that a missing XML entry is
 * caught by the checks at the end instead of leaving an indeterminate value.
 */
void BusInterconnect::set_param_stats() {
    // Sentinel meaning "no wire_mat_type param was supplied"
    const int mat_type_unset = -1;

    memset(&bus_params, 0, sizeof(BusInterconnectParameters));
    bus_stats.duty_cycle = 0;
    bus_stats.total_access = 0;
    link_len = 0;

    int num_children = xml_data->nChildNode("param");
    int i;
    int mat_type = mat_type_unset;
    for (i = 0; i < num_children; i++) {
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        XMLCSTR value = paramNode->getAttribute("value");

        // A nameless param cannot match anything below; skip it rather than
        // handing a null name to the matching macros.
        if (!node_name) {
            warnMissingParamName(paramNode->getAttribute("id"));
            continue;
        }

        ASSIGN_FP_IF("clockrate", bus_params.clockRate);
        ASSIGN_INT_IF("flit_bits", bus_params.flit_size);
        ASSIGN_FP_IF("link_throughput", bus_params.link_throughput);
        ASSIGN_FP_IF("link_latency", bus_params.link_latency);
        ASSIGN_INT_IF("total_nodes", bus_params.total_nodes);
        ASSIGN_INT_IF("input_ports", bus_params.input_ports);
        ASSIGN_INT_IF("output_ports", bus_params.output_ports);
        ASSIGN_INT_IF("global_linked_ports", bus_params.global_linked_ports);
        ASSIGN_FP_IF("chip_coverage", bus_params.chip_coverage);
        ASSIGN_INT_IF("pipelinable", bus_params.pipelinable);
        ASSIGN_FP_IF("link_routing_over_percentage",
                     bus_params.route_over_perc);
        ASSIGN_INT_IF("virtual_channel_per_port",
                      bus_params.virtual_channel_per_port);
        ASSIGN_FP_IF("M_traffic_pattern", bus_params.M_traffic_pattern);
        ASSIGN_FP_IF("link_len", link_len);
        ASSIGN_FP_IF("link_base_width", bus_params.link_base_width);
        ASSIGN_FP_IF("link_base_height", bus_params.link_base_height);
        ASSIGN_FP_IF("link_start_wiring_level",
                     bus_params.link_start_wiring_level);
        ASSIGN_INT_IF("wire_mat_type", mat_type);
        ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Change from MHz to Hz
    bus_params.clockRate *= 1e6;

    // Only override the wire material when the XML supplied one; otherwise
    // keep the values already present in interface_ip.
    if (mat_type != mat_type_unset) {
        interface_ip.wire_is_mat_type = mat_type;
        interface_ip.wire_os_mat_type = mat_type;
    }

    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        // Same reasoning as for nameless params above
        if (!node_name) {
            warnMissingStatName(statNode->getAttribute("id"));
            continue;
        }

        ASSIGN_FP_IF("duty_cycle", bus_stats.duty_cycle);
        ASSIGN_FP_IF("total_accesses", bus_stats.total_access);

        else {
            warnUnrecognizedStat(node_name);
        }
    }

    clockRate = bus_params.clockRate;
    bus_params.min_ports =
        min(bus_params.input_ports, bus_params.output_ports);

    // Sanity checks on the parsed values; link_len stays 0 (and fails here)
    // when the XML has no "link_len" param.
    assert(bus_params.chip_coverage <= 1);
    assert(bus_params.route_over_perc <= 1);
    assert(link_len > 0);
}
void
BusInterconnect::set_duty_cycle(double duty_cycle) {
bus_stats.duty_cycle = duty_cycle;
}
void
BusInterconnect::set_number_of_accesses(double total_accesses) {
bus_stats.total_access = total_accesses;
}
/**
 * Release the link interconnect created by the constructor.
 *
 * NOTE(review): the constructor also stores link_bus in `children`. If the
 * base-class destructor deletes its children as well, this is a double
 * delete -- confirm against McPATComponent::~McPATComponent().
 */
BusInterconnect::~BusInterconnect() {
    Interconnect* owned_link = link_bus;
    link_bus = NULL;
    delete owned_link;
}

View file

@ -1,7 +1,7 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,65 +25,71 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Joel Hestness
*
***************************************************************************/
#ifndef SHAREDCACHE_H_
#define SHAREDCACHE_H_
#include <vector>
#ifndef BUS_INTERCONNECT_H_
#define BUS_INTERCONNECT_H_
#include "XML_Parse.h"
#include "area.h"
#include "array.h"
#include "basic_components.h"
#include "interconnect.h"
#include "logic.h"
#include "parameter.h"
class SharedCache :public Component{
public:
ParseXML * XML;
int ithCache;
InputParameter interface_ip;
enum cache_level cacheL;
DataCache unicache;//Shared cache
CacheDynParam cachep;
statsDef homenode_tdp_stats;
statsDef homenode_rtp_stats;
statsDef homenode_stats_t;
double dir_overhead;
// cache_processor llCache,directory, directory1, inv_dir;
//pipeline pipeLogicCache, pipeLogicDirectory;
//clock_network clockNetwork;
double scktRatio, executionTime;
// Component L2Tot, cc, cc1, ccTot;
SharedCache(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_,enum cache_level cacheL_ =L2);
void set_cache_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
~SharedCache(){};
// Configuration of a bus interconnect. BusInterconnect::set_param_stats()
// zeroes the whole object with memset and then fills it from the
// component's "param" XML entries, so it must remain plain data.
class BusInterconnectParameters {
public:
// From "clockrate"; the parser multiplies it by 1e6 (MHz to Hz)
double clockRate;
// From "flit_bits"
int flit_size;
int input_ports;
int output_ports;
// min(input_ports, output_ports), computed after parsing
int min_ports;
int global_linked_ports;
int virtual_channel_per_port;
int input_buffer_entries_per_vc;
int total_nodes;
double link_throughput;
double link_latency;
// Asserted to be <= 1 after parsing
double chip_coverage;
bool pipelinable;
// From "link_routing_over_percentage"; asserted to be <= 1 after parsing
double route_over_perc;
bool has_global_link;
bool type;
double M_traffic_pattern;
double link_base_width;
double link_base_height;
int link_start_wiring_level;
};
class CCdir :public Component{
public:
ParseXML * XML;
int ithCache;
InputParameter interface_ip;
DataCache dc;//Shared cache
ArrayST * shadow_dir;
// cache_processor llCache,directory, directory1, inv_dir;
//pipeline pipeLogicCache, pipeLogicDirectory;
//clock_network clockNetwork;
double scktRatio, clockRate, executionTime;
Component L2Tot, cc, cc1, ccTot;
CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
~CCdir();
// Activity statistics of a bus interconnect, filled from the component's
// "stat" XML entries or through the BusInterconnect setters.
class BusInterconnectStatistics {
public:
// From "duty_cycle"; feeds the TDP stats in computeEnergy()
double duty_cycle;
// From "total_accesses"; feeds the runtime stats in computeEnergy()
double total_access;
};
#endif /* SHAREDCACHE_H_ */
// Bus-style interconnect component. It owns a single "Link" Interconnect
// child and forwards its parsed parameters and statistics to that link.
class BusInterconnect : public McPATComponent {
public:
// The "Link" child created in the constructor (also stored in children)
Interconnect* link_bus;
int ithNoC;
// Private copy of the input parameters passed to the constructor
InputParameter interface_ip;
// From the "link_len" param; the constructor divides it by total_nodes,
// and by 2 more when there is more than one node
double link_len;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
BusInterconnectParameters bus_params;
BusInterconnectStatistics bus_stats;
// Result of init_interface() for interface_ip
uca_org_t local_result;
statsDef stats_t;
double M_traffic_pattern;
BusInterconnect(XMLNode* _xml_data, InputParameter* interface_ip_);
// Parse "param"/"stat" XML children into bus_params, bus_stats, link_len
void set_param_stats();
void set_duty_cycle(double duty_cycle);
void set_number_of_accesses(double total_accesses);
void computeEnergy();
~BusInterconnect();
};
#endif /* BUS_INTERCONNECT_H_ */

321
ext/mcpat/cachearray.cc Normal file
View file

@ -0,0 +1,321 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Joel Hestness
* Yasuko Eckert
*
***************************************************************************/
#include <cmath>
#include <iostream>
#include "area.h"
#include "cachearray.h"
#include "common.h"
#include "decoder.h"
#include "parameter.h"
using namespace std;
// Thresholds used by the design-space search in the CacheArray constructor
double CacheArray::area_efficiency_threshold = 20.0;
int CacheArray::cycle_time_dev_threshold = 10;

int CacheArray::ed = 0;

// Optimization weights.
// Delay and cycle time: fixed values, chosen so that timing can be satisfied.
int CacheArray::delay_wt = 100;
int CacheArray::cycle_time_wt = 1000;
// Area and power: fixed values, used for the exhaustive search of
// individual components.
int CacheArray::area_wt = 10;
int CacheArray::dynamic_power_wt = 10;
int CacheArray::leakage_power_wt = 10;

// Allowed deviations.
// Delay and cycle time: fixed values, chosen so that timing can be satisfied.
int CacheArray::delay_dev = 1000000;
int CacheArray::cycle_time_dev = 100;
// Area and power: fixed values, used for the exhaustive search of
// individual components.
int CacheArray::area_dev = 1000000;
int CacheArray::dynamic_power_dev = 1000000;
int CacheArray::leakage_power_dev = 1000000;
// Build an array model: copy the CACTI input parameters, run CACTI to get an
// initial solution and, when clock-targeted local optimization is enabled,
// re-run it with progressively tighter cycle-time deviation until the
// throughput/latency constraints are met. The chosen result is then scaled
// by the layout overheads, the short-circuit coefficient and the bank count.
//
// configure_interface: CACTI input parameters, copied into l_ip
// _name:               component name, also used in warnings
// device_ty_/core_ty_: select the long-channel leakage reduction factor
// _clockRate:          stored in clockRate
// opt_local_:          enables the constraint-driven search (together with
//                      the global opt_for_clk)
CacheArray::CacheArray(XMLNode* _xml_data,
const InputParameter *configure_interface, string _name,
enum Device_ty device_ty_, double _clockRate,
bool opt_local_, enum Core_type core_ty_, bool _is_default)
: McPATComponent(_xml_data), l_ip(*configure_interface),
device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
is_default(_is_default), sbt_dir_overhead(0) {
name = _name;
clockRate = _clockRate;
// Never model an array smaller than MIN_BUFFER_SIZE
if (l_ip.cache_sz < MIN_BUFFER_SIZE) {
l_ip.cache_sz = MIN_BUFFER_SIZE;
}
// Invalid input parameters are fatal
if (!l_ip.error_checking(name)) {
exit(1);
}
sbt_tdp_stats.reset();
sbt_rtp_stats.reset();
// Compute initial search point
// NOTE(review): compute_base_power() assigns a whole new local_result, so
// the valid = false written here is overwritten immediately.
local_result.valid = false;
compute_base_power();
// Set up the cache by searching design space with cacti
list<uca_org_t > candidate_solutions(0);
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
uca_org_t* temp_res = NULL;
double throughput = l_ip.throughput;
double latency = l_ip.latency;
// A constraint "overflows" while the current result exceeds it
bool throughput_overflow = true;
bool latency_overflow = true;
if ((local_result.cycle_time - throughput) <= 1e-10 )
throughput_overflow = false;
if ((local_result.access_time - latency) <= 1e-10)
latency_overflow = false;
if (opt_for_clk && opt_local) {
if (throughput_overflow || latency_overflow) {
// The initial point misses a constraint: load the class-wide weights and
// deviations into l_ip before iterating
l_ip.ed = ed;
l_ip.delay_wt = delay_wt;
l_ip.cycle_time_wt = cycle_time_wt;
l_ip.area_wt = area_wt;
l_ip.dynamic_power_wt = dynamic_power_wt;
l_ip.leakage_power_wt = leakage_power_wt;
l_ip.delay_dev = delay_dev;
l_ip.cycle_time_dev = cycle_time_dev;
l_ip.area_dev = area_dev;
l_ip.dynamic_power_dev = dynamic_power_dev;
l_ip.leakage_power_dev = leakage_power_dev;
//Reset overflow flag before start optimization iterations
throughput_overflow = true;
latency_overflow = true;
//Clean up the result for optimized for ED^2P
temp_res = &local_result;
temp_res->cleanup();
}
// Each iteration re-runs CACTI, then lowers cycle_time_dev by
// cycle_time_dev_threshold for the next iteration
while ((throughput_overflow || latency_overflow) &&
l_ip.cycle_time_dev > cycle_time_dev_threshold) {
compute_base_power();
//This is the time_dev to be used for next iteration
l_ip.cycle_time_dev -= cycle_time_dev_threshold;
// from best area to worst area -->worst timing to best timing
// NOTE(review): data_array2 is dereferenced without a null check here
if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
(local_result.access_time - latency) <= 1e-10) ||
(local_result.data_array2->area_efficiency <
area_efficiency_threshold && l_ip.assoc == 0)) {
//if no satisfiable solution is found,the most aggressive one
//is left
candidate_solutions.push_back(local_result);
if (((local_result.cycle_time - throughput) <= 1e-10) &&
((local_result.access_time - latency) <= 1e-10)) {
//ensure stop opt not because of cam
throughput_overflow = false;
latency_overflow = false;
}
} else {
if ((local_result.cycle_time - throughput) <= 1e-10)
throughput_overflow = false;
if ((local_result.access_time - latency) <= 1e-10)
latency_overflow = false;
//if not >10 local_result is the last result, it cannot be
//cleaned up
if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
//Only solutions not saved in the list need to be
//cleaned up
temp_res = &local_result;
temp_res->cleanup();
}
}
}
if (l_ip.assoc > 0) {
//For array structures except CAM and FA, Give warning but still
//provide a result with best timing found
if (throughput_overflow == true)
cout << "Warning: " << name
<< " array structure cannot satisfy throughput constraint."
<< endl;
if (latency_overflow == true)
cout << "Warning: " << name
<< " array structure cannot satisfy latency constraint."
<< endl;
}
// Among the saved candidates keep the one with the lowest read dynamic
// energy.
// NOTE(review): cleanup() is only called on candidates that are not a new
// minimum when visited; a candidate that was the running minimum and is
// later beaten does not appear to be cleaned up -- confirm whether that
// leaks.
double min_dynamic_energy = BIGNUM;
if (candidate_solutions.empty() == false) {
local_result.valid = true;
for (candidate_iter = candidate_solutions.begin();
candidate_iter != candidate_solutions.end();
++candidate_iter) {
if (min_dynamic_energy >
(candidate_iter)->power.readOp.dynamic) {
min_dynamic_energy =
(candidate_iter)->power.readOp.dynamic;
min_dynamic_energy_iter = candidate_iter;
local_result = *(min_dynamic_energy_iter);
} else {
candidate_iter->cleanup() ;
}
}
}
candidate_solutions.clear();
}
// Post-process the chosen result: layout overheads scale the area, and the
// pppm_t factors are applied to the power numbers
double long_channel_device_reduction =
longer_channel_device_reduction(device_ty, core_ty);
double macro_layout_overhead = g_tp.macro_layout_overhead;
double chip_PR_overhead = g_tp.chip_layout_overhead;
double total_overhead = macro_layout_overhead * chip_PR_overhead;
local_result.area *= total_overhead;
//maintain constant power density
double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
double sckRation = g_tp.sckt_co_eff;
// Whole-array power: dynamic energy scaled by the short-circuit
// coefficient, leakage scaled by the number of banks
local_result.power.readOp.dynamic *= sckRation;
local_result.power.writeOp.dynamic *= sckRation;
local_result.power.searchOp.dynamic *= sckRation;
local_result.power.readOp.leakage *= l_ip.nbanks;
local_result.power.readOp.longer_channel_leakage =
local_result.power.readOp.leakage * long_channel_device_reduction;
local_result.power = local_result.power * pppm_t;
// Same scaling for the data array on its own
// NOTE(review): data_array2 is dereferenced unconditionally here
local_result.data_array2->power.readOp.dynamic *= sckRation;
local_result.data_array2->power.writeOp.dynamic *= sckRation;
local_result.data_array2->power.searchOp.dynamic *= sckRation;
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
local_result.data_array2->power.readOp.longer_channel_leakage =
local_result.data_array2->power.readOp.leakage *
long_channel_device_reduction;
local_result.data_array2->power = local_result.data_array2->power * pppm_t;
// The tag array is scaled only for caches that are not pure CAM, pure RAM
// or fully associative
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
local_result.tag_array2->power.readOp.dynamic *= sckRation;
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
local_result.tag_array2->power.readOp.longer_channel_leakage =
local_result.tag_array2->power.readOp.leakage *
long_channel_device_reduction;
local_result.tag_array2->power =
local_result.tag_array2->power * pppm_t;
}
}
void CacheArray::compute_base_power() {
local_result = cacti_interface(&l_ip);
}
/**
 * Publish the area of the CACTI result: as-is into `area`, and divided by
 * 1e6 into the common output record.
 */
void CacheArray::computeArea() {
    const double array_area = local_result.area;
    area.set_area(array_area);
    output_data.area = array_area / 1e6;
}
/**
 * Fill output_data with leakage power, peak dynamic power and runtime
 * dynamic energy, using the CACTI result together with the TDP (tdp_stats)
 * and runtime (rtp_stats) access statistics.
 */
void CacheArray::computeEnergy() {
    // Leakage comes straight from the CACTI result
    output_data.subthreshold_leakage_power = local_result.power.readOp.leakage;
    output_data.gate_leakage_power = local_result.power.readOp.gate_leakage;

    const bool has_tags_and_data = l_ip.assoc && l_ip.is_cache;
    if (has_tags_and_data) {
        // Standard cache array: tag and data arrays are accounted separately
        const double tag_read =
            local_result.tag_array2->power.readOp.dynamic;
        const double tag_write =
            local_result.tag_array2->power.writeOp.dynamic;
        const double data_read =
            local_result.data_array2->power.readOp.dynamic;
        const double data_write =
            local_result.data_array2->power.writeOp.dynamic;

        // Peak dynamic power: hits touch tag and data, misses only the tag
        output_data.peak_dynamic_power =
            (tag_read + data_read) * tdp_stats.readAc.hit +
            (tag_read) * tdp_stats.readAc.miss +
            (tag_read + data_write) * tdp_stats.writeAc.hit +
            (tag_read) * tdp_stats.writeAc.miss;
        output_data.peak_dynamic_power *= clockRate;

        // Runtime dynamic energy; tag accesses are multiplied by the
        // associativity
        output_data.runtime_dynamic_energy =
            data_read * rtp_stats.dataReadAc.access +
            data_write * rtp_stats.dataWriteAc.access +
            (tag_read * rtp_stats.tagReadAc.access +
             tag_write * rtp_stats.tagWriteAc.access) * l_ip.assoc;
    } else {
        // Any other array: use the whole-array read/write/search energies
        const double read_energy = local_result.power.readOp.dynamic;
        const double write_energy = local_result.power.writeOp.dynamic;
        const double search_energy = local_result.power.searchOp.dynamic;

        output_data.peak_dynamic_power =
            read_energy * tdp_stats.readAc.access +
            write_energy * tdp_stats.writeAc.access +
            search_energy * tdp_stats.searchAc.access;
        output_data.peak_dynamic_power *= clockRate;

        output_data.runtime_dynamic_energy =
            read_energy * rtp_stats.readAc.access +
            write_energy * rtp_stats.writeAc.access +
            search_energy * rtp_stats.searchAc.access;
    }

    // An SBT directory adds dynamic power on top of the array itself
    if (sbt_dir_overhead > 0) {
        output_data.peak_dynamic_power +=
            (computeSBTDynEnergy(&sbt_tdp_stats) * clockRate);
        output_data.runtime_dynamic_energy +=
            computeSBTDynEnergy(&sbt_rtp_stats);
    }
}
/** Release whatever the stored CACTI result holds via its cleanup(). */
CacheArray::~CacheArray() {
    uca_org_t* result = &local_result;
    result->cleanup();
}

117
ext/mcpat/cachearray.h Normal file
View file

@ -0,0 +1,117 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Joel Hestness
* Yasuko Eckert
*
***************************************************************************/
#ifndef CACHEARRAY_H_
#define CACHEARRAY_H_
#include <iostream>
#include <string>
#include "basic_components.h"
#include "cacti_interface.h"
#include "component.h"
#include "const.h"
#include "parameter.h"
// A single array structure (cache, RAM, CAM, ...) whose area and energy come
// from CACTI. The constructor runs the CACTI design-space search and stores
// the chosen solution in local_result; computeArea()/computeEnergy() turn
// that solution plus the access statistics into output numbers.
class CacheArray : public McPATComponent {
public:
// Threshold compared against the data array's area efficiency during the
// constructor's search
static double area_efficiency_threshold;
// These are used for the CACTI interface.
static int ed;
static int delay_wt;
static int cycle_time_wt;
static int area_wt;
static int dynamic_power_wt;
static int leakage_power_wt;
static int delay_dev;
static int cycle_time_dev;
static int area_dev;
static int dynamic_power_dev;
static int leakage_power_dev;
// Step by which cycle_time_dev is lowered per search iteration, and the
// value at which the search stops
static int cycle_time_dev_threshold;
// Private copy of the CACTI input parameters passed to the constructor
InputParameter l_ip;
enum Device_ty device_ty;
// Enables the constraint-driven search in the constructor
bool opt_local;
enum Core_type core_ty;
bool is_default;
// CACTI solution selected by the constructor
uca_org_t local_result;
// These are only used for static bank tag (SBT) directory type.
double sbt_dir_overhead;
// Set this to contain SBT peak power stats
statsDef sbt_tdp_stats;
// Set this to contain SBT runtime power stats
statsDef sbt_rtp_stats;
CacheArray(XMLNode* _xml_data, const InputParameter *configure_interface,
string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
bool opt_local_ = true,
enum Core_type core_ty_ = Inorder, bool _is_default = true);
void computeArea();
void computeEnergy();
// Re-run CACTI on l_ip and store the result in local_result
void compute_base_power();
void setSBTDirOverhead(double overhead) { sbt_dir_overhead = overhead; }
~CacheArray();
private:
// Extra dynamic energy of an SBT directory for the given stats; returns 0
// when sbt_dir_overhead is 0
double computeSBTDynEnergy(statsDef *sbt_stats_ptr);
};
/**
 * Extra dynamic energy spent by a static-bank-tag (SBT) directory.
 *
 * @param sbt_stats_p  hit/miss counts to evaluate (TDP or runtime stats)
 * @return the summed dynamic energy, or 0 when no SBT overhead is configured
 *
 * The terms are:
 *  - read hit:   directory share of a data read plus a tag read
 *  - read miss:  a tag read
 *  - write miss: a tag read, plus a whole-array write for the replacement
 *  - write hit:  directory share of a data write plus a tag read
 *
 * The replacement-write term used to sit inside the write-hit parenthesis,
 * which multiplied it by the write-hit count as well (hit * miss * energy).
 * It is now a separate term proportional to the write-miss count only.
 */
inline
double CacheArray::computeSBTDynEnergy(statsDef *sbt_stats_p) {
    if (sbt_dir_overhead == 0) {
        return 0;
    }

    double dynamic =
        sbt_stats_p->readAc.hit *
        (local_result.data_array2->power.readOp.dynamic * sbt_dir_overhead +
         local_result.tag_array2->power.readOp.dynamic) +
        sbt_stats_p->readAc.miss *
        local_result.tag_array2->power.readOp.dynamic +
        sbt_stats_p->writeAc.miss *
        local_result.tag_array2->power.readOp.dynamic +
        sbt_stats_p->writeAc.hit *
        (local_result.data_array2->power.writeOp.dynamic * sbt_dir_overhead +
         local_result.tag_array2->power.readOp.dynamic) +
        // Write miss on dynamic home node will generate a replacement write
        // on whole cache block
        sbt_stats_p->writeAc.miss *
        local_result.power.writeOp.dynamic;
    return dynamic;
}
#endif /* CACHEARRAY_H_ */

View file

@ -0,0 +1,42 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Joel Hestness
*
***************************************************************************/
#include "cachecontroller.h"
/**
 * Create a cache controller node: set its clock rate and name, then let the
 * base class instantiate the sub-components described in the XML.
 */
CacheController::CacheController(XMLNode* _xml_data,
                                 InputParameter* _interface_ip)
        : McPATComponent(_xml_data, _interface_ip) {
    clockRate = target_core_clockrate;
    name = "Cache Controller";

    McPATComponent::recursiveInstantiate();
}

View file

@ -1,7 +1,7 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,24 +25,21 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Joel Hestness
*
***************************************************************************/
#ifndef CACHECONTROLLER_H_
#define CACHECONTROLLER_H_
#ifndef GLOBALVAR_H_
#define GLOBALVAR_H_
#ifdef GLOBALVAR
#define EXTERN
#else
#define EXTERN extern
#endif
EXTERN bool opt_for_clk;
#endif /* GLOBALVAR_H_ */
#include "basic_components.h"
// Container component named "Cache Controller"; its constructor sets the
// clock rate and calls McPATComponent::recursiveInstantiate().
class CacheController : public McPATComponent {
public:
CacheController(XMLNode* _xml_data, InputParameter* _interface_ip);
// NOTE(review): declared here, but cachecontroller.cc as shown defines only
// the constructor -- confirm a definition exists elsewhere.
~CacheController();
};
#endif /* CACHECONTROLLER_H_ */

647
ext/mcpat/cacheunit.cc Normal file
View file

@ -0,0 +1,647 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Joel Hestness
* Yasuko Eckert
*
***************************************************************************/
#include <algorithm>
#include <cmath>
#include <cstring>
#include <iostream>
#include "arbiter.h"
#include "array.h"
#include "basic_circuit.h"
#include "cachearray.h"
#include "cacheunit.h"
#include "common.h"
#include "const.h"
#include "io.h"
#include "logic.h"
#include "parameter.h"
// Class-wide settings that the CacheUnit constructor copies into the CACTI
// input parameters
bool CacheUnit::is_cache = true;
bool CacheUnit::pure_cam = false;
bool CacheUnit::force_cache_config = false;
// Passed to the CacheArray constructor as its opt_local_ argument
bool CacheUnit::opt_local = true;
CacheUnit::CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip)
: dir_overhead(0), McPATComponent(_xml_data, _interface_ip) {
int tag;
int data;
name = "Cache Unit";
CacheArray* arrayPtr = NULL;
set_cache_param_from_xml_data();
//All lower level cache are physically indexed and tagged.
double size;
double line;
double assoc;
double banks;
size = cache_params.capacity;
line = cache_params.blockW;
assoc = cache_params.assoc;
banks = cache_params.nbanks;
if ((cache_params.dir_ty == ST &&
cache_params.cache_level == L1Directory) ||
(cache_params.dir_ty == ST &&
cache_params.cache_level == L2Directory)) {
tag = physical_address_width + EXTRA_TAG_BITS;
} else {
tag = physical_address_width - int(ceil(log2(size / line / assoc))) -
int(ceil(log2(line))) + EXTRA_TAG_BITS;
if (cache_params.dir_ty == SBT) {
dir_overhead = ceil(cache_params.num_cores / BITS_PER_BYTE) *
BITS_PER_BYTE / (line * BITS_PER_BYTE);
line *= (1 + dir_overhead);
size *= (1 + dir_overhead);
}
}
interface_ip.cache_sz = (int)size;
interface_ip.line_sz = (int)line;
interface_ip.assoc = (int)assoc;
interface_ip.nbanks = (int)banks;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
if (cache_params.cache_level == L1) {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
} else {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
}
interface_ip.access_mode = cache_params.cache_access_mode;
interface_ip.throughput= cache_params.throughput;
interface_ip.latency = cache_params.latency;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.is_cache = is_cache;
interface_ip.pure_ram = cache_params.pure_ram;
interface_ip.pure_cam = pure_cam;
interface_ip.num_rw_ports = cache_params.cache_rw_ports;
interface_ip.num_rd_ports = cache_params.cache_rd_ports;
interface_ip.num_wr_ports = cache_params.cache_wr_ports;
interface_ip.num_se_rd_ports = cache_params.cache_se_rd_ports;
interface_ip.num_search_ports = cache_params.cache_search_ports;
arrayPtr = new CacheArray(xml_data, &interface_ip, "Data and Tag Arrays",
cache_params.device_ty, clockRate, opt_local,
cache_params.core_ty);
children.push_back(arrayPtr);
// This is for calculating TDP, which depends on the number of
// available ports
int num_tdp_ports = arrayPtr->l_ip.num_rw_ports +
arrayPtr->l_ip.num_rd_ports + arrayPtr->l_ip.num_wr_ports;
// Set new array stats for calculating TDP and runtime power
arrayPtr->tdp_stats.reset();
arrayPtr->tdp_stats.readAc.access = cache_stats.tdp_read_access_scalar *
num_tdp_ports * cache_stats.duty_cycle *
cache_stats.homenode_access_scalar;
arrayPtr->tdp_stats.readAc.miss = 0;
arrayPtr->tdp_stats.readAc.hit = arrayPtr->tdp_stats.readAc.access -
arrayPtr->tdp_stats.readAc.miss;
arrayPtr->tdp_stats.writeAc.access = cache_stats.tdp_write_access_scalar *
num_tdp_ports * cache_stats.duty_cycle *
cache_stats.homenode_access_scalar;
arrayPtr->tdp_stats.writeAc.miss = 0;
arrayPtr->tdp_stats.writeAc.hit = arrayPtr->tdp_stats.writeAc.access -
arrayPtr->tdp_stats.writeAc.miss;
arrayPtr->tdp_stats.searchAc.access = 0;
arrayPtr->tdp_stats.searchAc.miss = 0;
arrayPtr->tdp_stats.searchAc.hit = 0;
arrayPtr->rtp_stats.reset();
if (cache_stats.use_detailed_stats) {
arrayPtr->rtp_stats.dataReadAc.access =
cache_stats.num_data_array_reads;
arrayPtr->rtp_stats.dataWriteAc.access =
cache_stats.num_data_array_writes;
arrayPtr->rtp_stats.tagReadAc.access =
cache_stats.num_tag_array_reads;
arrayPtr->rtp_stats.tagWriteAc.access =
cache_stats.num_tag_array_writes;
} else {
// This code makes assumptions. For instance, it assumes that
// tag and data arrays are accessed in parallel on a read request and
// this is a write-allocate cache. It also ignores any coherence
// requests. Using detailed stats as above can avoid the ambiguity
// that is introduced here
arrayPtr->rtp_stats.dataReadAc.access =
cache_stats.read_accesses + cache_stats.write_misses;
arrayPtr->rtp_stats.dataWriteAc.access =
cache_stats.write_accesses + cache_stats.read_misses;
arrayPtr->rtp_stats.tagReadAc.access =
cache_stats.read_accesses + cache_stats.write_accesses;
arrayPtr->rtp_stats.tagWriteAc.access =
cache_stats.read_misses + cache_stats.write_misses;
}
// Set SBT stats if this is an SBT directory type
if (dir_overhead > 0) {
arrayPtr->setSBTDirOverhead(dir_overhead);
// TDP stats
arrayPtr->sbt_tdp_stats.readAc.access =
cache_stats.tdp_read_access_scalar *
num_tdp_ports * cache_stats.dir_duty_cycle *
(1 - cache_stats.homenode_access_scalar);
arrayPtr->sbt_tdp_stats.readAc.miss = 0;
arrayPtr->sbt_tdp_stats.readAc.hit =
arrayPtr->sbt_tdp_stats.readAc.access -
arrayPtr->sbt_tdp_stats.readAc.miss;
arrayPtr->sbt_tdp_stats.writeAc.access =
cache_stats.tdp_sbt_write_access_scalar *
num_tdp_ports * cache_stats.dir_duty_cycle *
(1 - cache_stats.homenode_access_scalar);
arrayPtr->sbt_tdp_stats.writeAc.miss = 0;
arrayPtr->sbt_tdp_stats.writeAc.hit =
arrayPtr->sbt_tdp_stats.writeAc.access -
arrayPtr->sbt_tdp_stats.writeAc.miss;
// Runtime power stats
arrayPtr->sbt_rtp_stats.readAc.access =
cache_stats.homenode_read_accesses;
arrayPtr->sbt_rtp_stats.readAc.miss =
cache_stats.homenode_read_misses;
arrayPtr->sbt_rtp_stats.readAc.access =
cache_stats.homenode_read_accesses -
cache_stats.homenode_read_misses;
arrayPtr->sbt_rtp_stats.writeAc.access =
cache_stats.homenode_write_accesses;
arrayPtr->sbt_rtp_stats.writeAc.miss =
cache_stats.homenode_write_misses;
arrayPtr->sbt_rtp_stats.writeAc.hit =
cache_stats.homenode_write_accesses -
cache_stats.homenode_write_misses;
}
interface_ip.force_cache_config = force_cache_config;
if (!((cache_params.dir_ty == ST &&
cache_params.cache_level == L1Directory) ||
(cache_params.dir_ty == ST &&
cache_params.cache_level== L2Directory))) {
// Miss Buffer
tag = physical_address_width + EXTRA_TAG_BITS;
data = (physical_address_width) +
int(ceil(log2(size / cache_params.blockW))) +
(cache_params.blockW * BITS_PER_BYTE);
line = int(ceil(data / BITS_PER_BYTE));
size = cache_params.missb_size * line;
interface_ip.cache_sz = size;
interface_ip.line_sz = line;
interface_ip.assoc = cache_params.missb_assoc;
interface_ip.nbanks = cache_params.missb_banks;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
if (cache_params.cache_level == L1) {
interface_ip.out_w = line * BITS_PER_BYTE;
} else {
interface_ip.out_w = line * BITS_PER_BYTE / 2;
}
interface_ip.access_mode = cache_params.miss_buff_access_mode;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.is_cache = is_cache;
interface_ip.pure_ram = cache_params.pure_ram;
interface_ip.pure_cam = pure_cam;
interface_ip.throughput = cache_params.throughput;
interface_ip.latency = cache_params.latency;
interface_ip.num_rw_ports = cache_params.miss_buff_rw_ports;
interface_ip.num_rd_ports = cache_params.miss_buff_rd_ports;
interface_ip.num_wr_ports = cache_params.miss_buff_wr_ports;
interface_ip.num_se_rd_ports = cache_params.miss_buff_se_rd_ports;
interface_ip.num_search_ports = cache_params.miss_buff_search_ports;
arrayPtr = new CacheArray(xml_data, &interface_ip, "Miss Buffer",
cache_params.device_ty, clockRate, opt_local,
cache_params.core_ty);
children.push_back(arrayPtr);
arrayPtr->tdp_stats.reset();
arrayPtr->tdp_stats.readAc.access = 0;
arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;
arrayPtr->rtp_stats.reset();
arrayPtr->rtp_stats.readAc.access =
cache_stats.read_misses + cache_stats.write_misses;
arrayPtr->rtp_stats.writeAc.access =
cache_stats.read_misses + cache_stats.write_misses;
arrayPtr->rtp_stats.searchAc.access = 0;
if (cache_params.dir_ty == SBT) {
arrayPtr->rtp_stats.readAc.access +=
cache_stats.homenode_write_misses;
arrayPtr->rtp_stats.writeAc.access +=
cache_stats.homenode_write_misses;
}
// Fill Buffer
tag = physical_address_width + EXTRA_TAG_BITS;
data = cache_params.blockW;
interface_ip.cache_sz = data * cache_params.fu_size;
interface_ip.line_sz = data;
interface_ip.assoc = cache_params.fu_assoc;
interface_ip.nbanks = cache_params.fu_banks;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
if (cache_params.cache_level == L1) {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
} else {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
}
interface_ip.access_mode = cache_params.fetch_buff_access_mode;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.is_cache = is_cache;
interface_ip.pure_cam = pure_cam;
interface_ip.throughput = cache_params.throughput;
interface_ip.latency = cache_params.latency;
interface_ip.num_rw_ports = cache_params.fetch_buff_rw_ports;
interface_ip.num_rd_ports = cache_params.fetch_buff_rd_ports;
interface_ip.num_wr_ports = cache_params.fetch_buff_wr_ports;
interface_ip.num_se_rd_ports = cache_params.fetch_buff_se_rd_ports;
interface_ip.num_search_ports = cache_params.fetch_buff_search_ports;
arrayPtr = new CacheArray(xml_data, &interface_ip, "Fill Buffer",
cache_params.device_ty, clockRate, opt_local,
cache_params.core_ty);
children.push_back(arrayPtr);
arrayPtr->tdp_stats.reset();
arrayPtr->tdp_stats.readAc.access = 0;
arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;
arrayPtr->rtp_stats.reset();
arrayPtr->rtp_stats.readAc.access =
cache_stats.read_misses + cache_stats.write_misses;
arrayPtr->rtp_stats.writeAc.access =
cache_stats.read_misses + cache_stats.write_misses;
arrayPtr->rtp_stats.searchAc.access = 0;
if (cache_params.dir_ty == SBT) {
arrayPtr->rtp_stats.readAc.access +=
cache_stats.homenode_write_misses;
arrayPtr->rtp_stats.writeAc.access +=
cache_stats.homenode_write_misses;
}
// Prefetch Buffer
tag = physical_address_width + EXTRA_TAG_BITS;
line = cache_params.blockW;
interface_ip.cache_sz = cache_params.prefetchb_size * line;
interface_ip.line_sz = line;
interface_ip.assoc = cache_params.prefetchb_assoc;
interface_ip.nbanks = cache_params.prefetchb_banks;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
if (cache_params.cache_level == L1) {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
} else {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
}
interface_ip.access_mode = cache_params.prefetch_buff_access_mode;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.is_cache = is_cache;
interface_ip.pure_ram = cache_params.pure_ram;
interface_ip.pure_cam = pure_cam;
interface_ip.throughput = cache_params.throughput;
interface_ip.latency = cache_params.latency;
interface_ip.num_rw_ports = cache_params.pf_buff_rw_ports;
interface_ip.num_rd_ports = cache_params.pf_buff_rd_ports;
interface_ip.num_wr_ports = cache_params.pf_buff_wr_ports;
interface_ip.num_se_rd_ports = cache_params.pf_buff_se_rd_ports;
interface_ip.num_search_ports = cache_params.pf_buff_search_ports;
arrayPtr = new CacheArray(xml_data, &interface_ip, "Prefetch Buffer",
cache_params.device_ty, clockRate, opt_local,
cache_params.core_ty);
children.push_back(arrayPtr);
arrayPtr->tdp_stats.reset();
arrayPtr->tdp_stats.readAc.access = 0;
arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;
arrayPtr->rtp_stats.reset();
arrayPtr->rtp_stats.readAc.access = cache_stats.read_misses;
arrayPtr->rtp_stats.writeAc.access = cache_stats.read_misses;
arrayPtr->rtp_stats.searchAc.access = 0;
if (cache_params.dir_ty == SBT) {
arrayPtr->rtp_stats.readAc.access +=
cache_stats.homenode_write_misses;
arrayPtr->rtp_stats.writeAc.access +=
cache_stats.homenode_write_misses;
}
// Writeback Buffer
if (cache_params.wbb_size > 0) {
tag = physical_address_width + EXTRA_TAG_BITS;
line = cache_params.blockW;
interface_ip.cache_sz = cache_params.wbb_size * line;
interface_ip.line_sz = line;
interface_ip.assoc = cache_params.wbb_assoc;
interface_ip.nbanks = cache_params.wbb_banks;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
if (cache_params.cache_level == L1) {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
} else {
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
}
interface_ip.access_mode = cache_params.writeback_buff_access_mode;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.is_cache = is_cache;
interface_ip.pure_ram = cache_params.pure_ram;
interface_ip.pure_cam = pure_cam;
interface_ip.throughput = cache_params.throughput;
interface_ip.latency = cache_params.latency;
interface_ip.num_rw_ports = cache_params.wb_buff_rw_ports;
interface_ip.num_rd_ports = cache_params.wb_buff_rd_ports;
interface_ip.num_wr_ports = cache_params.wb_buff_wr_ports;
interface_ip.num_se_rd_ports = cache_params.wb_buff_se_rd_ports;
interface_ip.num_search_ports = cache_params.wb_buff_search_ports;
arrayPtr = new CacheArray(xml_data, &interface_ip,
"Writeback Buffer",
cache_params.device_ty, clockRate,
opt_local, cache_params.core_ty);
children.push_back(arrayPtr);
arrayPtr->tdp_stats.reset();
arrayPtr->tdp_stats.readAc.access = 0;
arrayPtr->tdp_stats.writeAc.access =
arrayPtr->l_ip.num_search_ports;
arrayPtr->tdp_stats.searchAc.access =
arrayPtr->l_ip.num_search_ports;
arrayPtr->rtp_stats.reset();
arrayPtr->rtp_stats.readAc.access = cache_stats.write_misses;
arrayPtr->rtp_stats.writeAc.access = cache_stats.write_misses;
arrayPtr->rtp_stats.searchAc.access = 0;
if (cache_params.dir_ty == SBT) {
arrayPtr->rtp_stats.readAc.access +=
cache_stats.homenode_write_misses;
arrayPtr->rtp_stats.writeAc.access +=
cache_stats.homenode_write_misses;
}
}
}
}
void CacheUnit::computeEnergy() {
McPATComponent::computeEnergy();
}
/**
 * Load this cache unit's configuration and activity statistics from XML.
 *
 * Resets cache_params and cache_stats to all-zero, then walks the "param"
 * children of xml_data to fill cache_params (plus a few fields of
 * interface_ip) and the "stat" children to fill cache_stats. Names that
 * match none of the entries below are reported through
 * warnUnrecognizedParam() / warnUnrecognizedStat().
 *
 * The "clockrate" param is given in MHz and stored in Hz; when it is
 * absent (or not positive) the unit keeps target_core_clockrate.
 *
 * Terminates the process with exit(1) when the "level" param is missing or
 * is not one of 1..5.
 */
void CacheUnit::set_cache_param_from_xml_data() {
    // These three are only written when the matching <param> is present.
    // They are zero-initialized so that an omitted param gives a
    // deterministic result instead of a read of an indeterminate value:
    // a missing "level" lands in the default case of the switch below and
    // is reported as an error.
    // NOTE(review): 0 is used as the fallback for tech_type/mat_type purely
    // for determinism -- confirm whether keeping the values already in
    // interface_ip would be the preferred default.
    int level = 0;
    int tech_type = 0;
    int mat_type = 0;

    // Initialization... move this?
    memset(&cache_params, 0, sizeof(CacheParameters));
    memset(&cache_stats, 0, sizeof(CacheStatistics));

    // By default, use the core clock frequency. This can be changed by
    // setting the clockrate param in the XML definition of the CacheUnit
    clockRate = target_core_clockrate;

    XMLCSTR comp_name = xml_data->getAttribute("name");
    if (comp_name) {
        name = comp_name;
    }

    int num_children = xml_data->nChildNode("param");
    int i;
    for (i = 0; i < num_children; i++) {
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        XMLCSTR value = paramNode->getAttribute("value");

        // NOTE(review): the ASSIGN_*_IF macros are defined elsewhere. The
        // trailing `else` at the end of this list only parses if they
        // extend the `if` below, so nothing may be inserted between this
        // `if` and the macro list.
        if (!node_name)
            warnMissingParamName(paramNode->getAttribute("id"));

        ASSIGN_INT_IF("level", level);
        ASSIGN_FP_IF("size", cache_params.capacity);
        ASSIGN_FP_IF("block_size", cache_params.blockW);
        ASSIGN_FP_IF("assoc", cache_params.assoc);
        ASSIGN_FP_IF("num_banks", cache_params.nbanks);
        ASSIGN_FP_IF("latency", cache_params.latency);
        ASSIGN_FP_IF("throughput", cache_params.throughput);
        ASSIGN_INT_IF("miss_buffer_size", cache_params.missb_size);
        ASSIGN_INT_IF("fetch_buffer_size", cache_params.fu_size);
        ASSIGN_INT_IF("prefetch_buffer_size", cache_params.prefetchb_size);
        ASSIGN_INT_IF("writeback_buffer_size", cache_params.wbb_size);
        ASSIGN_INT_IF("miss_buffer_assoc", cache_params.missb_assoc);
        ASSIGN_INT_IF("fetch_buffer_assoc", cache_params.fu_assoc);
        ASSIGN_INT_IF("prefetch_buffer_assoc", cache_params.prefetchb_assoc);
        ASSIGN_INT_IF("writeback_buffer_assoc", cache_params.wbb_assoc);
        ASSIGN_INT_IF("miss_buffer_banks", cache_params.missb_banks);
        ASSIGN_INT_IF("fetch_buffer_banks", cache_params.fu_banks);
        ASSIGN_INT_IF("prefetch_buffer_banks", cache_params.prefetchb_banks);
        ASSIGN_INT_IF("writeback_buffer_banks", cache_params.wbb_banks);
        ASSIGN_ENUM_IF("cache_access_mode",
                       cache_params.cache_access_mode, Access_mode);
        ASSIGN_ENUM_IF("miss_buff_access_mode",
                       cache_params.miss_buff_access_mode, Access_mode);
        ASSIGN_ENUM_IF("fetch_buff_access_mode",
                       cache_params.fetch_buff_access_mode, Access_mode);
        ASSIGN_ENUM_IF("prefetch_buff_access_mode",
                       cache_params.prefetch_buff_access_mode, Access_mode);
        ASSIGN_ENUM_IF("writeback_buff_access_mode",
                       cache_params.writeback_buff_access_mode, Access_mode);
        ASSIGN_INT_IF("cache_rw_ports", cache_params.cache_rw_ports);
        ASSIGN_INT_IF("cache_rd_ports", cache_params.cache_rd_ports);
        ASSIGN_INT_IF("cache_wr_ports", cache_params.cache_wr_ports);
        ASSIGN_INT_IF("cache_se_rd_ports", cache_params.cache_se_rd_ports);
        ASSIGN_INT_IF("cache_search_ports", cache_params.cache_search_ports);
        ASSIGN_INT_IF("miss_buff_rw_ports", cache_params.miss_buff_rw_ports);
        ASSIGN_INT_IF("miss_buff_rd_ports", cache_params.miss_buff_rd_ports);
        ASSIGN_INT_IF("miss_buff_wr_ports", cache_params.miss_buff_wr_ports);
        ASSIGN_INT_IF("miss_buff_se_rd_ports" ,
                      cache_params.miss_buff_se_rd_ports);
        ASSIGN_INT_IF("miss_buff_search_ports",
                      cache_params.miss_buff_search_ports);
        ASSIGN_INT_IF("fetch_buff_rw_ports", cache_params.fetch_buff_rw_ports);
        ASSIGN_INT_IF("fetch_buff_rd_ports", cache_params.fetch_buff_rd_ports);
        ASSIGN_INT_IF("fetch_buff_wr_ports", cache_params.fetch_buff_wr_ports);
        ASSIGN_INT_IF("fetch_buff_se_rd_ports",
                      cache_params.fetch_buff_se_rd_ports);
        ASSIGN_INT_IF("fetch_buff_search_ports",
                      cache_params.fetch_buff_search_ports);
        ASSIGN_INT_IF("pf_buff_rw_ports", cache_params.pf_buff_rw_ports);
        ASSIGN_INT_IF("pf_buff_rd_ports", cache_params.pf_buff_rd_ports);
        ASSIGN_INT_IF("pf_buff_wr_ports", cache_params.pf_buff_wr_ports);
        ASSIGN_INT_IF("pf_buff_se_rd_ports", cache_params.pf_buff_se_rd_ports);
        ASSIGN_INT_IF("pf_buff_search_ports",
                      cache_params.pf_buff_search_ports);
        ASSIGN_INT_IF("wb_buff_rw_ports", cache_params.wb_buff_rw_ports);
        ASSIGN_INT_IF("wb_buff_rd_ports", cache_params.wb_buff_rd_ports);
        ASSIGN_INT_IF("wb_buff_wr_ports", cache_params.wb_buff_wr_ports);
        ASSIGN_INT_IF("wb_buff_se_rd_ports", cache_params.wb_buff_se_rd_ports);
        ASSIGN_INT_IF("wb_buff_search_ports",
                      cache_params.wb_buff_search_ports);
        ASSIGN_FP_IF("clockrate", cache_params.clockRate);
        ASSIGN_INT_IF("pure_ram", cache_params.pure_ram);
        ASSIGN_INT_IF("tech_type", tech_type);
        ASSIGN_ENUM_IF("Directory_type", cache_params.dir_ty, Dir_type);
        ASSIGN_ENUM_IF("device_type", cache_params.device_ty, Device_ty);
        ASSIGN_ENUM_IF("core_type", cache_params.core_ty, Core_type);
        ASSIGN_INT_IF("num_cores", cache_params.num_cores);
        ASSIGN_INT_IF("wire_mat_type", mat_type);
        ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Change from MHz to Hz
    cache_params.clockRate *= 1e6;
    if (cache_params.clockRate > 0) {
        clockRate = cache_params.clockRate;
    }

    // A single tech_type is applied to the cell and peripheral/global
    // settings of both the data and the tag array; a single mat_type is
    // applied to both wire material fields.
    interface_ip.data_arr_ram_cell_tech_type = tech_type;
    interface_ip.data_arr_peri_global_tech_type = tech_type;
    interface_ip.tag_arr_ram_cell_tech_type = tech_type;
    interface_ip.tag_arr_peri_global_tech_type = tech_type;

    interface_ip.wire_is_mat_type = mat_type;
    interface_ip.wire_os_mat_type = mat_type;

    // Map the numeric "level" param onto the CacheLevel enumerators
    switch(level) {
      case 1:
        cache_params.cache_level = L1;
        break;
      case 2:
        cache_params.cache_level = L2;
        break;
      case 3:
        cache_params.cache_level = L3;
        break;
      case 4:
        cache_params.cache_level = L1Directory;
        break;
      case 5:
        cache_params.cache_level = L2Directory;
        break;
      default:
        fprintf(stderr, "ERROR: Unrecognized cache level in %s: %d\n",
                name.c_str(), level);
        exit(1);
    }

    cache_stats.use_detailed_stats = false;

    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        // Same macro chain as for the params above: keep this `if`
        // immediately in front of the ASSIGN_*_IF list.
        if (!node_name)
            warnMissingStatName(statNode->getAttribute("id"));

        ASSIGN_FP_IF("num_data_array_reads", cache_stats.num_data_array_reads);
        ASSIGN_FP_IF("num_data_array_writes",
                     cache_stats.num_data_array_writes);
        ASSIGN_FP_IF("num_tag_array_reads", cache_stats.num_tag_array_reads);
        ASSIGN_FP_IF("num_tag_array_writes", cache_stats.num_tag_array_writes);
        ASSIGN_FP_IF("duty_cycle", cache_stats.duty_cycle);
        ASSIGN_FP_IF("read_accesses", cache_stats.read_accesses);
        ASSIGN_FP_IF("write_accesses", cache_stats.write_accesses);
        ASSIGN_FP_IF("read_misses", cache_stats.read_misses);
        ASSIGN_FP_IF("write_misses", cache_stats.write_misses);
        ASSIGN_FP_IF("conflicts", cache_stats.conflicts);
        ASSIGN_INT_IF("homenode_read_accesses",
                      cache_stats.homenode_read_accesses);
        ASSIGN_INT_IF("homenode_write_accesses",
                      cache_stats.homenode_write_accesses);
        ASSIGN_INT_IF("homenode_read_misses",
                      cache_stats.homenode_read_misses);
        ASSIGN_INT_IF("homenode_write_misses",
                      cache_stats.homenode_write_misses);
        ASSIGN_FP_IF("homenode_access_scalar",
                     cache_stats.homenode_access_scalar);
        ASSIGN_FP_IF("tdp_read_access_scalar",
                     cache_stats.tdp_read_access_scalar);
        ASSIGN_FP_IF("tdp_write_access_scalar",
                     cache_stats.tdp_write_access_scalar);
        ASSIGN_FP_IF("tdp_sbt_write_access_scalar",
                     cache_stats.tdp_sbt_write_access_scalar);
        ASSIGN_FP_IF("dir_duty_cycle",
                     cache_stats.dir_duty_cycle);

        else {
            warnUnrecognizedStat(node_name);
        }
    }

    // Any non-zero per-array counter switches the unit to the detailed
    // (separate data/tag array) accounting method.
    if (cache_stats.num_data_array_reads > 0 ||
        cache_stats.num_data_array_writes > 0 ||
        cache_stats.num_tag_array_reads > 0 ||
        cache_stats.num_tag_array_writes > 0) {
        cache_stats.use_detailed_stats = true;
        calculate_runtime_data_and_tag = true;
    }
}

167
ext/mcpat/cacheunit.h Normal file
View file

@ -0,0 +1,167 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Joel Hestness
* Yasuko Eckert
*
***************************************************************************/
#ifndef CACHEUNIT_H_
#define CACHEUNIT_H_
#include "area.h"
#include "array.h"
#include "basic_components.h"
#include "logic.h"
#include "parameter.h"
class CacheParameters {
public:
enum Dir_type dir_ty;
double clockRate;
double capacity;
double blockW;
double assoc;
double nbanks;
double throughput;
double latency;
int missb_size;
int fu_size;
int prefetchb_size;
int wbb_size;
int missb_assoc;
int fu_assoc;
int prefetchb_assoc;
int wbb_assoc;
int missb_banks;
int fu_banks;
int prefetchb_banks;
int wbb_banks;
enum Access_mode cache_access_mode;
enum Access_mode miss_buff_access_mode;
enum Access_mode fetch_buff_access_mode;
enum Access_mode prefetch_buff_access_mode;
enum Access_mode writeback_buff_access_mode;
int cache_rw_ports;
int cache_rd_ports;
int cache_wr_ports;
int cache_se_rd_ports;
int cache_search_ports;
int miss_buff_rw_ports;
int miss_buff_rd_ports;
int miss_buff_wr_ports;
int miss_buff_se_rd_ports;
int miss_buff_search_ports;
int fetch_buff_rw_ports;
int fetch_buff_rd_ports;
int fetch_buff_wr_ports;
int fetch_buff_se_rd_ports;
int fetch_buff_search_ports;
int pf_buff_rw_ports;
int pf_buff_rd_ports;
int pf_buff_wr_ports;
int pf_buff_se_rd_ports;
int pf_buff_search_ports;
int wb_buff_rw_ports;
int wb_buff_rd_ports;
int wb_buff_wr_ports;
int wb_buff_se_rd_ports;
int wb_buff_search_ports;
bool pure_ram;
enum CacheLevel cache_level;
enum Device_ty device_ty;
enum Core_type core_ty;
int num_cores;
};
/**
 * Activity statistics of one cache unit, read from its "stat" XML nodes.
 *
 * Dynamic power can be derived from these in two ways:
 *  1) Detailed (use_detailed_stats = true): the number and type of
 *     accesses to each cache array are counted, with data and tag arrays
 *     split. read_misses and write_misses are still required with this
 *     method for the various buffers associated with the cache.
 *  2) Coarse (use_detailed_stats = false): accesses to the cache unit as a
 *     whole are counted and used to extrapolate the accesses to the other
 *     subcomponents (cache arrays and buffers).
 */
class CacheStatistics {
public:
    // ---- TDP estimation ----
    // Should reflect the highest sustainable rate of access to the cache
    // unit in execution of a benchmark.
    // Default should be 1.0: one access per cycle
    double duty_cycle;
    // Duty cycle that is only used for SBT directory types
    double dir_duty_cycle;
    // Scale factors applied to the TDP read / write access counts
    double tdp_read_access_scalar, tdp_write_access_scalar;

    // ---- Runtime dynamic power ----
    // Selects method 1 (true) or method 2 (false). Default is false
    bool use_detailed_stats;

    // Method 1: extremely detailed per-array counters
    double num_data_array_reads, num_data_array_writes;
    double num_tag_array_reads, num_tag_array_writes;

    // Method 2: counters for the cache unit as a whole
    double read_accesses, write_accesses;
    double read_misses, write_misses;
    double conflicts;

    // ---- SBT directory types only ----
    int homenode_read_accesses, homenode_write_accesses;
    int homenode_read_misses, homenode_write_misses;
    double homenode_access_scalar;
    double tdp_sbt_write_access_scalar;
};
/**
 * McPAT component for a cache together with its associated buffers.
 *
 * Configuration and statistics are read from the component's XML node by
 * set_cache_param_from_xml_data().
 */
class CacheUnit : public McPATComponent {
public:
    // Class-wide flags. is_cache, pure_cam and force_cache_config are
    // copied into the interface parameters of the arrays that are built.
    static bool is_cache, pure_cam;
    // This is used for CacheArray objects
    static bool opt_local;
    static bool force_cache_config;

    int ithCache;
    CacheParameters cache_params;
    CacheStatistics cache_stats;
    Cache_type cacheType;
    // Set once detailed per-array statistics were found in the XML
    bool calculate_runtime_data_and_tag;
    // A value > 0 marks an SBT directory type
    double dir_overhead;
    double scktRatio;

    // TODO: REMOVE _interface_ip... It promotes a mess. Find a better way...
    CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip);
    ~CacheUnit() {}

    // Parse the "param" and "stat" XML children into cache_params/cache_stats
    void set_cache_param_from_xml_data();
    void computeEnergy();
};
#endif /* CACHEUNIT_H_ */

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -39,9 +40,8 @@
#include "nuca.h"
#include "router.h"
class min_values_t
{
public:
class min_values_t {
public:
double min_delay;
double min_dyn;
double min_leakage;
@ -58,17 +58,16 @@ class min_values_t
// Pairing of one tag array with one data array, referenced both by index
// and by list iterator, together with the timing, area, efficiency and
// power figures recorded for that pairing.
// NOTE(review): field semantics are taken from the names only -- confirm
// against the code that fills this struct.
struct solution {
    int tag_array_index;
    int data_array_index;
    list<mem_array *>::iterator tag_array_iter;
    list<mem_array *>::iterator data_array_iter;
    double access_time;
    double cycle_time;
    double area;
    double efficiency;
    powerDef total_power;
};
@ -94,20 +93,19 @@ void solve(uca_org_t *fin_res);
void init_tech_params(double tech, bool is_tag);
struct calc_time_mt_wrapper_struct
{
uint32_t tid;
bool is_tag;
bool pure_ram;
bool pure_cam;
bool is_main_mem;
double Nspd_min;
struct calc_time_mt_wrapper_struct {
uint32_t tid;
bool is_tag;
bool pure_ram;
bool pure_cam;
bool is_main_mem;
double Nspd_min;
min_values_t * data_res;
min_values_t * tag_res;
min_values_t * data_res;
min_values_t * tag_res;
list<mem_array *> data_arr;
list<mem_array *> tag_arr;
list<mem_array *> data_arr;
list<mem_array *> tag_arr;
};
void *calc_time_mt_wrapper(void * void_obj);

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -36,95 +37,107 @@ Arbiter::Arbiter(
double flit_size_,
double output_len,
TechnologyParameter::DeviceType *dt
):R(n_req), flit_size(flit_size_),
o_len (output_len), deviceType(dt)
{
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
Vdd = dt->Vdd;
double technology = g_ip->F_sz_um;
NTn1 = 13.5*technology/2;
PTn1 = 76*technology/2;
NTn2 = 13.5*technology/2;
PTn2 = 76*technology/2;
NTi = 12.5*technology/2;
PTi = 25*technology/2;
NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
PTtr = 20*technology/2; /* pmos tr. length*/
): R(n_req), flit_size(flit_size_),
o_len (output_len), deviceType(dt) {
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
Vdd = dt->Vdd;
double technology = g_ip->F_sz_um;
NTn1 = 13.5 * technology / 2;
PTn1 = 76 * technology / 2;
NTn2 = 13.5 * technology / 2;
PTn2 = 76 * technology / 2;
NTi = 12.5 * technology / 2;
PTi = 25 * technology / 2;
NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/
PTtr = 20 * technology / 2; /* pmos tr. length*/
}
// Nothing to release: the destructor is intentionally empty.
Arbiter::~Arbiter() {}
// Sum of the gate and drain capacitance terms for the request path of an
// R-input arbiter: (R - 1) copies of the first NOR stage plus one second
// NOR stage and the inverter (gate and drain).
double
Arbiter::arb_req() {
    double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 *
                   gate_C(NTn2, 0) +
                   gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
                   drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) +
                   drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
    return temp;
}
// Capacitance term for the priority path: twice the gate load of the
// first NOR stage.
double
Arbiter::arb_pri() {
    /* switching capacitance of flip-flop is ignored */
    double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0));
    return temp;
}
// Capacitance term for the grant path: drain load of the first NOR stage
// plus the crossbar control line it drives.
double
Arbiter::arb_grant() {
    double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
                  drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
    return temp;
}
// Capacitance term for the internal node between the two NOR stages:
// drain load of the first stage plus gate load of the second.
double
Arbiter::arb_int() {
    double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
                   drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
                   2 * gate_C(NTn2, 0) + gate_C(PTn2, 0));
    return temp;
}
// Fill power.readOp for the arbiter:
//  - dynamic:      C * Vdd^2 terms built from arb_req(), arb_pri(),
//                  arb_grant() and arb_int()
//  - leakage:      subthreshold leakage of the two NOR stages and the
//                  inverter, times Vdd
//  - gate_leakage: gate leakage of the same three gates, times Vdd
void
Arbiter::compute_power() {
    power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() *
                            Vdd * Vdd / 2 +
                            arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd *
                            Vdd);
    double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
                                         min_w_pmos * PTn1 * 2, 2, nor);
    double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R,
                                         min_w_pmos * PTn2 * R, 2, nor);
    double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi,
                                        min_w_pmos * PTi, 1, inv);
    double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
                                            min_w_pmos * PTn1 * 2, 2, nor);
    double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R,
                                            min_w_pmos * PTn2 * R, 2, nor);
    double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi,
                                           min_w_pmos * PTi, 1, inv);
    //FIXME include priority table leakage
    power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd;
    power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd +
                                not_leak_gate * Vdd;
}
// Wire cap with triple spacing for a wire of the given length
double
Arbiter::Cw3(double length) {
    Wire wc(g_ip->wt, length, 1, 3, 3);
    double temp = (wc.wire_cap(length, true));
    return temp;
}
// Capacitance term for the crossbar control line: wire capacitance over
// o_len (scaled by 1e-6 to metres) plus the drain and gate load of the
// inverter.
double
Arbiter::crossbar_ctrline() {
    double temp = (Cw3(o_len * 1e-6 /* m */) +
                   drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) +
                   drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
                   gate_C(NTi, 0) + gate_C(PTi, 0));
    return temp;
}
// Control capacitance of the transmission gate: gate load of its NMOS and
// PMOS devices.
double
Arbiter::transmission_buf_ctrcap() {
    double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0);
    return temp;
}
// Print the arbiter's input count, flit size and the dynamic and leakage
// figures from power.readOp to stdout.
void Arbiter::print_arbiter() {
    cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
    cout << "Flit size        : " << flit_size << " bits" << endl;
    cout << "Dynamic Power    : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
    cout << "Leakage Power    : " << power.readOp.leakage*1e3 << " (mW)" << endl;
}

266
ext/mcpat/cacti/bank.cc Executable file → Normal file
View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -36,163 +37,174 @@
#include "bank.h"
Bank::Bank(const DynamicParameter & dyn_p):
dp(dyn_p), mat(dp),
num_addr_b_mat(dyn_p.number_addr_bits_mat),
num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
{
int RWP;
int ERP;
int EWP;
int SCHP;
dp(dyn_p), mat(dp),
num_addr_b_mat(dyn_p.number_addr_bits_mat),
num_mats_hor_dir(dyn_p.num_mats_h_dir),
num_mats_ver_dir(dyn_p.num_mats_v_dir) {
int RWP;
int ERP;
int EWP;
int SCHP;
if (dp.use_inp_params)
{
RWP = dp.num_rw_ports;
ERP = dp.num_rd_ports;
EWP = dp.num_wr_ports;
SCHP = dp.num_search_ports;
}
else
{
RWP = g_ip->num_rw_ports;
ERP = g_ip->num_rd_ports;
EWP = g_ip->num_wr_ports;
SCHP = g_ip->num_search_ports;
}
int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
int searchinbits;
int searchoutbits;
if (dp.fully_assoc || dp.pure_cam)
{
datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
searchinbits = dp.num_si_b_bank_per_port * SCHP;
searchoutbits = dp.num_so_b_bank_per_port * SCHP;
}
if (!(dp.fully_assoc || dp.pure_cam))
{
if (g_ip->fast_access && dp.is_tag == false)
{
dataoutbits *= g_ip->data_assoc;
if (dp.use_inp_params) {
RWP = dp.num_rw_ports;
ERP = dp.num_rd_ports;
EWP = dp.num_wr_ports;
SCHP = dp.num_search_ports;
} else {
RWP = g_ip->num_rw_ports;
ERP = g_ip->num_rd_ports;
EWP = g_ip->num_wr_ports;
SCHP = g_ip->num_search_ports;
}
htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
int total_addrbits = (dp.number_addr_bits_mat +
dp.number_subbanks_decode) * (RWP + ERP + EWP);
int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
int searchinbits;
int searchoutbits;
if (dp.fully_assoc || dp.pure_cam) {
datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
searchinbits = dp.num_si_b_bank_per_port * SCHP;
searchoutbits = dp.num_so_b_bank_per_port * SCHP;
}
if (!(dp.fully_assoc || dp.pure_cam)) {
if (g_ip->fast_access && dp.is_tag == false) {
dataoutbits *= g_ip->data_assoc;
}
htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w,
(double)mat.area.h,
total_addrbits, datainbits, 0, dataoutbits,
0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
Add_htree);
htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w,
(double)mat.area.h,
total_addrbits, datainbits, 0, dataoutbits,
0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
Data_in_htree);
htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w,
(double)mat.area.h,
total_addrbits, datainbits, 0, dataoutbits,
0, num_mats_ver_dir * 2,
num_mats_hor_dir * 2, Data_out_htree);
// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
area.w = htree_in_data->area.w;
area.h = htree_in_data->area.h;
}
else
{
htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
area.w = htree_in_data->area.w;
area.h = htree_in_data->area.h;
} else {
htree_in_add =
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, searchinbits, dataoutbits,
searchoutbits, num_mats_ver_dir * 2,
num_mats_hor_dir * 2, Add_htree);
htree_in_data =
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, searchinbits, dataoutbits,
searchoutbits, num_mats_ver_dir * 2,
num_mats_hor_dir * 2, Data_in_htree);
htree_out_data =
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, searchinbits, dataoutbits,
searchoutbits, num_mats_ver_dir * 2,
num_mats_hor_dir * 2, Data_out_htree);
htree_in_search =
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, searchinbits, dataoutbits,
searchoutbits, num_mats_ver_dir * 2,
num_mats_hor_dir * 2, Data_in_htree, true, true);
htree_out_search =
new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h,
total_addrbits, datainbits, searchinbits, dataoutbits,
searchoutbits, num_mats_ver_dir * 2,
num_mats_hor_dir * 2, Data_out_htree, true);
area.w = htree_in_data->area.w;
area.h = htree_in_data->area.h;
}
area.w = htree_in_data->area.w;
area.h = htree_in_data->area.h;
}
num_addr_b_row_dec = _log2(mat.subarray.num_rows);
num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
num_addr_b_row_dec = _log2(mat.subarray.num_rows);
num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
num_addr_b_routed_to_mat_for_rd_or_wr =
num_addr_b_mat - num_addr_b_row_dec;
}
Bank::~Bank()
{
delete htree_in_add;
delete htree_out_data;
delete htree_in_data;
if (dp.fully_assoc || dp.pure_cam)
{
delete htree_in_search;
delete htree_out_search;
}
// Releases the H-tree objects owned by this bank. The address, data-out and
// data-in trees are always deleted; the two search trees are deleted only
// when dp.fully_assoc or dp.pure_cam is set, which is the same condition under
// which the constructor allocates them.
Bank::~Bank() {
    delete htree_in_add;
    delete htree_out_data;
    delete htree_in_data;

    const bool owns_search_trees = dp.fully_assoc || dp.pure_cam;
    if (!owns_search_trees) {
        return;
    }

    delete htree_in_search;
    delete htree_out_search;
}
double Bank::compute_delays(double inrisetime)
{
return mat.compute_delays(inrisetime);
// Forwards the input rise time to the bank's mat and hands back whatever
// mat.compute_delays() returns, unchanged.
double Bank::compute_delays(double inrisetime) {
    const double mat_result = mat.compute_delays(inrisetime);
    return mat_result;
}
void Bank::compute_power_energy()
{
mat.compute_power_energy();
void Bank::compute_power_energy() {
mat.compute_power_energy();
if (!(dp.fully_assoc || dp.pure_cam))
{
power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
if (!(dp.fully_assoc || dp.pure_cam)) {
power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
power.readOp.leakage += htree_in_add->power.readOp.leakage;
power.readOp.leakage += htree_in_data->power.readOp.leakage;
power.readOp.leakage += htree_out_data->power.readOp.leakage;
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
}
else
{
power.readOp.leakage += htree_in_add->power.readOp.leakage;
power.readOp.leakage += htree_in_data->power.readOp.leakage;
power.readOp.leakage += htree_out_data->power.readOp.leakage;
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
} else {
power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
mat.power_sa.searchOp.dynamic +
mat.power_bitline.searchOp.dynamic +
mat.power_subarray_out_drv.searchOp.dynamic+
mat.ml_to_ram_wl_drv->power.readOp.dynamic;
power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
mat.power_sa.searchOp.dynamic +
mat.power_bitline.searchOp.dynamic +
mat.power_subarray_out_drv.searchOp.dynamic +
mat.ml_to_ram_wl_drv->power.readOp.dynamic;
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
power.readOp.leakage += htree_in_add->power.readOp.leakage;
power.readOp.leakage += htree_in_data->power.readOp.leakage;
power.readOp.leakage += htree_out_data->power.readOp.leakage;
power.readOp.leakage += htree_in_search->power.readOp.leakage;
power.readOp.leakage += htree_out_search->power.readOp.leakage;
power.readOp.leakage += htree_in_add->power.readOp.leakage;
power.readOp.leakage += htree_in_data->power.readOp.leakage;
power.readOp.leakage += htree_out_data->power.readOp.leakage;
power.readOp.leakage += htree_in_search->power.readOp.leakage;
power.readOp.leakage += htree_out_search->power.readOp.leakage;
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
}
}
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -39,9 +40,8 @@
#include "htree2.h"
#include "mat.h"
class Bank : public Component
{
public:
class Bank : public Component {
public:
Bank(const DynamicParameter & dyn_p);
~Bank();
double compute_delays(double inrisetime); // return outrisetime

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -49,10 +50,10 @@ int combination(int n, int m);
//#define DBG
#ifdef DBG
#define PRINTDW(a);\
#define PRINTDW(a);\
a;
#else
#define PRINTDW(a);\
#define PRINTDW(a);\
#endif
@ -76,7 +77,7 @@ enum Htree_type {
enum Gate_type {
nmos,
pmos,
inv,
inv,
nand,
nor,
tri,
@ -164,13 +165,13 @@ double cmos_Ig_n(
double nWidth,
bool _is_dram = false,
bool _is_cell = false,
bool _is_wl_tr= false);
bool _is_wl_tr = false);
double cmos_Ig_p(
double pWidth,
bool _is_dram = false,
bool _is_cell = false,
bool _is_wl_tr= false);
bool _is_wl_tr = false);
double cmos_Isub_leakage(
@ -220,29 +221,29 @@ double shortcircuit_simple(
double vdd);
// Set the power point-product mask; strictly speaking, this is not a real point product.
inline void set_pppm(
double * pppv,
double a=1,
double b=1,
double c=1,
double d=1
){
pppv[0]= a;
pppv[1]= b;
pppv[2]= c;
pppv[3]= d;
double * pppv,
double a = 1,
double b = 1,
double c = 1,
double d = 1
) {
pppv[0] = a;
pppv[1] = b;
pppv[2] = c;
pppv[3] = d;
}
inline void set_sppm(
double * sppv,
double a=1,
double b=1,
double c=1,
double d=1
){
sppv[0]= a;
sppv[1]= b;
sppv[2]= c;
double * sppv,
double a = 1,
double b = 1,
double c = 1,
double d = 1
) {
sppv[0] = a;
sppv[1] = b;
sppv[2] = c;
}
#endif

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -47,127 +48,107 @@
using namespace std;
bool mem_array::lt(const mem_array * m1, const mem_array * m2)
{
if (m1->Nspd < m2->Nspd) return true;
else if (m1->Nspd > m2->Nspd) return false;
else if (m1->Ndwl < m2->Ndwl) return true;
else if (m1->Ndwl > m2->Ndwl) return false;
else if (m1->Ndbl < m2->Ndbl) return true;
else if (m1->Ndbl > m2->Ndbl) return false;
else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
else return false;
// "Less than" predicate over two mem_array objects. Fields are compared in the
// fixed order Nspd, Ndwl, Ndbl, deg_bl_muxing, Ndsam_lev_1, Ndsam_lev_2; the
// first field that differs decides the result. Returns false when all six
// fields compare equal.
bool mem_array::lt(const mem_array * m1, const mem_array * m2) {
    if (m1->Nspd < m2->Nspd) return true;
    if (m1->Nspd > m2->Nspd) return false;

    if (m1->Ndwl < m2->Ndwl) return true;
    if (m1->Ndwl > m2->Ndwl) return false;

    if (m1->Ndbl < m2->Ndbl) return true;
    if (m1->Ndbl > m2->Ndbl) return false;

    if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
    if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;

    if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
    if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;

    // Last key: nothing left to fall through to.
    return m1->Ndsam_lev_2 < m2->Ndsam_lev_2;
}
void uca_org_t::find_delay()
{
mem_array * data_arr = data_array2;
mem_array * tag_arr = tag_array2;
void uca_org_t::find_delay() {
mem_array * data_arr = data_array2;
mem_array * tag_arr = tag_array2;
// check whether it is a regular cache or scratch ram
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
{
access_time = data_arr->access_time;
}
// Both tag and data lookup happen in parallel
// and the entire set is sent over the data array h-tree without
// waiting for the way-select signal --TODO add the corresponding
// power overhead Nav
else if (g_ip->fast_access == true)
{
access_time = MAX(tag_arr->access_time, data_arr->access_time);
}
// Tag is accessed first. On a hit, way-select signal along with the
// address is sent to read/write the appropriate block in the data
// array
else if (g_ip->is_seq_acc == true)
{
access_time = tag_arr->access_time + data_arr->access_time;
}
// Normal access: tag array access and data array access happen in parallel.
// But, the data array will wait for the way-select and transfer only the
// appropriate block over the h-tree.
else
{
access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
data_arr->delay_before_subarray_output_driver) +
data_arr->delay_from_subarray_output_driver_to_output;
}
}
void uca_org_t::find_energy()
{
if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
power = data_array2->power + tag_array2->power;
else
power = data_array2->power;
}
void uca_org_t::find_area()
{
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
{
cache_ht = data_array2->height;
cache_len = data_array2->width;
}
else
{
cache_ht = MAX(tag_array2->height, data_array2->height);
cache_len = tag_array2->width + data_array2->width;
}
area = cache_ht * cache_len;
}
void uca_org_t::adjust_area()
{
double area_adjust;
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
{
if (data_array2->area_efficiency/100.0<0.2)
{
//area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
cache_ht = cache_ht/area_adjust;
cache_len = cache_len/area_adjust;
// check whether it is a regular cache or scratch ram
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
access_time = data_arr->access_time;
}
// Both tag and data lookup happen in parallel
// and the entire set is sent over the data array h-tree without
// waiting for the way-select signal --TODO add the corresponding
// power overhead Nav
else if (g_ip->fast_access == true) {
access_time = MAX(tag_arr->access_time, data_arr->access_time);
}
// Tag is accessed first. On a hit, way-select signal along with the
// address is sent to read/write the appropriate block in the data
// array
else if (g_ip->is_seq_acc == true) {
access_time = tag_arr->access_time + data_arr->access_time;
}
// Normal access: tag array access and data array access happen in parallel.
// But, the data array will wait for the way-select and transfer only the
// appropriate block over the h-tree.
else {
access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
data_arr->delay_before_subarray_output_driver) +
data_arr->delay_from_subarray_output_driver_to_output;
}
}
area = cache_ht * cache_len;
}
void uca_org_t::find_cyc()
{
if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
{
cycle_time = data_array2->cycle_time;
}
else
{
cycle_time = MAX(tag_array2->cycle_time,
data_array2->cycle_time);
}
void uca_org_t::find_energy() {
if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc))
power = data_array2->power + tag_array2->power;
else
power = data_array2->power;
}
void uca_org_t::find_area() {
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
cache_ht = data_array2->height;
cache_len = data_array2->width;
} else {
cache_ht = MAX(tag_array2->height, data_array2->height);
cache_len = tag_array2->width + data_array2->width;
}
area = cache_ht * cache_len;
}
void uca_org_t::adjust_area() {
double area_adjust;
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
if (data_array2->area_efficiency / 100.0 < 0.2) {
//area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0));
cache_ht = cache_ht / area_adjust;
cache_len = cache_len / area_adjust;
}
}
area = cache_ht * cache_len;
}
void uca_org_t::find_cyc() {
if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
cycle_time = data_array2->cycle_time;
} else {
cycle_time = MAX(tag_array2->cycle_time,
data_array2->cycle_time);
}
}
uca_org_t :: uca_org_t()
:tag_array2(0),
data_array2(0)
{
: tag_array2(0),
data_array2(0) {
}
void uca_org_t :: cleanup()
{
if (data_array2!=0)
delete data_array2;
if (tag_array2!=0)
delete tag_array2;
// Frees the data and tag mem_array objects referenced by this result and
// resets both pointers to null.
//
// Resetting the pointers makes cleanup() safe to call more than once on the
// same object: previously the second call would delete already-freed memory.
// Deleting a null pointer is a no-op, so no explicit null checks are needed.
//
// NOTE(review): uca_org_t is passed around by value elsewhere in this
// interface; copies made before cleanup() still hold the old pointer values
// and must not be cleaned up or dereferenced afterwards.
void uca_org_t :: cleanup() {
    delete data_array2;
    data_array2 = 0;

    delete tag_array2;
    tag_array2 = 0;
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -50,9 +51,8 @@ class mem_array;
class uca_org_t;
class powerComponents
{
public:
class powerComponents {
public:
double dynamic;
double leakage;
double gate_leakage;
@ -60,17 +60,24 @@ class powerComponents
double longer_channel_leakage;
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
powerComponents(const powerComponents & obj) { *this = obj; }
powerComponents & operator=(const powerComponents & rhs)
{
dynamic = rhs.dynamic;
leakage = rhs.leakage;
gate_leakage = rhs.gate_leakage;
short_circuit = rhs.short_circuit;
longer_channel_leakage = rhs.longer_channel_leakage;
return *this;
powerComponents(const powerComponents & obj) {
*this = obj;
}
// Copy assignment: member-wise copy of the five power terms from `rhs`.
// Returns *this so assignments can be chained.
powerComponents & operator=(const powerComponents & rhs) {
    // Self-assignment would copy each value onto itself; skip the work.
    if (this != &rhs) {
        dynamic = rhs.dynamic;
        leakage = rhs.leakage;
        gate_leakage = rhs.gate_leakage;
        short_circuit = rhs.short_circuit;
        longer_channel_leakage = rhs.longer_channel_leakage;
    }
    return *this;
}
// Zeroes all five power terms.
void reset() {
    dynamic = leakage = gate_leakage = short_circuit =
        longer_channel_leakage = 0;
}
void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
friend powerComponents operator*(const powerComponents & x, double const * const y);
@ -78,22 +85,24 @@ class powerComponents
class powerDef
{
public:
class powerDef {
public:
powerComponents readOp;
powerComponents writeOp;
powerComponents searchOp;//Sheng: for CAM and FA
powerDef() : readOp(), writeOp(), searchOp() { }
void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
void reset() {
readOp.reset();
writeOp.reset();
searchOp.reset();
}
friend powerDef operator+(const powerDef & x, const powerDef & y);
friend powerDef operator*(const powerDef & x, double const * const y);
};
enum Wire_type
{
enum Wire_type {
Global /* global wires with repeaters */,
Global_5 /* 5% delay penalty */,
Global_10 /* 10% delay penalty */,
@ -108,12 +117,12 @@ enum Wire_type
class InputParameter
{
public:
class InputParameter {
public:
void parse_cfg(const string & infile);
bool error_checking(); // return false if the input parameters are problematic
// return false if the input parameters are problematic
bool error_checking(string name = "CACTI");
void display_ip();
unsigned int cache_sz; // in bytes
@ -172,14 +181,14 @@ class InputParameter
int force_nuca_bank;
int delay_wt, dynamic_power_wt, leakage_power_wt,
cycle_time_wt, area_wt;
cycle_time_wt, area_wt;
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
cycle_time_wt_nuca, area_wt_nuca;
cycle_time_wt_nuca, area_wt_nuca;
int delay_dev, dynamic_power_dev, leakage_power_dev,
cycle_time_dev, area_dev;
cycle_time_dev, area_dev;
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
cycle_time_dev_nuca, area_dev_nuca;
cycle_time_dev_nuca, area_dev_nuca;
int ed; //ED or ED2 optimization
int nuca;
@ -194,167 +203,113 @@ class InputParameter
bool add_ecc_b_;
//parameters for design constraint
double throughput;
double latency;
bool pipelinable;
int pipeline_stages;
int per_stage_vector;
bool with_clock_grid;
//parameters for design constraint
double throughput;
double latency;
bool pipelinable;
int pipeline_stages;
int per_stage_vector;
bool with_clock_grid;
};
typedef struct{
int Ndwl;
int Ndbl;
double Nspd;
int deg_bl_muxing;
int Ndsam_lev_1;
int Ndsam_lev_2;
int number_activated_mats_horizontal_direction;
int number_subbanks;
int page_size_in_bits;
double delay_route_to_bank;
double delay_crossbar;
double delay_addr_din_horizontal_htree;
double delay_addr_din_vertical_htree;
double delay_row_predecode_driver_and_block;
double delay_row_decoder;
double delay_bitlines;
double delay_sense_amp;
double delay_subarray_output_driver;
double delay_bit_mux_predecode_driver_and_block;
double delay_bit_mux_decoder;
double delay_senseamp_mux_lev_1_predecode_driver_and_block;
double delay_senseamp_mux_lev_1_decoder;
double delay_senseamp_mux_lev_2_predecode_driver_and_block;
double delay_senseamp_mux_lev_2_decoder;
double delay_input_htree;
double delay_output_htree;
double delay_dout_vertical_htree;
double delay_dout_horizontal_htree;
double delay_comparator;
double access_time;
double cycle_time;
double multisubbank_interleave_cycle_time;
double delay_request_network;
double delay_inside_mat;
double delay_reply_network;
double trcd;
double cas_latency;
double precharge_delay;
powerDef power_routing_to_bank;
powerDef power_addr_input_htree;
powerDef power_data_input_htree;
powerDef power_data_output_htree;
powerDef power_addr_horizontal_htree;
powerDef power_datain_horizontal_htree;
powerDef power_dataout_horizontal_htree;
powerDef power_addr_vertical_htree;
powerDef power_datain_vertical_htree;
powerDef power_row_predecoder_drivers;
powerDef power_row_predecoder_blocks;
powerDef power_row_decoders;
powerDef power_bit_mux_predecoder_drivers;
powerDef power_bit_mux_predecoder_blocks;
powerDef power_bit_mux_decoders;
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
powerDef power_senseamp_mux_lev_1_decoders;
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
powerDef power_senseamp_mux_lev_2_decoders;
powerDef power_bitlines;
powerDef power_sense_amps;
powerDef power_prechg_eq_drivers;
powerDef power_output_drivers_at_subarray;
powerDef power_dataout_vertical_htree;
powerDef power_comparators;
powerDef power_crossbar;
powerDef total_power;
double area;
double all_banks_height;
double all_banks_width;
double bank_height;
double bank_width;
double subarray_memory_cell_area_height;
double subarray_memory_cell_area_width;
double mat_height;
double mat_width;
double routing_area_height_within_bank;
double routing_area_width_within_bank;
double area_efficiency;
// double perc_power_dyn_routing_to_bank;
// double perc_power_dyn_addr_horizontal_htree;
// double perc_power_dyn_datain_horizontal_htree;
// double perc_power_dyn_dataout_horizontal_htree;
// double perc_power_dyn_addr_vertical_htree;
// double perc_power_dyn_datain_vertical_htree;
// double perc_power_dyn_row_predecoder_drivers;
// double perc_power_dyn_row_predecoder_blocks;
// double perc_power_dyn_row_decoders;
// double perc_power_dyn_bit_mux_predecoder_drivers;
// double perc_power_dyn_bit_mux_predecoder_blocks;
// double perc_power_dyn_bit_mux_decoders;
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
// double perc_power_dyn_senseamp_mux_lev_1_decoders;
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
// double perc_power_dyn_senseamp_mux_lev_2_decoders;
// double perc_power_dyn_bitlines;
// double perc_power_dyn_sense_amps;
// double perc_power_dyn_prechg_eq_drivers;
// double perc_power_dyn_subarray_output_drivers;
// double perc_power_dyn_dataout_vertical_htree;
// double perc_power_dyn_comparators;
// double perc_power_dyn_crossbar;
// double perc_power_dyn_spent_outside_mats;
// double perc_power_leak_routing_to_bank;
// double perc_power_leak_addr_horizontal_htree;
// double perc_power_leak_datain_horizontal_htree;
// double perc_power_leak_dataout_horizontal_htree;
// double perc_power_leak_addr_vertical_htree;
// double perc_power_leak_datain_vertical_htree;
// double perc_power_leak_row_predecoder_drivers;
// double perc_power_leak_row_predecoder_blocks;
// double perc_power_leak_row_decoders;
// double perc_power_leak_bit_mux_predecoder_drivers;
// double perc_power_leak_bit_mux_predecoder_blocks;
// double perc_power_leak_bit_mux_decoders;
// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
// double perc_power_leak_senseamp_mux_lev_1_decoders;
// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
// double perc_power_leak_senseamp_mux_lev_2_decoders;
// double perc_power_leak_bitlines;
// double perc_power_leak_sense_amps;
// double perc_power_leak_prechg_eq_drivers;
// double perc_power_leak_subarray_output_drivers;
// double perc_power_leak_dataout_vertical_htree;
// double perc_power_leak_comparators;
// double perc_power_leak_crossbar;
// double perc_leak_mats;
// double perc_active_mats;
double refresh_power;
double dram_refresh_period;
double dram_array_availability;
double dyn_read_energy_from_closed_page;
double dyn_read_energy_from_open_page;
double leak_power_subbank_closed_page;
double leak_power_subbank_open_page;
double leak_power_request_and_reply_networks;
double activate_energy;
double read_energy;
double write_energy;
double precharge_energy;
// Flat, plain-data record of results; presumably one per memory array, going
// by the type name (TODO confirm against the code that fills it in).
// It groups organization parameters, delay/timing values, per-component
// powerDef entries, area/dimension values and DRAM-style power/energy values.
// NOTE(review): units are not stated anywhere in this declaration - confirm
// them at the point where the fields are assigned.
typedef struct {
// Organization parameters. Fields with these names on mem_array are the
// comparison keys used by mem_array::lt.
int Ndwl;
int Ndbl;
double Nspd;
int deg_bl_muxing;
int Ndsam_lev_1;
int Ndsam_lev_2;
int number_activated_mats_horizontal_direction;
int number_subbanks;
int page_size_in_bits;
// Delay and timing values (fields prefixed delay_, plus access/cycle times).
double delay_route_to_bank;
double delay_crossbar;
double delay_addr_din_horizontal_htree;
double delay_addr_din_vertical_htree;
double delay_row_predecode_driver_and_block;
double delay_row_decoder;
double delay_bitlines;
double delay_sense_amp;
double delay_subarray_output_driver;
double delay_bit_mux_predecode_driver_and_block;
double delay_bit_mux_decoder;
double delay_senseamp_mux_lev_1_predecode_driver_and_block;
double delay_senseamp_mux_lev_1_decoder;
double delay_senseamp_mux_lev_2_predecode_driver_and_block;
double delay_senseamp_mux_lev_2_decoder;
double delay_input_htree;
double delay_output_htree;
double delay_dout_vertical_htree;
double delay_dout_horizontal_htree;
double delay_comparator;
double access_time;
double cycle_time;
double multisubbank_interleave_cycle_time;
double delay_request_network;
double delay_inside_mat;
double delay_reply_network;
// Presumably DRAM timing parameters, judging by the names - TODO confirm.
double trcd;
double cas_latency;
double precharge_delay;
// Power breakdown: one powerDef (readOp / writeOp / searchOp) per component,
// followed by the overall total.
powerDef power_routing_to_bank;
powerDef power_addr_input_htree;
powerDef power_data_input_htree;
powerDef power_data_output_htree;
powerDef power_addr_horizontal_htree;
powerDef power_datain_horizontal_htree;
powerDef power_dataout_horizontal_htree;
powerDef power_addr_vertical_htree;
powerDef power_datain_vertical_htree;
powerDef power_row_predecoder_drivers;
powerDef power_row_predecoder_blocks;
powerDef power_row_decoders;
powerDef power_bit_mux_predecoder_drivers;
powerDef power_bit_mux_predecoder_blocks;
powerDef power_bit_mux_decoders;
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
powerDef power_senseamp_mux_lev_1_decoders;
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
powerDef power_senseamp_mux_lev_2_decoders;
powerDef power_bitlines;
powerDef power_sense_amps;
powerDef power_prechg_eq_drivers;
powerDef power_output_drivers_at_subarray;
powerDef power_dataout_vertical_htree;
powerDef power_comparators;
powerDef power_crossbar;
powerDef total_power;
// Area and dimensions at several levels (all banks, bank, mat, subarray cells,
// routing).
double area;
double all_banks_height;
double all_banks_width;
double bank_height;
double bank_width;
double subarray_memory_cell_area_height;
double subarray_memory_cell_area_width;
double mat_height;
double mat_width;
double routing_area_height_within_bank;
double routing_area_width_within_bank;
// NOTE(review): uca_org_t::adjust_area divides a same-named mem_array field by
// 100.0 before comparing with 0.2, so this is presumably a percentage - confirm.
double area_efficiency;
// Refresh, page-mode and per-command power/energy values; presumably only
// meaningful for DRAM configurations - TODO confirm.
double refresh_power;
double dram_refresh_period;
double dram_array_availability;
double dyn_read_energy_from_closed_page;
double dyn_read_energy_from_open_page;
double leak_power_subbank_closed_page;
double leak_power_subbank_open_page;
double leak_power_request_and_reply_networks;
double activate_energy;
double read_energy;
double write_energy;
double precharge_energy;
} results_mem_array;
class uca_org_t
{
public:
class uca_org_t {
public:
mem_array * tag_array2;
mem_array * data_array2;
double access_time;
@ -378,7 +333,7 @@ class uca_org_t
void find_cyc();
void adjust_area();//for McPAT only to adjust routing overhead
void cleanup();
~uca_org_t(){};
~uca_org_t() {};
};
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
@ -387,103 +342,62 @@ uca_org_t cacti_interface(const string & infile_name);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(InputParameter * const local_interface);
//McPAT's plain interface, please keep !!!
uca_org_t init_interface(InputParameter * const local_interface);
uca_org_t init_interface(InputParameter * const local_interface,
const string &name);
//McPAT's plain interface, please keep !!!
/*
 * Flat-argument overload of cacti_interface(): every configuration knob is
 * passed as an individual scalar instead of through an InputParameter
 * object. Returns the resulting uca_org_t by value.
 *
 * The parameter order is part of the interface; callers pass all of these
 * positionally, so do not reorder.
 */
uca_org_t cacti_interface(
    // Basic organization and port counts
    int cache_size,
    int line_size,
    int associativity,
    int rw_ports,
    int excl_read_ports,
    int excl_write_ports,
    int single_ended_read_ports,
    int search_ports,
    int banks,
    double tech_node,
    int output_width,
    int specific_tag,
    int tag_width,
    int access_mode,
    int cache,
    int main_mem,
    // Objective-function inputs
    int obj_func_delay,
    int obj_func_dynamic_power,
    int obj_func_leakage_power,
    int obj_func_cycle_time,
    int obj_func_area,
    // Deviation inputs
    int dev_func_delay,
    int dev_func_dynamic_power,
    int dev_func_leakage_power,
    int dev_func_area,
    int dev_func_cycle_time,
    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
    int temp,
    int wt, // 0 - default (search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20%, 5 - 30%, 6 - low-swing
    // Technology flavors for the data and tag arrays
    int data_arr_ram_cell_tech_flavor_in,
    int data_arr_peri_global_tech_flavor_in,
    int tag_arr_ram_cell_tech_flavor_in,
    int tag_arr_peri_global_tech_flavor_in,
    // Interconnect selection
    int interconnect_projection_type_in,
    int wire_inside_mat_type_in,
    int wire_outside_mat_type_in,
    int REPEATERS_IN_HTREE_SEGMENTS_in,
    int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
    int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
    // Page / burst / prefetch settings
    int PAGE_SIZE_BITS_in,
    int BURST_LENGTH_in,
    int INTERNAL_PREFETCH_WIDTH_in,
    // Optional forcing of wire type and array partitioning
    int force_wiretype,
    int wiretype,
    int force_config,
    int ndwl,
    int ndbl,
    int nspd,
    int ndcm,
    int ndsam1,
    int ndsam2,
    int ecc);
//Naveen's interface
uca_org_t cacti_interface(
@ -542,91 +456,90 @@ uca_org_t cacti_interface(
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
int p_input);
class mem_array
{
public:
int Ndcm;
int Ndwl;
int Ndbl;
double Nspd;
int deg_bl_muxing;
int Ndsam_lev_1;
int Ndsam_lev_2;
double access_time;
double cycle_time;
double multisubbank_interleave_cycle_time;
double area_ram_cells;
double area;
powerDef power;
double delay_senseamp_mux_decoder;
double delay_before_subarray_output_driver;
double delay_from_subarray_output_driver_to_output;
double height;
double width;
class mem_array {
public:
int Ndcm;
int Ndwl;
int Ndbl;
double Nspd;
int deg_bl_muxing;
int Ndsam_lev_1;
int Ndsam_lev_2;
double access_time;
double cycle_time;
double multisubbank_interleave_cycle_time;
double area_ram_cells;
double area;
powerDef power;
double delay_senseamp_mux_decoder;
double delay_before_subarray_output_driver;
double delay_from_subarray_output_driver_to_output;
double height;
double width;
double mat_height;
double mat_length;
double subarray_length;
double subarray_height;
double mat_height;
double mat_length;
double subarray_length;
double subarray_height;
double delay_route_to_bank,
delay_input_htree,
delay_row_predecode_driver_and_block,
delay_row_decoder,
delay_bitlines,
delay_sense_amp,
delay_subarray_output_driver,
delay_dout_htree,
delay_comparator,
delay_matchlines;
double delay_route_to_bank,
delay_input_htree,
delay_row_predecode_driver_and_block,
delay_row_decoder,
delay_bitlines,
delay_sense_amp,
delay_subarray_output_driver,
delay_dout_htree,
delay_comparator,
delay_matchlines;
double all_banks_height,
all_banks_width,
area_efficiency;
double all_banks_height,
all_banks_width,
area_efficiency;
powerDef power_routing_to_bank;
powerDef power_addr_input_htree;
powerDef power_data_input_htree;
powerDef power_data_output_htree;
powerDef power_htree_in_search;
powerDef power_htree_out_search;
powerDef power_row_predecoder_drivers;
powerDef power_row_predecoder_blocks;
powerDef power_row_decoders;
powerDef power_bit_mux_predecoder_drivers;
powerDef power_bit_mux_predecoder_blocks;
powerDef power_bit_mux_decoders;
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
powerDef power_senseamp_mux_lev_1_decoders;
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
powerDef power_senseamp_mux_lev_2_decoders;
powerDef power_bitlines;
powerDef power_sense_amps;
powerDef power_prechg_eq_drivers;
powerDef power_output_drivers_at_subarray;
powerDef power_dataout_vertical_htree;
powerDef power_comparators;
powerDef power_routing_to_bank;
powerDef power_addr_input_htree;
powerDef power_data_input_htree;
powerDef power_data_output_htree;
powerDef power_htree_in_search;
powerDef power_htree_out_search;
powerDef power_row_predecoder_drivers;
powerDef power_row_predecoder_blocks;
powerDef power_row_decoders;
powerDef power_bit_mux_predecoder_drivers;
powerDef power_bit_mux_predecoder_blocks;
powerDef power_bit_mux_decoders;
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
powerDef power_senseamp_mux_lev_1_decoders;
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
powerDef power_senseamp_mux_lev_2_decoders;
powerDef power_bitlines;
powerDef power_sense_amps;
powerDef power_prechg_eq_drivers;
powerDef power_output_drivers_at_subarray;
powerDef power_dataout_vertical_htree;
powerDef power_comparators;
powerDef power_cam_bitline_precharge_eq_drv;
powerDef power_searchline;
powerDef power_searchline_precharge;
powerDef power_matchlines;
powerDef power_matchline_precharge;
powerDef power_matchline_to_wordline_drv;
powerDef power_cam_bitline_precharge_eq_drv;
powerDef power_searchline;
powerDef power_searchline_precharge;
powerDef power_matchlines;
powerDef power_matchline_precharge;
powerDef power_matchline_to_wordline_drv;
min_values_t *arr_min;
enum Wire_type wt;
min_values_t *arr_min;
enum Wire_type wt;
// dram stats
double activate_energy, read_energy, write_energy, precharge_energy,
refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
leak_power_request_and_reply_networks;
// dram stats
double activate_energy, read_energy, write_energy, precharge_energy,
refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
leak_power_request_and_reply_networks;
double precharge_delay;
double precharge_delay;
static bool lt(const mem_array * m1, const mem_array * m2);
static bool lt(const mem_array * m1, const mem_array * m2);
};

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -45,34 +46,30 @@ using namespace std;
// Default constructor: value-initializes the area and both power records and
// zeroes the timing members. cycle_time is zeroed along with delay so that
// no Component starts life with an indeterminate timing value.
Component::Component()
    : area(), power(), rt_power(), delay(0), cycle_time(0) {
}
// Empty destructor: no explicit cleanup is performed here.
Component::~Component() {
}
/*
 * Total diffusion width occupied by a stack of series transistors, optionally
 * folded into several fingers.
 *
 * num_stacked_in : number of transistors stacked in series
 * num_folded_tr  : number of folded fingers; any value <= 1 means "not folded"
 *
 * The result is expressed in the same unit as g_ip->F_sz_um and the g_tp
 * spacing parameters it is built from.
 */
double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) {
    const double w_poly = g_ip->F_sz_um;

    // Width reserved for one contacted source/drain region: the contact
    // itself plus poly-to-contact clearance on either side. (Deliberately
    // not the same thing as g_tp.spacing_poly_to_poly, used further down.)
    const double contacted_region_w =
        g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;

    // Unfolded stack: one contacted region at each end (source and drain),
    // one poly stripe per stacked device, minimum poly pitch between them.
    double total_diff_w = 2 * contacted_region_w +
                          num_stacked_in * w_poly +
                          (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;

    // Every additional finger repeats the poly stripes and the gaps between
    // them, and adds further contacted regions.
    if (num_folded_tr > 1) {
        total_diff_w += (num_folded_tr - 2) * 2 * contacted_region_w +
                        (num_folded_tr - 1) * num_stacked_in * w_poly +
                        (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
    }

    return total_diff_w;
}
@ -82,105 +79,96 @@ double Component::compute_gate_area(
int num_inputs,
double w_pmos,
double w_nmos,
double h_gate)
{
if (w_pmos <= 0.0 || w_nmos <= 0.0)
{
return 0.0;
}
double h_gate) {
if (w_pmos <= 0.0 || w_nmos <= 0.0) {
return 0.0;
}
double w_folded_pmos, w_folded_nmos;
int num_folded_pmos, num_folded_nmos;
double total_ndiff_w, total_pdiff_w;
Area gate;
double w_folded_pmos, w_folded_nmos;
int num_folded_pmos, num_folded_nmos;
double total_ndiff_w, total_pdiff_w;
Area gate;
double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
{
return 0.0;
}
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) {
return 0.0;
}
w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
assert(w_folded_pmos > 0);
w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
assert(w_folded_pmos > 0);
num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
switch (gate_type)
{
switch (gate_type) {
case INV:
total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
break;
total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
break;
case NOR:
total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
break;
total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
break;
case NAND:
total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
break;
total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
break;
default:
cout << "Unknown gate type: " << gate_type << endl;
exit(1);
}
cout << "Unknown gate type: " << gate_type << endl;
exit(1);
}
gate.w = MAX(total_ndiff_w, total_pdiff_w);
gate.w = MAX(total_ndiff_w, total_pdiff_w);
if (w_folded_nmos > w_nmos)
{
//means that the height of the gate can
//be made smaller than the input height specified, so calculate the height of the gate.
gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
}
else
{
gate.h = h_gate;
}
return gate.get_area();
if (w_folded_nmos > w_nmos) {
//means that the height of the gate can
//be made smaller than the input height specified, so calculate the height of the gate.
gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
} else {
gate.h = h_gate;
}
return gate.get_area();
}
double Component::compute_tr_width_after_folding(
double input_width,
double threshold_folding_width)
{//This is actually the width of the cell not the width of a device.
//The width of a cell and the width of a device is orthogonal.
if (input_width <= 0)
{
return 0;
}
double threshold_folding_width) {
//This is actually the width of the cell not the width of a device.
//The width of a cell and the width of a device is orthogonal.
if (input_width <= 0) {
return 0;
}
int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
double width_poly = g_ip->F_sz_um;
double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
double width_poly = g_ip->F_sz_um;
double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
return total_diff_width;
return total_diff_width;
}
/*
 * Height of a sense amplifier laid out within the given pitch.
 *
 * pitch_sense_amp is used as the folding threshold for every transistor of
 * the amplifier; the folded widths are then stacked to obtain the height.
 */
double Component::height_sense_amplifier(double pitch_sense_amp) {
    // PMOS section: two sense transistors, one isolation transistor, and two
    // same-type diffusion gaps.
    const double h_pmos_tr =
        compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
        compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
        2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;

    // NMOS section: two sense transistors, one sense-enable transistor, and
    // two same-type diffusion gaps.
    const double h_nmos_tr =
        compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
        compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
        2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;

    // The two sections are separated by the minimum p-to-n diffusion gap.
    return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
}
@ -195,42 +183,39 @@ int Component::logical_effort(
double p_to_n_sz_ratio,
bool is_dram_,
bool is_wl_tr_,
double max_w_nmos)
{
int num_gates = (int) (log(F) / log(fopt));
double max_w_nmos) {
int num_gates = (int) (log(F) / log(fopt));
// check if num_gates is odd. if so, add 1 to make it even
num_gates+= (num_gates % 2) ? 1 : 0;
num_gates = MAX(num_gates, num_gates_min);
// recalculate the effective fanout of each stage
double f = pow(F, 1.0 / num_gates);
int i = num_gates - 1;
double C_in = C_load / f;
w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
w_p[i] = p_to_n_sz_ratio * w_n[i];
if (w_n[i] > max_w_nmos)
{
double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
num_gates = (int) (log(F) / log(fopt)) + 1;
num_gates+= (num_gates % 2) ? 1 : 0;
// check if num_gates is odd. if so, add 1 to make it even
num_gates += (num_gates % 2) ? 1 : 0;
num_gates = MAX(num_gates, num_gates_min);
f = pow(F, 1.0 / (num_gates - 1));
i = num_gates - 1;
w_n[i] = max_w_nmos;
// recalculate the effective fanout of each stage
double f = pow(F, 1.0 / num_gates);
int i = num_gates - 1;
double C_in = C_load / f;
w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
w_p[i] = p_to_n_sz_ratio * w_n[i];
}
for (i = num_gates - 2; i >= 1; i--)
{
w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
w_p[i] = p_to_n_sz_ratio * w_n[i];
}
if (w_n[i] > max_w_nmos) {
double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
num_gates = (int) (log(F) / log(fopt)) + 1;
num_gates += (num_gates % 2) ? 1 : 0;
num_gates = MAX(num_gates, num_gates_min);
f = pow(F, 1.0 / (num_gates - 1));
i = num_gates - 1;
w_n[i] = max_w_nmos;
w_p[i] = p_to_n_sz_ratio * w_n[i];
}
assert(num_gates <= MAX_NUMBER_GATES_STAGE);
return num_gates;
for (i = num_gates - 2; i >= 1; i--) {
w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
w_p[i] = p_to_n_sz_ratio * w_n[i];
}
assert(num_gates <= MAX_NUMBER_GATES_STAGE);
return num_gates;
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -42,41 +43,32 @@ using namespace std;
class Crossbar;
class Bank;
class Component
{
public:
class Component {
public:
Component();
~Component();
Area area;
powerDef power,rt_power;
// TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE
// VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS
// MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER
// CALCULATIONS. THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS
powerDef power, rt_power;
double delay;
double cycle_time;
double compute_gate_area(
int gate_type,
int num_inputs,
double w_pmos,
double w_nmos,
double h_gate);
double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
double compute_gate_area(int gate_type, int num_inputs, double w_pmos,
double w_nmos, double h_gate);
double compute_tr_width_after_folding(double input_width,
double threshold_folding_width);
double height_sense_amplifier(double pitch_sense_amp);
protected:
int logical_effort(
int num_gates_min,
double g,
double F,
double * w_n,
double * w_p,
double C_load,
double p_to_n_sz_ratio,
bool is_dram_,
bool is_wl_tr_,
double max_w_nmos);
protected:
int logical_effort(int num_gates_min, double g, double F, double * w_n,
double * w_p, double C_load, double p_to_n_sz_ratio,
bool is_dram_, bool is_wl_tr_, double max_w_nmos);
private:
private:
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
};

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -249,21 +250,20 @@ const double bit_to_byte = 8.0;
// v : vertical or velocity
// Numeric identifiers for the RAM-cell technology flavors. The explicit
// values are part of the interface (they are passed around as plain ints),
// so they must not be renumbered.
// NOTE(review): the itrs_* names presumably refer to the ITRS high-performance,
// low-standby-power and low-operating-power device types - confirm.
enum ram_cell_tech_type_num {
    itrs_hp   = 0,
    itrs_lstp = 1,
    itrs_lop  = 2,
    lp_dram   = 3,
    comm_dram = 4
};
// Four-slot weight vectors. Each named mask has a 1 in the slot(s) it selects
// and 0 elsewhere; pppm selects all four slots.
// NOTE(review): judging by the one-hot masks, the slots are presumably
// [0] dynamic, [1] subthreshold leakage, [2] gate leakage, [3] short-circuit,
// with pppm_lkg selecting both leakage slots - confirm against the users.
const double pppm[4]      = {1, 1, 1, 1};
const double pppm_lkg[4]  = {0, 1, 1, 0};
const double pppm_dyn[4]  = {1, 0, 0, 0};
const double pppm_Isub[4] = {0, 1, 0, 0};
const double pppm_Ig[4]   = {0, 0, 1, 0};
const double pppm_sc[4]   = {0, 0, 0, 1};

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -39,123 +40,140 @@ Crossbar::Crossbar(
double n_out_,
double flit_size_,
TechnologyParameter::DeviceType *dt
):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
{
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
Vdd = dt->Vdd;
CB_ADJ = 1;
): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
Vdd = dt->Vdd;
CB_ADJ = 1;
}
// Empty destructor: no explicit cleanup is performed here.
Crossbar::~Crossbar() {}
double Crossbar::output_buffer()
{
double Crossbar::output_buffer() {
//Wire winit(4, 4);
double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
Wire w1(g_ip->wt, l_eff);
//double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
// the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
TriS2 = s1; //driver transistor
//Wire winit(4, 4);
double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
Wire w1(g_ip->wt, l_eff);
//double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
l_eff * ADJ / w1.repeater_spacing : ADJ);
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
// the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
TriS2 = s1; //driver transistor
if (TriS1 < 1)
TriS1 = 1;
if (TriS1 < 1)
TriS1 = 1;
double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) +
gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
// gate_C(TriS2*min_w_pmos, 0);
tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
gate_C(TriS2*g_tp.min_w_nmos_, 0)+
drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
gate_C(TriS2*min_w_pmos, 0);
double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
gate_C(TriS2 * g_tp.min_w_nmos_, 0) +
drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
gate_C(TriS2 * min_w_pmos, 0);
double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
g_tp.cell_h_def) +
drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
tri_inp_cap = input_cap;
tri_out_cap = output_cap;
tri_ctr_cap = ctr_cap;
return input_cap + output_cap + ctr_cap;
tri_inp_cap = input_cap;
tri_out_cap = output_cap;
tri_ctr_cap = ctr_cap;
return input_cap + output_cap + ctr_cap;
}
void Crossbar::compute_power()
{
void Crossbar::compute_power() {
Wire winit(4, 4);
double tri_cap = output_buffer();
assert(tri_cap > 0);
//area of a tristate logic
double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
g_area *= 2; // to model area of output transistors
g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
// effective no. of tristate buffers that need to be laid side by side
int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
Wire w1(g_ip->wt, wire_len);
Wire winit(4, 4);
double tri_cap = output_buffer();
assert(tri_cap > 0);
//area of a tristate logic
double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
TriS2 * min_w_pmos, g_tp.cell_h_def);
g_area *= 2; // to model area of output transistors
g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
TriS1 * min_w_pmos, g_tp.cell_h_def);
g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_,
TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
// effective no. of tristate buffers that need to be laid side by side
int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
double wire_len = MAX(width * ntri * n_out,
flit_size * g_tp.wire_outside_mat.pitch * n_out);
Wire w1(g_ip->wt, wire_len);
area.w = wire_len;
area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
Wire w2(g_ip->wt, area.h);
area.w = wire_len;
area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ;
Wire w2(g_ip->wt, area.h);
double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp);
if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
if (aspect_ratio_cb < ASPECT_THRESHOLD) {
if (n_out > 2 && n_inp > 2) {
CB_ADJ+=0.2;
//cout << "CB ADJ " << CB_ADJ << endl;
if (CB_ADJ < 4) {
this->compute_power();
}
if (aspect_ratio_cb < ASPECT_THRESHOLD) {
if (n_out > 2 && n_inp > 2) {
CB_ADJ += 0.2;
//cout << "CB ADJ " << CB_ADJ << endl;
if (CB_ADJ < 4) {
this->compute_power();
}
}
}
}
power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
power.readOp.leakage = n_inp * n_out * flit_size * (
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
w1.power.readOp.leakage + w2.power.readOp.leakage);
power.readOp.gate_leakage = n_inp * n_out * flit_size * (
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
power.readOp.dynamic =
(w1.power.readOp.dynamic + w2.power.readOp.dynamic +
(tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
tri_int_cap) * Vdd * Vdd) * flit_size;
power.readOp.leakage = n_inp * n_out * flit_size * (
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
1, inv) * Vdd +
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
2, nand) * Vdd +
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
2, nor) * Vdd +
w1.power.readOp.leakage + w2.power.readOp.leakage);
power.readOp.gate_leakage = n_inp * n_out * flit_size * (
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
1, inv) * Vdd +
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
2, nand) * Vdd +
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
2, nor) * Vdd +
w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
// delay calculation
double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
Wire wdriver(g_ip->wt, l_eff);
double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
// delay calculation
double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
Wire wdriver(g_ip->wt, l_eff);
double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
tri_inp_cap + n_inp * tri_out_cap;
delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
Wire wreset();
Wire wreset();
}
// Prints a summary of this crossbar to stdout: port counts (n_inp x n_out),
// flit size, width/height, dynamic figure, leakage, gate leakage and delay.
// Values are scaled for display only (x1e9 -> "nJ", x1e3 -> "mW", x1e12 -> "ps");
// the dynamic figure is additionally multiplied by MIN(n_inp, n_out).
// NOTE(review): the line is labelled "Dynamic Power" but is reported in nJ.
// NOTE(review): this span comes from a rendered diff, so the pre-change and
// post-change bodies both appear below; they differ only in line wrapping.
void Crossbar::print_crossbar()
{
cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
cout << "Flit size : " << flit_size << " bits" << endl;
cout << "Width : " << area.w << " u" << endl;
cout << "Height : " << area.h << " u" << endl;
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
void Crossbar::print_crossbar() {
cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
cout << "Flit size : " << flit_size << " bits" << endl;
cout << "Width : " << area.w << " u" << endl;
cout << "Height : " << area.h << " u" << endl;
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 *
MIN(n_inp, n_out) << " (nJ)" << endl;
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)"
<< endl;
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
<< " (mW)" << endl;
cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -44,14 +45,13 @@
#include "parameter.h"
#include "wire.h"
class Crossbar : public Component
{
public:
class Crossbar : public Component {
public:
Crossbar(
double in,
double out,
double flit_sz,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
double in,
double out,
double flit_sz,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
~Crossbar();
void print_crossbar();
@ -62,18 +62,18 @@ class Crossbar : public Component
double flit_size;
double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
private:
double CB_ADJ;
/*
* Adjustment factor for the height of the cross-point (tri-state buffer) cell layout in the crossbar.
* The buffer is adjusted so that the aspect ratio of the whole crossbar is close to one.
* Adjusting the ratio does not change the number of wires routed over the tri-state buffers;
* however, the effective wiring pitch changes. Specifically, since CB_ADJ increases
* during the adjustment, the tri-state buffer becomes taller and thinner, and the effective wiring pitch
* increases. As a result, the height of the crossbar (area.h) increases.
*/
private:
double CB_ADJ;
/*
* Adjustment factor for the height of the cross-point (tri-state buffer) cell layout in the crossbar.
* The buffer is adjusted so that the aspect ratio of the whole crossbar is close to one.
* Adjusting the ratio does not change the number of wires routed over the tri-state buffers;
* however, the effective wiring pitch changes. Specifically, since CB_ADJ increases
* during the adjustment, the tri-state buffer becomes taller and thinner, and the effective wiring pitch
* increases. As a result, the height of the crossbar (area.h) increases.
*/
TechnologyParameter::DeviceType *deviceType;
TechnologyParameter::DeviceType *deviceType;
double TriS1, TriS2;
double min_w_pmos, Vdd;

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -42,9 +43,8 @@
using namespace std;
class Decoder : public Component
{
public:
class Decoder : public Component {
public:
Decoder(
int _num_dec_signals,
bool flag_way_select,
@ -80,125 +80,120 @@ class Decoder : public Component
// PredecBlk: Component describing one predecode block. All members are public.
// It stores a pointer to the Decoder passed to the constructor, gate counts and
// transistor widths for the L1 NAND2 path, L1 NAND3 path and L2 stage (arrays
// sized MAX_NUMBER_GATES_STAGE), and per-path delay/power results.
// compute_delays() takes the <nand2, nand3> input rise times and returns the
// <nand2, nand3> output rise times (per the trailing comments on its declaration).
// NOTE(review): this span comes from a rendered diff; the pre-change and
// post-change declarations both appear and differ only in brace/indent style.
class PredecBlk : public Component
{
public:
PredecBlk(
int num_dec_signals,
Decoder * dec,
double C_wire_predec_blk_out,
double R_wire_predec_blk_out,
int num_dec_per_predec,
bool is_dram_,
bool is_blk1);
class PredecBlk : public Component {
public:
PredecBlk(
int num_dec_signals,
Decoder * dec,
double C_wire_predec_blk_out,
double R_wire_predec_blk_out,
int num_dec_per_predec,
bool is_dram_,
bool is_blk1);
Decoder * dec;
bool exist;
int number_input_addr_bits;
double C_ld_predec_blk_out;
double R_wire_predec_blk_out;
int branch_effort_nand2_gate_output;
int branch_effort_nand3_gate_output;
bool flag_two_unique_paths;
int flag_L2_gate;
int number_inputs_L1_gate;
int number_gates_L1_nand2_path;
int number_gates_L1_nand3_path;
int number_gates_L2;
int min_number_gates_L1;
int min_number_gates_L2;
int num_L1_active_nand2_path;
int num_L1_active_nand3_path;
double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
double w_L2_n[MAX_NUMBER_GATES_STAGE];
double w_L2_p[MAX_NUMBER_GATES_STAGE];
double delay_nand2_path;
double delay_nand3_path;
powerDef power_nand2_path;
powerDef power_nand3_path;
powerDef power_L2;
Decoder * dec;
bool exist;
int number_input_addr_bits;
double C_ld_predec_blk_out;
double R_wire_predec_blk_out;
int branch_effort_nand2_gate_output;
int branch_effort_nand3_gate_output;
bool flag_two_unique_paths;
int flag_L2_gate;
int number_inputs_L1_gate;
int number_gates_L1_nand2_path;
int number_gates_L1_nand3_path;
int number_gates_L2;
int min_number_gates_L1;
int min_number_gates_L2;
int num_L1_active_nand2_path;
int num_L1_active_nand3_path;
double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
double w_L2_n[MAX_NUMBER_GATES_STAGE];
double w_L2_p[MAX_NUMBER_GATES_STAGE];
double delay_nand2_path;
double delay_nand3_path;
powerDef power_nand2_path;
powerDef power_nand3_path;
powerDef power_L2;
bool is_dram_;
bool is_dram_;
void compute_widths();
void compute_area();
void compute_widths();
void compute_area();
void leakage_feedback(double temperature);
void leakage_feedback(double temperature);
pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
// return <outrise_nand2, outrise_nand3>
pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
// return <outrise_nand2, outrise_nand3>
};
// PredecBlkDrv: Component describing the driver in front of a PredecBlk.
// All members are public. It records how many buffers drive 1/2/4 NAND2 loads
// and 2/8 NAND3 loads, the output load (c_load_*, r_load_*) and transistor
// widths for each path, and per-path delay/power results.
// num_addr_bits_nand2_path() returns the sum of the three NAND2 buffer counts;
// num_addr_bits_nand3_path() returns the sum of the two NAND3 buffer counts.
// compute_delays() takes one input rise time per path and returns the
// <nand2, nand3> output rise times (per the trailing comment on its declaration).
// NOTE(review): this span comes from a rendered diff; the pre-change and
// post-change declarations both appear and differ only in brace/indent style.
class PredecBlkDrv : public Component
{
public:
PredecBlkDrv(
int way_select,
PredecBlk * blk_,
bool is_dram);
class PredecBlkDrv : public Component {
public:
PredecBlkDrv(
int way_select,
PredecBlk * blk_,
bool is_dram);
int flag_driver_exists;
int number_input_addr_bits;
int number_gates_nand2_path;
int number_gates_nand3_path;
int min_number_gates;
int num_buffers_driving_1_nand2_load;
int num_buffers_driving_2_nand2_load;
int num_buffers_driving_4_nand2_load;
int num_buffers_driving_2_nand3_load;
int num_buffers_driving_8_nand3_load;
int num_buffers_nand3_path;
double c_load_nand2_path_out;
double c_load_nand3_path_out;
double r_load_nand2_path_out;
double r_load_nand3_path_out;
double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
double delay_nand2_path;
double delay_nand3_path;
powerDef power_nand2_path;
powerDef power_nand3_path;
int flag_driver_exists;
int number_input_addr_bits;
int number_gates_nand2_path;
int number_gates_nand3_path;
int min_number_gates;
int num_buffers_driving_1_nand2_load;
int num_buffers_driving_2_nand2_load;
int num_buffers_driving_4_nand2_load;
int num_buffers_driving_2_nand3_load;
int num_buffers_driving_8_nand3_load;
int num_buffers_nand3_path;
double c_load_nand2_path_out;
double c_load_nand3_path_out;
double r_load_nand2_path_out;
double r_load_nand3_path_out;
double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
double delay_nand2_path;
double delay_nand3_path;
powerDef power_nand2_path;
powerDef power_nand3_path;
PredecBlk * blk;
Decoder * dec;
bool is_dram_;
int way_select;
PredecBlk * blk;
Decoder * dec;
bool is_dram_;
int way_select;
void compute_widths();
void compute_area();
void compute_widths();
void compute_area();
void leakage_feedback(double temperature);
void leakage_feedback(double temperature);
pair<double, double> compute_delays(
double inrisetime_nand2_path,
double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
pair<double, double> compute_delays(
double inrisetime_nand2_path,
double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
inline int num_addr_bits_nand2_path()
{
return num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load;
}
inline int num_addr_bits_nand3_path()
{
return num_buffers_driving_2_nand3_load +
num_buffers_driving_8_nand3_load;
}
double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
inline int num_addr_bits_nand2_path() {
return num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load;
}
inline int num_addr_bits_nand3_path() {
return num_buffers_driving_2_nand3_load +
num_buffers_driving_8_nand3_load;
}
double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
};
class Predec : public Component
{
public:
class Predec : public Component {
public:
Predec(
PredecBlkDrv * drv1,
PredecBlkDrv * drv2);
@ -214,7 +209,7 @@ class Predec : public Component
powerDef block_power;
powerDef driver_power;
private:
private:
// returns <delay, risetime>
pair<double, double> get_max_delay_before_decoder(
pair<double, double> input_pair1,
@ -223,24 +218,23 @@ class Predec : public Component
// Driver: Component describing a gate chain that drives a load given as a gate
// capacitance, a wire capacitance and a wire resistance (constructor arguments).
// All members are public: the number of gates, per-stage n/p widths (arrays
// sized MAX_NUMBER_GATES_STAGE), the stored load values, and delay/power results.
// compute_delay() takes an input rise time and returns a double.
// NOTE(review): presumably the returned value is the output rise time, as in the
// sibling compute_delays() methods — confirm in the implementation.
// NOTE(review): this span comes from a rendered diff; the pre-change and
// post-change declarations both appear and differ only in brace/indent style.
class Driver : public Component
{
public:
Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
class Driver : public Component {
public:
Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
int number_gates;
int min_number_gates;
double width_n[MAX_NUMBER_GATES_STAGE];
double width_p[MAX_NUMBER_GATES_STAGE];
double c_gate_load;
double c_wire_load;
double r_wire_load;
double delay;
powerDef power;
bool is_dram_;
int number_gates;
int min_number_gates;
double width_n[MAX_NUMBER_GATES_STAGE];
double width_p[MAX_NUMBER_GATES_STAGE];
double c_gate_load;
double c_wire_load;
double r_wire_load;
double delay;
powerDef power;
bool is_dram_;
void compute_widths();
double compute_delay(double inrisetime);
void compute_widths();
double compute_delay(double inrisetime);
};

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -44,13 +45,12 @@
// leakage power includes entire htree in a bank (when uca_tree == false)
// leakage power includes only part to one bank when uca_tree == true
class Htree2 : public Component
{
public:
class Htree2 : public Component {
public:
Htree2(enum Wire_type wire_model,
double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
~Htree2() {};
void in_htree();
@ -64,16 +64,15 @@ class Htree2 : public Component
double in_rise_time, out_rise_time;
void set_in_rise_time(double rt)
{
in_rise_time = rt;
void set_in_rise_time(double rt) {
in_rise_time = rt;
}
double max_unpipelined_link_delay;
powerDef power_bit;
private:
private:
double wire_bw;
double init_wire_bw; // bus width at root
enum Htree_type tree_type;
@ -81,7 +80,11 @@ class Htree2 : public Component
double htree_vnodes;
double mat_width;
double mat_height;
int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
int add_bits;
int data_in_bits;
int search_data_in_bits;
int data_out_bits;
int search_data_out_bits;
int ndbl, ndwl;
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
bool search_tree;

File diff suppressed because it is too large Load diff

2878
ext/mcpat/cacti/mat.cc Executable file → Normal file

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -39,9 +40,8 @@
#include "subarray.h"
#include "wire.h"
class Mat : public Component
{
public:
class Mat : public Component {
public:
Mat(const DynamicParameter & dyn_p);
~Mat();
double compute_delays(double inrisetime); // return outrisetime
@ -106,8 +106,8 @@ class Mat : public Component
int deg_bl_muxing;
int num_act_mats_hor_dir;
double delay_writeback;
Area cell,cam_cell;
bool is_dram,is_fa, pure_cam, camFlag;
Area cell, cam_cell;
bool is_dram, is_fa, pure_cam, camFlag;
int num_mats;
powerDef power_sa;
double delay_sa;
@ -127,7 +127,7 @@ class Mat : public Component
uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
private:
private:
double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
double width_write_driver_or_write_mux();
double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -46,8 +47,8 @@
#include "wire.h"
class nuca_org_t {
public:
~nuca_org_t();
public:
~nuca_org_t();
// int size;
/* area, power, access time, and cycle time stats */
Component nuca_pda;
@ -71,9 +72,8 @@ class nuca_org_t {
class Nuca : public Component
{
public:
class Nuca : public Component {
public:
Nuca(
TechnologyParameter::DeviceType *dt);
void print_router();
@ -87,12 +87,12 @@ class Nuca : public Component
void print_nuca(nuca_org_t *n);
void print_cont_stats();
private:
private:
TechnologyParameter::DeviceType *deviceType;
int wt_min, wt_max;
Wire *wire_vertical[WIRE_TYPES],
*wire_horizontal[WIRE_TYPES];
*wire_horizontal[WIRE_TYPES];
};

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -40,251 +41,242 @@
#include "io.h"
// parameters which are functions of certain device technology
// TechnologyParameter: plain public record of technology-dependent values.
// It nests four record types, each with reset() and display():
//   DeviceType       - per-transistor-type values (capacitances, lengths,
//                      on-resistances, Vdd/Vth, on/off/gate currents, ...)
//   InterconnectType - per-wire-class values (pitch, R/C per um, dielectric
//                      constants, aspect ratio, miller value, ILD thickness)
//   MemoryType       - per-cell values (b_w, b_h, cell widths, Vbitpre)
//   ScalingFactor    - logic scaling, core transistor density, leakage reduction
// and holds instances of them (sram_cell, dram_acc, dram_wl, peri_global,
// cam_cell; wire_local, wire_inside_mat, wire_outside_mat; scaling_factor;
// sram, dram, cam) alongside a list of scalar parameters.
//
// TechnologyParameter::reset() zeroes only dram_cell_Vdd, dram_cell_I_on,
// dram_cell_C, vpp, sense_delay, sense_dy_power, fringe_cap,
// dram_cell_I_off_worst_case_len_temp, chip_layout_overhead,
// macro_layout_overhead and sckt_co_eff, and calls reset() on every nested
// record; the remaining scalars (e.g. min_w_nmos_, FO4, kinv, h_dec) are not
// touched by it.
//
// NOTE(review): DeviceType::reset() zeroes every field except C_junc_sidewall,
// which the DeviceType constructor does initialise to 0 — confirm whether the
// omission is intentional.
// NOTE(review): the InterconnectType constructor initialises only pitch,
// R_per_um and C_per_um; the other five fields are first written by reset().
// MemoryType declares no constructor, so its fields are only set by reset()
// or by code outside this span.
// NOTE(review): cam_cell carries the same trailing comment as sram_cell
// ("SRAM cell transistor") — looks like copy/paste; confirm.
// NOTE(review): this span comes from a rendered diff; pre-change and
// post-change lines are interleaved and differ only in brace/indent/spacing
// style.
class TechnologyParameter
{
public:
class DeviceType
{
public:
double C_g_ideal;
double C_fringe;
double C_overlap;
double C_junc; // C_junc_area
double C_junc_sidewall;
double l_phy;
double l_elec;
double R_nch_on;
double R_pch_on;
double Vdd;
double Vth;
double I_on_n;
double I_on_p;
double I_off_n;
double I_off_p;
double I_g_on_n;
double I_g_on_p;
double C_ox;
double t_ox;
double n_to_p_eff_curr_drv_ratio;
double long_channel_leakage_reduction;
class TechnologyParameter {
public:
class DeviceType {
public:
double C_g_ideal;
double C_fringe;
double C_overlap;
double C_junc; // C_junc_area
double C_junc_sidewall;
double l_phy;
double l_elec;
double R_nch_on;
double R_pch_on;
double Vdd;
double Vth;
double I_on_n;
double I_on_p;
double I_off_n;
double I_off_p;
double I_g_on_n;
double I_g_on_p;
double C_ox;
double t_ox;
double n_to_p_eff_curr_drv_ratio;
double long_channel_leakage_reduction;
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
Vdd(0), Vth(0),
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { };
void reset()
{
C_g_ideal = 0;
C_fringe = 0;
C_overlap = 0;
C_junc = 0;
l_phy = 0;
l_elec = 0;
R_nch_on = 0;
R_pch_on = 0;
Vdd = 0;
Vth = 0;
I_on_n = 0;
I_on_p = 0;
I_off_n = 0;
I_off_p = 0;
I_g_on_n = 0;
I_g_on_p = 0;
C_ox = 0;
t_ox = 0;
n_to_p_eff_curr_drv_ratio = 0;
long_channel_leakage_reduction = 0;
}
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
Vdd(0), Vth(0),
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0),
I_g_on_p(0),
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0),
long_channel_leakage_reduction(0) { };
void reset() {
C_g_ideal = 0;
C_fringe = 0;
C_overlap = 0;
C_junc = 0;
l_phy = 0;
l_elec = 0;
R_nch_on = 0;
R_pch_on = 0;
Vdd = 0;
Vth = 0;
I_on_n = 0;
I_on_p = 0;
I_off_n = 0;
I_off_p = 0;
I_g_on_n = 0;
I_g_on_p = 0;
C_ox = 0;
t_ox = 0;
n_to_p_eff_curr_drv_ratio = 0;
long_channel_leakage_reduction = 0;
}
void display(uint32_t indent = 0);
};
class InterconnectType {
public:
double pitch;
double R_per_um;
double C_per_um;
double horiz_dielectric_constant;
double vert_dielectric_constant;
double aspect_ratio;
double miller_value;
double ild_thickness;
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
void reset() {
pitch = 0;
R_per_um = 0;
C_per_um = 0;
horiz_dielectric_constant = 0;
vert_dielectric_constant = 0;
aspect_ratio = 0;
miller_value = 0;
ild_thickness = 0;
}
void display(uint32_t indent = 0);
};
class MemoryType {
public:
double b_w;
double b_h;
double cell_a_w;
double cell_pmos_w;
double cell_nmos_w;
double Vbitpre;
void reset() {
b_w = 0;
b_h = 0;
cell_a_w = 0;
cell_pmos_w = 0;
cell_nmos_w = 0;
Vbitpre = 0;
}
void display(uint32_t indent = 0);
};
class ScalingFactor {
public:
double logic_scaling_co_eff;
double core_tx_density;
double long_channel_leakage_reduction;
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
long_channel_leakage_reduction(0) { };
void reset() {
logic_scaling_co_eff = 0;
core_tx_density = 0;
long_channel_leakage_reduction = 0;
}
void display(uint32_t indent = 0);
};
double ram_wl_stitching_overhead_;
double min_w_nmos_;
double max_w_nmos_;
double max_w_nmos_dec;
double unit_len_wire_del;
double FO4;
double kinv;
double vpp;
double w_sense_en;
double w_sense_n;
double w_sense_p;
double sense_delay;
double sense_dy_power;
double w_iso;
double w_poly_contact;
double spacing_poly_to_poly;
double spacing_poly_to_contact;
double w_comp_inv_p1;
double w_comp_inv_p2;
double w_comp_inv_p3;
double w_comp_inv_n1;
double w_comp_inv_n2;
double w_comp_inv_n3;
double w_eval_inv_p;
double w_eval_inv_n;
double w_comp_n;
double w_comp_p;
double dram_cell_I_on;
double dram_cell_Vdd;
double dram_cell_I_off_worst_case_len_temp;
double dram_cell_C;
double gm_sense_amp_latch;
double w_nmos_b_mux;
double w_nmos_sa_mux;
double w_pmos_bl_precharge;
double w_pmos_bl_eq;
double MIN_GAP_BET_P_AND_N_DIFFS;
double MIN_GAP_BET_SAME_TYPE_DIFFS;
double HPOWERRAIL;
double cell_h_def;
double chip_layout_overhead;
double macro_layout_overhead;
double sckt_co_eff;
double fringe_cap;
uint64_t h_dec;
DeviceType sram_cell; // SRAM cell transistor
DeviceType dram_acc; // DRAM access transistor
DeviceType dram_wl; // DRAM wordline transistor
DeviceType peri_global; // peripheral global
DeviceType cam_cell; // SRAM cell transistor
InterconnectType wire_local;
InterconnectType wire_inside_mat;
InterconnectType wire_outside_mat;
ScalingFactor scaling_factor;
MemoryType sram;
MemoryType dram;
MemoryType cam;
void display(uint32_t indent = 0);
};
class InterconnectType
{
public:
double pitch;
double R_per_um;
double C_per_um;
double horiz_dielectric_constant;
double vert_dielectric_constant;
double aspect_ratio;
double miller_value;
double ild_thickness;
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
void reset() {
dram_cell_Vdd = 0;
dram_cell_I_on = 0;
dram_cell_C = 0;
vpp = 0;
void reset()
{
pitch = 0;
R_per_um = 0;
C_per_um = 0;
horiz_dielectric_constant = 0;
vert_dielectric_constant = 0;
aspect_ratio = 0;
miller_value = 0;
ild_thickness = 0;
}
void display(uint32_t indent = 0);
};
class MemoryType
{
public:
double b_w;
double b_h;
double cell_a_w;
double cell_pmos_w;
double cell_nmos_w;
double Vbitpre;
void reset()
{
b_w = 0;
b_h = 0;
cell_a_w = 0;
cell_pmos_w = 0;
cell_nmos_w = 0;
Vbitpre = 0;
}
void display(uint32_t indent = 0);
};
class ScalingFactor
{
public:
double logic_scaling_co_eff;
double core_tx_density;
double long_channel_leakage_reduction;
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
long_channel_leakage_reduction(0) { };
void reset()
{
logic_scaling_co_eff= 0;
core_tx_density = 0;
long_channel_leakage_reduction= 0;
}
void display(uint32_t indent = 0);
};
double ram_wl_stitching_overhead_;
double min_w_nmos_;
double max_w_nmos_;
double max_w_nmos_dec;
double unit_len_wire_del;
double FO4;
double kinv;
double vpp;
double w_sense_en;
double w_sense_n;
double w_sense_p;
double sense_delay;
double sense_dy_power;
double w_iso;
double w_poly_contact;
double spacing_poly_to_poly;
double spacing_poly_to_contact;
double w_comp_inv_p1;
double w_comp_inv_p2;
double w_comp_inv_p3;
double w_comp_inv_n1;
double w_comp_inv_n2;
double w_comp_inv_n3;
double w_eval_inv_p;
double w_eval_inv_n;
double w_comp_n;
double w_comp_p;
double dram_cell_I_on;
double dram_cell_Vdd;
double dram_cell_I_off_worst_case_len_temp;
double dram_cell_C;
double gm_sense_amp_latch;
double w_nmos_b_mux;
double w_nmos_sa_mux;
double w_pmos_bl_precharge;
double w_pmos_bl_eq;
double MIN_GAP_BET_P_AND_N_DIFFS;
double MIN_GAP_BET_SAME_TYPE_DIFFS;
double HPOWERRAIL;
double cell_h_def;
double chip_layout_overhead;
double macro_layout_overhead;
double sckt_co_eff;
double fringe_cap;
uint64_t h_dec;
DeviceType sram_cell; // SRAM cell transistor
DeviceType dram_acc; // DRAM access transistor
DeviceType dram_wl; // DRAM wordline transistor
DeviceType peri_global; // peripheral global
DeviceType cam_cell; // SRAM cell transistor
InterconnectType wire_local;
InterconnectType wire_inside_mat;
InterconnectType wire_outside_mat;
ScalingFactor scaling_factor;
MemoryType sram;
MemoryType dram;
MemoryType cam;
void display(uint32_t indent = 0);
void reset()
{
dram_cell_Vdd = 0;
dram_cell_I_on = 0;
dram_cell_C = 0;
vpp = 0;
sense_delay = 0;
sense_dy_power = 0;
fringe_cap = 0;
sense_delay = 0;
sense_dy_power = 0;
fringe_cap = 0;
// horiz_dielectric_constant = 0;
// vert_dielectric_constant = 0;
// aspect_ratio = 0;
// miller_value = 0;
// ild_thickness = 0;
dram_cell_I_off_worst_case_len_temp = 0;
dram_cell_I_off_worst_case_len_temp = 0;
sram_cell.reset();
dram_acc.reset();
dram_wl.reset();
peri_global.reset();
cam_cell.reset();
sram_cell.reset();
dram_acc.reset();
dram_wl.reset();
peri_global.reset();
cam_cell.reset();
scaling_factor.reset();
scaling_factor.reset();
wire_local.reset();
wire_inside_mat.reset();
wire_outside_mat.reset();
wire_local.reset();
wire_inside_mat.reset();
wire_outside_mat.reset();
sram.reset();
dram.reset();
cam.reset();
sram.reset();
dram.reset();
cam.reset();
chip_layout_overhead = 0;
macro_layout_overhead = 0;
sckt_co_eff = 0;
}
chip_layout_overhead = 0;
macro_layout_overhead = 0;
sckt_co_eff = 0;
}
};
class DynamicParameter
{
public:
class DynamicParameter {
public:
bool is_tag;
bool pure_ram;
bool pure_cam;
@ -313,8 +305,8 @@ class DynamicParameter
int num_so_b_mat;
int num_si_b_subbank;
int num_so_b_subbank;
int num_si_b_bank_per_port;
int num_so_b_bank_per_port;
int num_si_b_bank_per_port;
int num_so_b_bank_per_port;
int number_way_select_signals_mat;
int num_act_mats_hor_dir;

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -41,57 +42,56 @@ Router::Router(
double I_,
double O_,
double M_
):flit_size(flit_size_),
deviceType(dt),
I(I_),
O(O_),
M(M_)
{
vc_buffer_size = vc_buf;
vc_count = vc_c;
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
double technology = g_ip->F_sz_um;
): flit_size(flit_size_),
deviceType(dt),
I(I_),
O(O_),
M(M_) {
vc_buffer_size = vc_buf;
vc_count = vc_c;
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
double technology = g_ip->F_sz_um;
Vdd = dt->Vdd;
Vdd = dt->Vdd;
/*Crossbar parameters. Transmission gate is employed for connector*/
NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
wt = 15*technology*1e-6/2; /*track width*/
ht = 15*technology*1e-6/2; /*track height*/
/*Crossbar parameters. Transmission gate is employed for connector*/
NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/
PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/
wt = 15 * technology * 1e-6 / 2; /*track width*/
ht = 15 * technology * 1e-6 / 2; /*track height*/
// I = 5; /*Number of crossbar input ports*/
// O = 5; /*Number of crossbar output ports*/
NTi = 12.5*technology*1e-6/2;
PTi = 25*technology*1e-6/2;
NTi = 12.5 * technology * 1e-6 / 2;
PTi = 25 * technology * 1e-6 / 2;
NTid = 60*technology*1e-6/2; //m
PTid = 120*technology*1e-6/2; // m
NTod = 60*technology*1e-6/2; // m
PTod = 120*technology*1e-6/2; // m
NTid = 60 * technology * 1e-6 / 2; //m
PTid = 120 * technology * 1e-6 / 2; // m
NTod = 60 * technology * 1e-6 / 2; // m
PTod = 120 * technology * 1e-6 / 2; // m
calc_router_parameters();
calc_router_parameters();
}
// Destructor with an empty body; the class does no explicit cleanup here.
// NOTE(review): rendered diff — the two lines are the pre- and post-change
// spellings of the same definition.
Router::~Router(){}
Router::~Router() {}
// Returns Wire::wire_cap(length) for a temporary Wire constructed with
// g_ip->wt, the given length and the trailing arguments (1, 3, 3).
// NOTE(review): the existing comment says this models triple spacing;
// presumably the 3s are the width/spacing scaling arguments — confirm against
// the Wire constructor.
// NOTE(review): rendered diff — the pre- and post-change body lines both
// appear below and are identical apart from indentation.
double //wire cap with triple spacing
Router::Cw3(double length) {
Wire wc(g_ip->wt, length, 1, 3, 3);
return (wc.wire_cap(length));
Wire wc(g_ip->wt, length, 1, 3, 3);
return (wc.wire_cap(length));
}
/*Function to calculate the gate capacitance*/
// Forwards to gate_C() with the width multiplied by 1e6 and a second argument
// of 0, and returns the result as a double.
// NOTE(review): the inline /*u*/ suggests w is in metres and gate_C expects
// microns — confirm units against gate_C's declaration.
// NOTE(review): rendered diff — the return line appears twice (pre/post change).
double
Router::gate_cap(double w) {
return (double) gate_C (w*1e6 /*u*/, 0);
return (double) gate_C (w*1e6 /*u*/, 0);
}
/*Function to calculate the diffusion capacitance*/
// Forwards to drain_C_() with the width multiplied by 1e6, the transistor type
// (0 = n-mos, 1 = p-mos), the stack count s truncated to int, a literal 1 and
// g_tp.cell_h_def, and returns the result as a double.
// NOTE(review): the inline /*u*/ suggests w is in metres and drain_C_ expects
// microns — confirm units against drain_C_'s declaration.
// NOTE(review): rendered diff — the second signature line and the return line
// appear twice (pre/post change).
double
Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
double s /*number of stacking transistors*/) {
return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
double s /*number of stacking transistors*/) {
return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
}
@ -100,212 +100,216 @@ Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
// Model for simple transmission gate
// Input capacitance of the transmission-gate connector: the sum of the
// diffusion capacitances of its n-mos (NTtr) and p-mos (PTtr) devices, each
// with a stack count of 1.
// NOTE(review): rendered diff — the return line appears twice (pre/post change).
double
Router::transmission_buf_inpcap() {
return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
}
// Output capacitance of the transmission-gate connector. Computed with the
// same expression as transmission_buf_inpcap(): diffusion capacitance of NTtr
// (n-mos) plus PTtr (p-mos), each with a stack count of 1.
// NOTE(review): rendered diff — the return line appears twice (pre/post change).
double
Router::transmission_buf_outcap() {
return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
}
// Control capacitance of the transmission-gate connector: the sum of the gate
// capacitances of its n-mos (NTtr) and p-mos (PTtr) devices.
// NOTE(review): rendered diff — the return line appears twice (pre/post change).
double
Router::transmission_buf_ctrcap() {
return gate_cap(NTtr)+gate_cap(PTtr);
return gate_cap(NTtr) + gate_cap(PTtr);
}
// Capacitance of one crossbar input line. Sums:
//   - wire capacitance Cw3() over a length of O * flit_size * wt,
//   - O copies of the connector input capacitance,
//   - gate and diffusion capacitance of the NTid / PTid devices.
// NOTE(review): presumably NTid/PTid are the input-driver transistors —
// confirm against the constructor.
// NOTE(review): rendered diff — the return statement appears twice (pre/post
// change).
double
Router::crossbar_inpline() {
return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
}
// Capacitance of one crossbar output line. Sums:
//   - wire capacitance Cw3() over a length of I * flit_size * ht,
//   - I copies of the connector output capacitance,
//   - gate and diffusion capacitance of the NTod / PTod devices.
// NOTE(review): presumably NTod/PTod are the output-driver transistors —
// confirm against the constructor.
// NOTE(review): rendered diff — the return statement appears twice (pre/post
// change).
double
Router::crossbar_outline() {
return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
}
// Capacitance of one crossbar control line. Sums:
//   - wire capacitance Cw3() over a length of 0.5 * O * flit_size * wt,
//   - flit_size copies of the connector control capacitance,
//   - diffusion and gate capacitance of the NTi / PTi devices.
// NOTE(review): rendered diff — the return statement appears twice (pre/post
// change).
double
Router::crossbar_ctrline() {
return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
gate_cap(NTi) + gate_cap(PTi));
return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
gate_cap(NTi) + gate_cap(PTi));
}
// Crossbar traversal figure: C * Vdd^2 * flit_size for the input line plus the
// same for the output line. Each term is halved and the sum is then doubled,
// so the factors cancel and the result equals
// (crossbar_inpline() + crossbar_outline()) * Vdd^2 * flit_size.
// NOTE(review): despite the "power" name, C*V^2 has units of energy — confirm
// how callers interpret the value.
// NOTE(review): rendered diff — the return statement appears twice (pre/post
// change).
double
Router::tr_crossbar_power() {
return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 +
crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2;
}
void Router::buffer_stats()
{
DynamicParameter dyn_p;
dyn_p.is_tag = false;
dyn_p.pure_cam = false;
dyn_p.fully_assoc = false;
dyn_p.pure_ram = true;
dyn_p.is_dram = false;
dyn_p.is_main_mem = false;
dyn_p.num_subarrays = 1;
dyn_p.num_mats = 1;
dyn_p.Ndbl = 1;
dyn_p.Ndwl = 1;
dyn_p.Nspd = 1;
dyn_p.deg_bl_muxing = 1;
dyn_p.deg_senseamp_muxing_non_associativity = 1;
dyn_p.Ndsam_lev_1 = 1;
dyn_p.Ndsam_lev_2 = 1;
dyn_p.Ndcm = 1;
dyn_p.number_addr_bits_mat = 8;
dyn_p.number_way_select_signals_mat = 1;
dyn_p.number_subbanks_decode = 0;
dyn_p.num_act_mats_hor_dir = 1;
dyn_p.V_b_sense = Vdd; // FIXME check power calc.
dyn_p.ram_cell_tech_type = 0;
dyn_p.num_r_subarray = (int) vc_buffer_size;
dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
dyn_p.num_mats_h_dir = 1;
dyn_p.num_mats_v_dir = 1;
dyn_p.num_do_b_subbank = (int)flit_size;
dyn_p.num_di_b_subbank = (int)flit_size;
dyn_p.num_do_b_mat = (int) flit_size;
dyn_p.num_di_b_mat = (int) flit_size;
dyn_p.num_do_b_mat = (int) flit_size;
dyn_p.num_di_b_mat = (int) flit_size;
dyn_p.num_do_b_bank_per_port = (int) flit_size;
dyn_p.num_di_b_bank_per_port = (int) flit_size;
dyn_p.out_w = (int) flit_size;
void Router::buffer_stats() {
DynamicParameter dyn_p;
dyn_p.is_tag = false;
dyn_p.pure_cam = false;
dyn_p.fully_assoc = false;
dyn_p.pure_ram = true;
dyn_p.is_dram = false;
dyn_p.is_main_mem = false;
dyn_p.num_subarrays = 1;
dyn_p.num_mats = 1;
dyn_p.Ndbl = 1;
dyn_p.Ndwl = 1;
dyn_p.Nspd = 1;
dyn_p.deg_bl_muxing = 1;
dyn_p.deg_senseamp_muxing_non_associativity = 1;
dyn_p.Ndsam_lev_1 = 1;
dyn_p.Ndsam_lev_2 = 1;
dyn_p.Ndcm = 1;
dyn_p.number_addr_bits_mat = 8;
dyn_p.number_way_select_signals_mat = 1;
dyn_p.number_subbanks_decode = 0;
dyn_p.num_act_mats_hor_dir = 1;
dyn_p.V_b_sense = Vdd; // FIXME check power calc.
dyn_p.ram_cell_tech_type = 0;
dyn_p.num_r_subarray = (int) vc_buffer_size;
dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
dyn_p.num_mats_h_dir = 1;
dyn_p.num_mats_v_dir = 1;
dyn_p.num_do_b_subbank = (int)flit_size;
dyn_p.num_di_b_subbank = (int)flit_size;
dyn_p.num_do_b_mat = (int) flit_size;
dyn_p.num_di_b_mat = (int) flit_size;
dyn_p.num_do_b_mat = (int) flit_size;
dyn_p.num_di_b_mat = (int) flit_size;
dyn_p.num_do_b_bank_per_port = (int) flit_size;
dyn_p.num_di_b_bank_per_port = (int) flit_size;
dyn_p.out_w = (int) flit_size;
dyn_p.use_inp_params = 1;
dyn_p.num_wr_ports = (unsigned int) vc_count;
dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
dyn_p.num_rw_ports = 0;
dyn_p.num_se_rd_ports =0;
dyn_p.num_search_ports =0;
dyn_p.use_inp_params = 1;
dyn_p.num_wr_ports = (unsigned int) vc_count;
dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
dyn_p.num_rw_ports = 0;
dyn_p.num_se_rd_ports = 0;
dyn_p.num_search_ports = 0;
dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
(dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
(dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
Mat buff(dyn_p);
buff.compute_delays(0);
buff.compute_power_energy();
buffer.power.readOp = buff.power.readOp;
buffer.power.writeOp = buffer.power.readOp; //FIXME
buffer.area = buff.area;
Mat buff(dyn_p);
buff.compute_delays(0);
buff.compute_power_energy();
buffer.power.readOp = buff.power.readOp;
buffer.power.writeOp = buffer.power.readOp; //FIXME
buffer.area = buff.area;
}
/*
 * Fills in the crossbar delay, power and area.
 *
 * The `if (1)` selects the Crossbar component model. The else branch (an
 * analytical transmission-gate model) is currently unreachable and is kept
 * only as the alternative implementation.
 */
void
Router::cb_stats () {
    if (1) {
        Crossbar c_b(I, O, flit_size);
        c_b.compute_power();
        crossbar.delay = c_b.delay;
        crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
        crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
        crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
        crossbar.area = c_b.area;
    } else {
        crossbar.power.readOp.dynamic = tr_crossbar_power();
        crossbar.power.readOp.leakage = flit_size * I * O *
            cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
        crossbar.power.readOp.gate_leakage = flit_size * I * O *
            cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
    }
}
void
Router::get_router_power()
{
/* calculate buffer stats */
buffer_stats();
Router::get_router_power() {
/* calculate buffer stats */
buffer_stats();
/* calculate cross-bar stats */
cb_stats();
/* calculate cross-bar stats */
cb_stats();
/* calculate arbiter stats */
Arbiter vcarb(vc_count, flit_size, buffer.area.w);
Arbiter cbarb(I, flit_size, crossbar.area.w);
vcarb.compute_power();
cbarb.compute_power();
arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
cbarb.power.readOp.dynamic * O;
arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
cbarb.power.readOp.leakage * O;
arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
cbarb.power.readOp.gate_leakage * O;
/* calculate arbiter stats */
Arbiter vcarb(vc_count, flit_size, buffer.area.w);
Arbiter cbarb(I, flit_size, crossbar.area.w);
vcarb.compute_power();
cbarb.compute_power();
arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
cbarb.power.readOp.dynamic * O;
arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
cbarb.power.readOp.leakage * O;
arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
cbarb.power.readOp.gate_leakage * O;
// arb_stats();
power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
crossbar.power.readOp.dynamic +
arbiter.power.readOp.dynamic)*MIN(I, O)*M;
double pppm_t[4] = {1,I,I,1};
power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
power.readOp.dynamic = ((buffer.power.readOp.dynamic +
buffer.power.writeOp.dynamic) +
crossbar.power.readOp.dynamic +
arbiter.power.readOp.dynamic) * MIN(I, O) * M;
double pppm_t[4] = {1, I, I, 1};
power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) *
pppm_lkg;
}
/*
 * Sets the router frequency, cycle time and pipeline depth.
 *
 * FREQUENCY starts at a hard-coded 5 GHz. If the resulting cycle time is
 * shorter than 17 FO4 delays (max_cyc), FREQUENCY is lowered to the value
 * that corresponds to max_cyc.
 * NOTE(review): cycle_time is not recomputed after FREQUENCY is lowered -
 * confirm whether callers expect the original or the limited cycle time.
 */
void
Router::get_router_delay () {
    FREQUENCY = 5; // move this to config file --TODO
    cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps
    delay = 4;
    max_cyc = 17 * g_tp.FO4; //s
    max_cyc *= 1e12; //ps
    if (cycle_time < max_cyc) {
        FREQUENCY = (1 / max_cyc) * 1e3; //GHz
    }
}
/*
 * Router footprint: I buffers stacked vertically, with the crossbar placed
 * beside them. Requires buffer.area and crossbar.area to be set already.
 */
void
Router::get_router_area() {
    area.h = I * buffer.area.h;
    area.w = buffer.area.w + crossbar.area.w;
}
void
Router::calc_router_parameters()
{
/* calculate router frequency and pipeline cycles */
get_router_delay();
void
Router::calc_router_parameters() {
/* calculate router frequency and pipeline cycles */
get_router_delay();
/* router power stats */
get_router_power();
/* router power stats */
get_router_power();
/* area stats */
get_router_area();
/* area stats */
get_router_area();
}
/*
 * Prints a human-readable summary of the router statistics to stdout.
 * Areas are scaled by 1e-6 and labelled mm^2, dynamic values by 1e9 (nJ)
 * and leakage by 1e3 (mW).
 */
void
Router::print_router() {
    cout << "\n\nRouter stats:\n";
    cout << "\tRouter Area - " << area.get_area()*1e-6 << "(mm^2)\n";
    cout << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3
         << "GHz\n";
    cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n";
    cout << "\tNo. of Virtual channels - " << vc_count << "\n";
    cout << "\tNo. of pipeline stages - " << delay << endl;
    cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
    cout << "\tNo. of buffer entries per virtual channel - "
         << vc_buffer_size << "\n";
    cout << "\tSimple buffer Area - " << buffer.area.get_area()*1e-6
         << "(mm^2)\n";
    cout << "\tSimple buffer access (Read) - "
         << buffer.power.readOp.dynamic * 1e9 << " (nJ)\n";
    cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3
         << " (mW)\n";
    cout << "\tCrossbar Area - " << crossbar.area.get_area()*1e-6
         << "(mm^2)\n";
    cout << "\tCross bar access energy - "
         << crossbar.power.readOp.dynamic * 1e9 << " (nJ)\n";
    cout << "\tCross bar leakage power - "
         << crossbar.power.readOp.leakage * 1e3 << " (mW)\n";
    cout << "\tArbiter access energy (VC arb + Crossbar arb) - "
         << arbiter.power.readOp.dynamic * 1e9 << " (nJ)\n";
    cout << "\tArbiter leakage (VC arb + Crossbar arb) - "
         << arbiter.power.readOp.leakage * 1e3 << " (mW)\n";
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -47,9 +48,8 @@
#include "parameter.h"
#include "wire.h"
class Router : public Component
{
public:
class Router : public Component {
public:
Router(
double flit_size_,
double vc_buf, /* vc size = vc_buffer_size * flit_size */
@ -70,9 +70,9 @@ class Router : public Component
double vc_count;
double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
private:
TechnologyParameter::DeviceType *deviceType;
double FREQUENCY; // move this to config file --TODO
private:
TechnologyParameter::DeviceType *deviceType;
double FREQUENCY; // move this to config file --TODO
double Cw3(double len);
double gate_cap(double w);
double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -39,158 +40,152 @@
#include "subarray.h"
/*
 * Builds a subarray from the dynamic parameters and computes its area and
 * its wordline/bitline capacitances.
 *
 * dp_    - array organisation (rows, columns, cell sizes, array type).
 * is_fa_ - true when the subarray belongs to a fully associative array
 *          (CAM tag part plus RAM data part).
 *
 * For a plain RAM the column count is extended by the ECC bits (when
 * enabled) and the width includes the wordline stitching overhead. For a
 * CAM / fully associative array the CAM and RAM column counts are extended
 * separately, and the height has one row more than num_rows.
 */
Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
    dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
    num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
    cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
    if (!(is_fa || dp.pure_cam)) {
        // ECC overhead
        num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols /
                                                  num_bits_per_ecc_b_) : 0);
        uint32_t ram_num_cells_wl_stitching =
            (dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
            (dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;

        area.h = cell.h * num_rows;

        // NOTE(review): if num_cols is an integer type, the division below
        // truncates before ceil() is applied - confirm this is intended.
        area.w = cell.w * num_cols +
            ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
    } else { //cam fa
        //should not add dummy row here since the dummy row do not need decoder
        if (is_fa) { // fully associative cache
            num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
            num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
            num_cols = num_cols_fa_cam + num_cols_fa_ram;
        } else {
            num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
            num_cols_fa_ram = 0;
            num_cols = num_cols_fa_cam;
        }

        //height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
        area.h = cam_cell.h * (num_rows + 1);
        area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
            + ceil((num_cols_fa_cam + num_cols_fa_ram) /
                   sram_num_cells_wl_stitching_) *
            g_tp.ram_wl_stitching_overhead_
            //the overhead for the NAND gate to connect the two halves
            + 16 * g_tp.wire_local.pitch
            //the overhead for the drivers from matchline to wordline of RAM
            + 128 * g_tp.wire_local.pitch;
    }

    assert(area.h > 0);
    assert(area.w > 0);
    compute_C();
}
// Nothing to release: the destructor body is empty.
Subarray::~Subarray() {
}
double Subarray::get_total_cell_area()
{
double Subarray::get_total_cell_area() {
// return (is_fa==false? cell.get_area() * num_rows * num_cols
// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
if (!(is_fa || dp.pure_cam))
return (cell.get_area() * num_rows * num_cols);
else if (is_fa)
{ //for FA, this area includes the dummy cells in SRAM arrays.
//return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
//cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
return (cell.get_area() * num_rows * num_cols);
else if (is_fa) {
//for FA, this area includes the dummy cells in SRAM arrays.
//return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
//cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
return (cam_cell.h * (num_rows + 1) *
(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
} else {
return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam );
}
else
return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
}
void Subarray::compute_C()
{
double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
double C_b_row_drain_C;
void Subarray::compute_C() {
double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
double C_b_row_drain_C;
if (dp.is_dram)
{
C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
if (dp.is_dram) {
C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
if (dp.ram_cell_tech_type == comm_dram)
{
C_bl = num_rows * C_b_metal;
if (dp.ram_cell_tech_type == comm_dram) {
C_bl = num_rows * C_b_metal;
} else {
C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
}
} else {
if (!(is_fa || dp.pure_cam)) {
C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
(g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
false, true) * 2 +
c_w_metal) * num_cols;
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
} else {
//Following is wordline not matchline
//CAM portion
c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
(g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
2.0, false, true) * 2 +
c_w_metal) * num_cols_fa_cam;
R_wl_cam = (r_w_metal) * num_cols_fa_cam;
if (!dp.pure_cam) {
//RAM portion
c_w_metal = cell.w * g_tp.wire_local.C_per_um;
r_w_metal = cell.w * g_tp.wire_local.R_per_um;
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
(g_tp.sram.b_w - 2 *
g_tp.sram.cell_a_w) / 2.0, false,
true) * 2 +
c_w_metal) * num_cols_fa_ram;
R_wl_ram = (r_w_metal) * num_cols_fa_ram;
} else {
C_wl_ram = R_wl_ram = 0;
}
C_wl = C_wl_cam + C_wl_ram;
C_wl += (16 + 128) * g_tp.wire_local.pitch *
g_tp.wire_local.C_per_um;
R_wl = R_wl_cam + R_wl_ram;
R_wl += (16 + 128) * g_tp.wire_local.pitch *
g_tp.wire_local.R_per_um;
//there are two ways to write to a FA,
//1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
//2) using separate wordline for read/write and search in RAM.
//We are using the second approach.
//Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
}
}
else
{
C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
}
}
else
{
if (!(is_fa ||dp.pure_cam))
{
C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
c_w_metal) * num_cols;
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
}
else
{
//Following is wordline not matchline
//CAM portion
c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
c_w_metal) * num_cols_fa_cam;
R_wl_cam = (r_w_metal) * num_cols_fa_cam;
if (!dp.pure_cam)
{
//RAM portion
c_w_metal = cell.w * g_tp.wire_local.C_per_um;
r_w_metal = cell.w * g_tp.wire_local.R_per_um;
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
c_w_metal) * num_cols_fa_ram;
R_wl_ram = (r_w_metal) * num_cols_fa_ram;
}
else
{
C_wl_ram = R_wl_ram =0;
}
C_wl = C_wl_cam + C_wl_ram;
C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
R_wl = R_wl_cam + R_wl_ram;
R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
//there are two ways to write to a FA,
//1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
//2) using separate wordline for read/write and search in RAM.
//We are using the second approach.
//Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
}
}
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -41,9 +42,8 @@
using namespace std;
class Subarray : public Component
{
public:
class Subarray : public Component {
public:
Subarray(const DynamicParameter & dp, bool is_fa_);
~Subarray();
@ -59,7 +59,7 @@ class Subarray : public Component
double C_wl, C_wl_cam, C_wl_ram;
double R_wl, R_wl_cam, R_wl_ram;
double C_bl, C_bl_cam;
private:
private:
void compute_C(); // compute bitline and wordline capacitance
};

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -37,390 +38,390 @@
#include "uca.h"
/*
 * Builds a uniform cache array: one Bank replicated nbanks times and
 * connected by address, data-in and data-out H-trees (plus search-in and
 * search-out H-trees for CAM / fully associative arrays), then computes
 * delays and power/energy.
 *
 * dyn_p - array organisation; copied into the `dp` member.
 */
UCA::UCA(const DynamicParameter & dyn_p)
    : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) {
    // Split the banks between the vertical and horizontal direction; the
    // split depends on whether a bank is taller than it is wide.
    int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)
                                  / 2 : (_log2(nbanks) - _log2(nbanks) / 2));
    int num_banks_hor_dir = nbanks / num_banks_ver_dir;

    // Port counts come either from dp or from the global input parameters.
    if (dp.use_inp_params) {
        RWP = dp.num_rw_ports;
        ERP = dp.num_rd_ports;
        EWP = dp.num_wr_ports;
        SCHP = dp.num_search_ports;
    } else {
        RWP = g_ip->num_rw_ports;
        ERP = g_ip->num_rd_ports;
        EWP = g_ip->num_wr_ports;
        SCHP = g_ip->num_search_ports;
    }

    num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) *
        (RWP + ERP + EWP);
    num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
    num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
    num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
    num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;

    if (!dp.fully_assoc && !dp.pure_cam) {
        if (g_ip->fast_access && dp.is_tag == false) {
            num_do_b_bank *= g_ip->data_assoc;
        }

        htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                  num_addr_b_bank, num_di_b_bank, 0,
                                  num_do_b_bank, 0, num_banks_ver_dir * 2,
                                  num_banks_hor_dir * 2, Add_htree, true);
        htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                   num_addr_b_bank, num_di_b_bank, 0,
                                   num_do_b_bank, 0, num_banks_ver_dir * 2,
                                   num_banks_hor_dir * 2, Data_in_htree, true);
        htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                    num_addr_b_bank, num_di_b_bank, 0,
                                    num_do_b_bank, 0, num_banks_ver_dir * 2,
                                    num_banks_hor_dir * 2, Data_out_htree, true);
        // No search H-trees for a plain RAM; null them so the pointers are
        // never left indeterminate.
        htree_in_search = 0;
        htree_out_search = 0;
    } else {
        htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                  num_addr_b_bank, num_di_b_bank,
                                  num_si_b_bank, num_do_b_bank, num_so_b_bank,
                                  num_banks_ver_dir * 2, num_banks_hor_dir * 2,
                                  Add_htree, true);
        htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                   num_addr_b_bank, num_di_b_bank,
                                   num_si_b_bank, num_do_b_bank, num_so_b_bank,
                                   num_banks_ver_dir * 2, num_banks_hor_dir * 2,
                                   Data_in_htree, true);
        htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                    num_addr_b_bank, num_di_b_bank,
                                    num_si_b_bank, num_do_b_bank,
                                    num_so_b_bank, num_banks_ver_dir * 2,
                                    num_banks_hor_dir * 2, Data_out_htree, true);
        htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                     num_addr_b_bank, num_di_b_bank,
                                     num_si_b_bank, num_do_b_bank,
                                     num_so_b_bank, num_banks_ver_dir * 2,
                                     num_banks_hor_dir * 2, Data_in_htree, true);
        htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                      num_addr_b_bank, num_di_b_bank,
                                      num_si_b_bank, num_do_b_bank,
                                      num_so_b_bank, num_banks_ver_dir * 2,
                                      num_banks_hor_dir * 2, Data_out_htree,
                                      true);
    }

    // The array footprint is taken from the data-in H-tree.
    area.w = htree_in_data->area.w;
    area.h = htree_in_data->area.h;

    area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;

    // delay calculation
    double inrisetime = 0.0;
    compute_delays(inrisetime);
    compute_power_energy();
}
/*
 * Releases the H-trees allocated by the constructor.
 *
 * The search H-trees exist only when the constructor took its CAM / fully
 * associative path, so they are freed under the same condition. Previously
 * they were never freed.
 * NOTE(review): this relies on dp.fully_assoc and dp.pure_cam not changing
 * after construction - confirm.
 */
UCA::~UCA() {
    delete htree_in_add;
    delete htree_in_data;
    delete htree_out_data;
    if (dp.fully_assoc || dp.pure_cam) {
        delete htree_in_search;
        delete htree_out_search;
    }
}
// Computes the timing figures of the whole array from the bank, mat and
// H-tree delays: it calls bank.compute_delays(inrisetime) and then sets
// access_time, cycle_time, multisubbank_interleave_cycle_time and
// precharge_delay, plus the intermediate delay_* members.
// Parameter inrisetime is forwarded unchanged to bank.compute_delays().
// Returns the value produced by bank.compute_delays() (the output rise time).
// NOTE(review): the old and new formatting of this function are interleaved
// below, so most statements appear twice.
double UCA::compute_delays(double inrisetime)
{
double outrisetime = bank.compute_delays(inrisetime);
double UCA::compute_delays(double inrisetime) {
double outrisetime = bank.compute_delays(inrisetime);
double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
bank.mat.sa_mux_lev_1_predec->delay +
bank.mat.sa_mux_lev_1_dec->delay;
delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
bank.mat.sa_mux_lev_2_predec->delay +
bank.mat.sa_mux_lev_2_dec->delay;
double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
// Address path from the array edge to a mat: array-level address H-tree
// followed by the bank-level address H-tree.
double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
bank.mat.sa_mux_lev_1_predec->delay +
bank.mat.sa_mux_lev_1_dec->delay;
delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
bank.mat.sa_mux_lev_2_predec->delay +
bank.mat.sa_mux_lev_2_dec->delay;
double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
delay_before_subarray_output_driver =
MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
bank.htree_out_data->delay + htree_out_data->delay;
access_time = bank.mat.delay_comparator;
// The slowest of the row, column and two sense-amp-mux paths decides when
// the subarray output driver can fire.
delay_before_subarray_output_driver =
MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
bank.htree_out_data->delay + htree_out_data->delay;
// Provisional value only: both branches of the fully_assoc test below
// assign access_time again.
access_time = bank.mat.delay_comparator;
double ram_delay_inside_mat;
if (dp.fully_assoc)
{
//delay of FA contains both CAM tag and RAM data
{ //delay of CAM
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
access_time = htree_in_add->delay + bank.htree_in_add->delay;
//delay of fully-associative data array
access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
}
}
else
{
access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
}
if (dp.is_main_mem)
{
double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
delay_from_subarray_out_drv_to_out;
access_time = t_rcd + cas_latency;
}
double temp;
if (!dp.fully_assoc)
{
temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
if (dp.is_dram)
{
temp += bank.mat.delay_writeback; // temp stores random cycle time
double ram_delay_inside_mat;
if (dp.fully_assoc) {
//delay of FA contains both CAM tag and RAM data
{ //delay of CAM
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
access_time = htree_in_add->delay + bank.htree_in_add->delay;
//delay of fully-associative data array
access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
}
} else {
access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
}
// Main memory overrides access_time with t_rcd (row path) plus
// cas_latency (slower sense-amp-mux path plus the output path).
if (dp.is_main_mem) {
double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
delay_from_subarray_out_drv_to_out;
access_time = t_rcd + cas_latency;
}
temp = MAX(temp, bank.mat.r_predec->delay);
temp = MAX(temp, bank.mat.b_mux_predec->delay);
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
}
else
{
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
+ bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
// temp accumulates the candidate cycle time; it is stored into cycle_time
// further down.
double temp;
temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
}
if (!dp.fully_assoc) {
temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
if (dp.is_dram) {
temp += bank.mat.delay_writeback; // temp stores random cycle time
}
// The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
if (g_ip->rpters_in_htree == false)
{
temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
}
cycle_time = temp;
double delay_req_network = max_delay_before_row_decoder;
double delay_rep_network = delay_from_subarray_out_drv_to_out;
multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
// The cycle time can never be shorter than any single predecoder stage.
temp = MAX(temp, bank.mat.r_predec->delay);
temp = MAX(temp, bank.mat.b_mux_predec->delay);
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
} else {
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
+ bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
if (dp.is_main_mem)
{
multisubbank_interleave_cycle_time = htree_in_add->delay;
precharge_delay = htree_in_add->delay +
bank.htree_in_add->delay + bank.mat.delay_writeback +
bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
cycle_time = access_time + precharge_delay;
}
else
{
precharge_delay = 0;
}
temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
}
double dram_array_availability = 0;
if (dp.is_dram)
{
dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
}
// The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
if (g_ip->rpters_in_htree == false) {
temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
}
cycle_time = temp;
return outrisetime;
// Interleaving cycle time is bounded by the slower of the request path
// (up to the row decoder) and the reply path (output driver to array edge).
double delay_req_network = max_delay_before_row_decoder;
double delay_rep_network = delay_from_subarray_out_drv_to_out;
multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
// Main memory replaces both cycle figures and adds an explicit precharge
// delay; every other configuration reports a precharge delay of zero.
if (dp.is_main_mem) {
multisubbank_interleave_cycle_time = htree_in_add->delay;
precharge_delay = htree_in_add->delay +
bank.htree_in_add->delay + bank.mat.delay_writeback +
bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
cycle_time = access_time + precharge_delay;
} else {
precharge_delay = 0;
}
// NOTE(review): dram_array_availability is a local that is assigned here
// but not read again before the function returns.
double dram_array_availability = 0;
if (dp.is_dram) {
dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
}
return outrisetime;
}
// Computes the energy and leakage figures of the array. It calls
// bank.compute_power_energy(), copies bank.power into power, adds the
// contribution of the array-level H-trees (power_routing_to_bank), and then
// fills the derived members: open/closed-page read energies, burst energy,
// activate/read/write/precharge energies, sub-bank and network leakage, and
// refresh_power when dp.is_dram is set.
// Takes no parameters and returns nothing; results are stored in members.
// NOTE(review): the old and new formatting of this function are interleaved
// below, so most statements appear twice.
// note: currently, power numbers are for a bank of an array
void UCA::compute_power_energy()
{
bank.compute_power_energy();
power = bank.power;
void UCA::compute_power_energy() {
bank.compute_power_energy();
power = bank.power;
power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
if (dp.fully_assoc || dp.pure_cam)
power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
// Array-level routing energy: a read drives the address-in and data-out
// H-trees, a write drives the address-in and data-in H-trees.
power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
// The search H-trees are only dereferenced for fully-associative or
// pure-CAM arrays.
if (dp.fully_assoc || dp.pure_cam)
power_routing_to_bank.searchOp.dynamic =
htree_in_search->power.searchOp.dynamic +
htree_out_search->power.searchOp.dynamic;
power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
htree_in_data->power.readOp.leakage +
htree_out_data->power.readOp.leakage;
// NOTE(review): the leakage terms are accumulated with +=, and
// searchOp.dynamic is added to power unconditionally further down.
// Presumably power_routing_to_bank starts out zeroed; confirm.
power_routing_to_bank.readOp.leakage +=
htree_in_add->power.readOp.leakage +
htree_in_data->power.readOp.leakage +
htree_out_data->power.readOp.leakage;
power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
htree_in_data->power.readOp.gate_leakage +
htree_out_data->power.readOp.gate_leakage;
if (dp.fully_assoc || dp.pure_cam)
{
power_routing_to_bank.readOp.gate_leakage +=
htree_in_add->power.readOp.gate_leakage +
htree_in_data->power.readOp.gate_leakage +
htree_out_data->power.readOp.gate_leakage;
if (dp.fully_assoc || dp.pure_cam) {
power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
}
}
power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
power.readOp.leakage += power_routing_to_bank.readOp.leakage;
power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
// Fold the routing contribution into the totals copied from the bank.
power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
power.readOp.leakage += power_routing_to_bank.readOp.leakage;
power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
// calculate total write energy per access
power.writeOp.dynamic = power.readOp.dynamic
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- power_routing_to_bank.readOp.dynamic
+ power_routing_to_bank.writeOp.dynamic
+ bank.htree_in_data->power.readOp.dynamic
- bank.htree_out_data->power.readOp.dynamic;
// calculate total write energy per access
// Derived from the read energy by swapping the read-only terms (bitline
// read, read routing, bank data-out H-tree) for their write counterparts.
power.writeOp.dynamic = power.readOp.dynamic
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
- power_routing_to_bank.readOp.dynamic
+ power_routing_to_bank.writeOp.dynamic
+ bank.htree_in_data->power.readOp.dynamic
- bank.htree_out_data->power.readOp.dynamic;
if (dp.is_dram == false)
{
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
}
// For non-DRAM arrays the sense-amplifier read energy is also removed from
// the write energy.
if (dp.is_dram == false) {
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
}
dyn_read_energy_from_closed_page = power.readOp.dynamic;
dyn_read_energy_from_open_page = power.readOp.dynamic -
(bank.mat.r_predec->power.readOp.dynamic +
bank.mat.power_row_decoders.readOp.dynamic +
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
bank.mat.power_sa.readOp.dynamic +
bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
// A closed-page read costs the full read energy; an open-page read leaves
// out the row predecoder, row decoders, bitline precharge/equalise drivers,
// sense amplifiers and bitlines.
dyn_read_energy_from_closed_page = power.readOp.dynamic;
dyn_read_energy_from_open_page = power.readOp.dynamic -
(bank.mat.r_predec->power.readOp.dynamic +
bank.mat.power_row_decoders.readOp.dynamic +
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
bank.mat.power_sa.readOp.dynamic +
bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
dyn_read_energy_remaining_words_in_burst =
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
bank.htree_out_data->power.readOp.dynamic +
power_routing_to_bank.readOp.dynamic);
dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
// Energy of the transfers after the first one in a burst: the transfer
// count is MAX(burst_len / int_prefetch_w, 1) - 1, and each one pays the
// sense-amp-mux, output-driver and data-out routing energy.
dyn_read_energy_remaining_words_in_burst =
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
bank.htree_out_data->power.readOp.dynamic +
power_routing_to_bank.readOp.dynamic);
dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
activate_energy = htree_in_add->power.readOp.dynamic +
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
(bank.mat.r_predec->power.readOp.dynamic +
bank.mat.power_row_decoders.readOp.dynamic +
bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
read_energy = (htree_in_add->power.readOp.dynamic +
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
bank.htree_out_data->power.readOp.dynamic +
htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
write_energy = (htree_in_add->power.readOp.dynamic +
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
htree_in_data->power.readOp.dynamic +
bank.htree_in_data->power.readOp.dynamic +
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
// Per-command energies (activate, read, write, precharge). The read and
// write energies are scaled by the burst length.
activate_energy = htree_in_add->power.readOp.dynamic +
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
(bank.mat.r_predec->power.readOp.dynamic +
bank.mat.power_row_decoders.readOp.dynamic +
bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
read_energy = (htree_in_add->power.readOp.dynamic +
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
bank.htree_out_data->power.readOp.dynamic +
htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
write_energy = (htree_in_add->power.readOp.dynamic +
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
htree_in_data->power.readOp.dynamic +
bank.htree_in_data->power.readOp.dynamic +
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
leak_power_subbank_closed_page =
(bank.mat.r_predec->power.readOp.leakage +
bank.mat.b_mux_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
bank.mat.power_row_decoders.readOp.leakage +
bank.mat.power_bit_mux_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
// Sub-bank leakage with the page closed: subthreshold leakage of the
// predecoders, decoders and sense amplifiers first, then the gate leakage
// of the predecoders and decoders is added on top.
leak_power_subbank_closed_page =
(bank.mat.r_predec->power.readOp.leakage +
bank.mat.b_mux_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
bank.mat.power_row_decoders.readOp.leakage +
bank.mat.power_bit_mux_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
leak_power_subbank_closed_page +=
(bank.mat.r_predec->power.readOp.gate_leakage +
bank.mat.b_mux_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
bank.mat.power_row_decoders.readOp.gate_leakage +
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
//bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
leak_power_subbank_closed_page +=
(bank.mat.r_predec->power.readOp.gate_leakage +
bank.mat.b_mux_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
bank.mat.power_row_decoders.readOp.gate_leakage +
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
//bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
leak_power_subbank_open_page =
(bank.mat.r_predec->power.readOp.leakage +
bank.mat.b_mux_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
bank.mat.power_row_decoders.readOp.leakage +
bank.mat.power_bit_mux_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
// Same sum for the open-page state; only the sense-amplifier leakage term
// differs from the closed-page version.
leak_power_subbank_open_page =
(bank.mat.r_predec->power.readOp.leakage +
bank.mat.b_mux_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
bank.mat.power_row_decoders.readOp.leakage +
bank.mat.power_bit_mux_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
leak_power_subbank_open_page +=
(bank.mat.r_predec->power.readOp.gate_leakage +
bank.mat.b_mux_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
bank.mat.power_row_decoders.readOp.gate_leakage +
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
//bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
leak_power_subbank_open_page +=
(bank.mat.r_predec->power.readOp.gate_leakage +
bank.mat.b_mux_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
bank.mat.power_row_decoders.readOp.gate_leakage +
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
//bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
leak_power_request_and_reply_networks =
power_routing_to_bank.readOp.leakage +
bank.htree_in_add->power.readOp.leakage +
bank.htree_in_data->power.readOp.leakage +
bank.htree_out_data->power.readOp.leakage;
// Leakage of the request/reply networks: array-level routing plus the three
// bank-level H-trees, subthreshold first and gate leakage added after.
leak_power_request_and_reply_networks =
power_routing_to_bank.readOp.leakage +
bank.htree_in_add->power.readOp.leakage +
bank.htree_in_data->power.readOp.leakage +
bank.htree_out_data->power.readOp.leakage;
leak_power_request_and_reply_networks +=
power_routing_to_bank.readOp.gate_leakage +
bank.htree_in_add->power.readOp.gate_leakage +
bank.htree_in_data->power.readOp.gate_leakage +
bank.htree_out_data->power.readOp.gate_leakage;
leak_power_request_and_reply_networks +=
power_routing_to_bank.readOp.gate_leakage +
bank.htree_in_add->power.readOp.gate_leakage +
bank.htree_in_data->power.readOp.gate_leakage +
bank.htree_out_data->power.readOp.gate_leakage;
if (dp.fully_assoc || dp.pure_cam)
{
if (dp.fully_assoc || dp.pure_cam) {
leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
}
if (dp.is_dram)
{ // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
refresh_power /= dp.dram_refresh_period;
}
if (dp.is_tag == false)
{
power.readOp.dynamic = dyn_read_energy_from_closed_page;
power.writeOp.dynamic = dyn_read_energy_from_closed_page
- dyn_read_energy_remaining_words_in_burst
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ (power_routing_to_bank.writeOp.dynamic -
power_routing_to_bank.readOp.dynamic -
bank.htree_out_data->power.readOp.dynamic +
bank.htree_in_data->power.readOp.dynamic) *
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
if (dp.is_dram == false)
{
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
}
}
// if DRAM, add refresh power to total leakage
if (dp.is_dram)
{
power.readOp.leakage += refresh_power;
}
// TODO: below should be avoided.
/*if (dp.is_main_mem)
{
power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
}*/
// if DRAM, add contribution of power spent in row predecoder drivers,
// blocks and decoders to refresh power
// The accumulated refresh energy is divided by dp.dram_refresh_period at
// the end to turn it into a power figure.
if (dp.is_dram) {
refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
refresh_power /= dp.dram_refresh_period;
}
assert(power.readOp.dynamic > 0);
assert(power.writeOp.dynamic > 0);
assert(power.readOp.leakage > 0);
// When dp.is_tag is false, the reported read energy becomes the closed-page
// burst read energy and the write energy is recomputed from it.
if (dp.is_tag == false) {
power.readOp.dynamic = dyn_read_energy_from_closed_page;
power.writeOp.dynamic = dyn_read_energy_from_closed_page
- dyn_read_energy_remaining_words_in_burst
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
+ (power_routing_to_bank.writeOp.dynamic -
power_routing_to_bank.readOp.dynamic -
bank.htree_out_data->power.readOp.dynamic +
bank.htree_in_data->power.readOp.dynamic) *
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
if (dp.is_dram == false) {
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
}
}
// if DRAM, add refresh power to total leakage
if (dp.is_dram) {
power.readOp.leakage += refresh_power;
}
// TODO: below should be avoided.
/*if (dp.is_main_mem)
{
power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
}*/
// Sanity checks: the final read/write dynamic energy and the read leakage
// must all be strictly positive.
assert(power.readOp.dynamic > 0);
assert(power.writeOp.dynamic > 0);
assert(power.readOp.leakage > 0);
}

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -40,9 +41,8 @@
#include "htree2.h"
#include "parameter.h"
class UCA : public Component
{
public:
class UCA : public Component {
public:
UCA(const DynamicParameter & dyn_p);
~UCA();
double compute_delays(double inrisetime); // returns outrisetime
@ -66,7 +66,10 @@ class UCA : public Component
int num_do_b_bank;
int num_si_b_bank;
int num_so_b_bank;
int RWP, ERP, EWP,SCHP;
int RWP;
int ERP;
int EWP;
int SCHP;
double area_all_dataramcells;
double dyn_read_energy_from_closed_page;

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -43,9 +44,8 @@
#include "component.h"
#include "parameter.h"
class Wire : public Component
{
public:
class Wire : public Component {
public:
Wire(enum Wire_type wire_model, double len /* in u*/,
int nsense = 1/* no. of sense amps connected to the low-swing wire */,
double width_scaling = 1,
@ -56,16 +56,16 @@ class Wire : public Component
~Wire();
Wire( double width_scaling = 1,
double spacing_scaling = 1,
enum Wire_placement wire_placement = outside_mat,
double resistivity = CU_RESISTIVITY,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
); // should be used only once for initializing static members
double spacing_scaling = 1,
enum Wire_placement wire_placement = outside_mat,
double resistivity = CU_RESISTIVITY,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
); // should be used only once for initializing static members
void init_wire();
void calculate_wire_stats();
void delay_optimal_wire();
double wire_cap(double len, bool call_from_outside=false);
double wire_cap(double len, bool call_from_outside = false);
double wire_res(double len);
void low_swing_model();
double signal_fall_time();
@ -81,9 +81,8 @@ class Wire : public Component
double wire_length;
double in_rise_time, out_rise_time;
void set_in_rise_time(double rt)
{
in_rise_time = rt;
void set_in_rise_time(double rt) {
in_rise_time = rt;
}
static Component global;
static Component global_5;
@ -95,10 +94,10 @@ class Wire : public Component
static double wire_spacing_init;
void print_wire();
private:
private:
int nsense; // no. of sense amps connected to a low-swing wire if it
// is broadcasting data to multiple destinations
// is broadcasting data to multiple destinations
// width and spacing scaling factor can be used
// to model low level wires or special
// fat wires

65
ext/mcpat/common.h Normal file
View file

@ -0,0 +1,65 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Author: Yasuko Eckert
*
***************************************************************************/
#ifndef __COMMON_H__
#define __COMMON_H__
#include <string>
#include "xmlParser.h"
// Macro definitions to do string comparison to specific parameter/stat.
// Note: These macros assume node_name and value variables of type XMLCSTR
// to exist already.
//
// Every macro below expands to an `else if (...)` clause, so it is only valid
// directly after the body of a preceding `if` or `else if`. The caller writes
// the terminating semicolon after the macro invocation.
// NOTE(review): the expansions call strcmp, atoi and atof and name `string`
// unqualified, while this header includes only <string> and "xmlParser.h".
// Presumably <cstring>/<cstdlib> and a using-declaration for std::string are
// provided by xmlParser.h or by the including file; confirm.

// STRCMP(var, str): `else if` clause that is taken when the C strings var and
// str compare equal.
#define STRCMP(var, str) else if (strcmp(var, str) == 0)
// ASSIGN_INT_IF(str, lhs): when node_name equals str, store atoi(value) in lhs.
#define ASSIGN_INT_IF(str, lhs) STRCMP(node_name, str) \
lhs = atoi(value)
// ASSIGN_FP_IF(str, lhs): when node_name equals str, store atof(value) in lhs.
#define ASSIGN_FP_IF(str, lhs) STRCMP(node_name, str) \
lhs = atof(value)
// ASSIGN_STR_IF(str, lhs): when node_name equals str, store string(value) in
// lhs.
#define ASSIGN_STR_IF(str, lhs) STRCMP(node_name, str) \
lhs = string(value)
// ASSIGN_ENUM_IF(str, lhs, etype): when node_name equals str, store
// atoi(value) cast to the enum type etype in lhs. The integer is cast without
// any range check.
#define ASSIGN_ENUM_IF(str, lhs, etype) STRCMP(node_name, str) \
lhs = (etype)atoi(value)
// Constants shared across many system components
// BITS_PER_BYTE is a floating-point literal, so arithmetic that uses it is
// carried out in double precision.
#define BITS_PER_BYTE 8.0
#define MIN_BUFFER_SIZE 64
// CAM structures do not have any associativity
#define CAM_ASSOC 0
#endif // __COMMON_H__

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -33,230 +34,305 @@
#ifndef CORE_H_
#define CORE_H_
#include "XML_Parse.h"
#include "array.h"
#include "basic_components.h"
#include "cacheunit.h"
#include "interconnect.h"
#include "logic.h"
#include "parameter.h"
#include "sharedcache.h"
class BranchPredictor :public Component {
public:
// Macros used in the various core-related classes
#define NUM_SOURCE_OPERANDS 2
#define NUM_INT_INST_SOURCE_OPERANDS 2
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
ArrayST * globalBPT;
ArrayST * localBPT;
ArrayST * L1_localBPT;
ArrayST * L2_localBPT;
ArrayST * chooser;
ArrayST * RAS;
bool exist;
BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~BranchPredictor();
// Plain data holder for branch predictor configuration values.
// BranchPredictor keeps one instance of this class as its
// `branch_pred_params` member.
// NOTE(review): the meaning and units of the individual fields are inferred
// from their names only; where they are filled in is not visible in this
// header -- confirm against the implementation.
class BranchPredictorParameters {
public:
// Array organisation (presumably associativity and bank count of the
// predictor tables -- confirm).
int assoc;
int nbanks;
// Local predictor sizing.
int local_l1_predictor_size;
int local_l2_predictor_size;
int local_predictor_entries;
// Global predictor sizing.
int global_predictor_bits;
int global_predictor_entries;
// Chooser sizing.
int chooser_predictor_bits;
int chooser_predictor_entries;
};
class BranchPredictor : public McPATComponent {
public:
ArrayST* globalBPT;
ArrayST* localBPT;
ArrayST* L1_localBPT;
ArrayST* L2_localBPT;
ArrayST* chooser;
ArrayST* RAS;
// Instruction fetch unit declaration that derives from Component and is
// constructed from a ParseXML handle plus a core index (ithCore_).
// The instruction cache is held by value; the instruction buffer, BTB,
// branch predictor and the three decoders are held through raw pointers.
// NOTE(review): ownership of the pointed-to objects is not visible in this
// header; presumably they are released by ~InstFetchU() -- confirm.
class InstFetchU :public Component {
public:
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
enum Cache_policy cache_p;
InstCache icache;
ArrayST * IB;
ArrayST * BTB;
BranchPredictor * BPT;
inst_decoder * ID_inst;
inst_decoder * ID_operand;
inst_decoder * ID_misc;
bool exist;
// The last parameter is spelled `exsit` (sic); it defaults to true.
InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
// is_tdp defaults to true in both calls below.
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~InstFetchU();
};
class SchedulerU :public Component {
public:
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double Iw_height, fp_Iw_height,ROB_height;
ArrayST * int_inst_window;
ArrayST * fp_inst_window;
ArrayST * ROB;
selection_logic * instruction_selection;
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
BranchPredictorParameters branch_pred_params;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
bool exist;
SchedulerU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~SchedulerU();
BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats,
bool exsit = true);
void set_params_stats();
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~BranchPredictor();
};
class RENAMINGU :public Component {
public:
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
double clockRate,executionTime;
CoreDynParam coredynp;
ArrayST * iFRAT;
ArrayST * fFRAT;
ArrayST * iRRAT;
ArrayST * fRRAT;
ArrayST * ifreeL;
ArrayST * ffreeL;
dep_resource_conflict_check * idcl;
dep_resource_conflict_check * fdcl;
ArrayST * RAHT;//register alias history table Used to store GC
bool exist;
RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~RENAMINGU();
// Plain data holder for the branch target buffer (BTB) configuration of the
// instruction fetch unit. InstFetchU keeps one instance as its
// `inst_fetch_params` member.
// NOTE(review): units are not stated in this header (e.g. whether the sizes
// are bytes or entries) -- confirm where the values are parsed.
class InstFetchParameters {
public:
int btb_size;
int btb_block_size;
int btb_assoc;
int btb_num_banks;
// NOTE(review): latency and throughput are integers here, while
// MemoryManagementParams declares its TLB latency/throughput as double --
// confirm that integer precision is intended.
int btb_latency;
int btb_throughput;
int btb_rw_ports;
};
class LoadStoreU :public Component {
public:
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
enum Cache_policy cache_p;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double lsq_height;
DataCache dcache;
ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
ArrayST * LoadQ;
bool exist;
LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~LoadStoreU();
// Plain data holder for BTB activity counts of the instruction fetch unit.
// InstFetchU keeps one instance as its `inst_fetch_stats` member.
// Counts are stored as double rather than as an integer type.
class InstFetchStatistics {
public:
double btb_read_accesses;
double btb_write_accesses;
};
class MemManU :public Component {
public:
class InstFetchU : public McPATComponent {
public:
CacheUnit* icache;
ArrayST* IB;
ArrayST* BTB;
BranchPredictor* BPT;
InstructionDecoder* ID_inst;
InstructionDecoder* ID_operand;
InstructionDecoder* ID_misc;
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
ArrayST * itlb;
ArrayST * dtlb;
bool exist;
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
InstFetchParameters inst_fetch_params;
InstFetchStatistics inst_fetch_stats;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
enum Cache_policy cache_p;
bool exist;
MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~MemManU();
};
// Register file unit declaration that derives from Component and is
// constructed from a ParseXML handle plus a core index (ithCore_).
// It references three ArrayST structures through raw pointers: IRF, FRF and
// RFWIN.
// NOTE(review): presumably the integer register file, the floating-point
// register file and a register-window array -- inferred from the names only.
class RegFU :public Component {
public:
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double int_regfile_height, fp_regfile_height;
ArrayST * IRF;
ArrayST * FRF;
ArrayST * RFWIN;
bool exist;
RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
// is_tdp defaults to true in both calls below.
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~RegFU();
};
class EXECU :public Component {
public:
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double lsq_height;
CoreDynParam coredynp;
RegFU * rfu;
SchedulerU * scheu;
FunctionalUnit * fp_u;
FunctionalUnit * exeu;
FunctionalUnit * mul;
interconnect * int_bypass;
interconnect * intTagBypass;
interconnect * int_mul_bypass;
interconnect * intTag_mul_Bypass;
interconnect * fp_bypass;
interconnect * fpTagBypass;
Component bypass;
bool exist;
EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~EXECU();
InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats,
bool exsit = true);
void set_params_stats();
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~InstFetchU();
};
class Core :public Component {
public:
class SchedulerU : public McPATComponent {
public:
static int ROB_STATUS_BITS;
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
InstFetchU * ifu;
LoadStoreU * lsu;
MemManU * mmu;
EXECU * exu;
RENAMINGU * rnu;
Pipeline * corepipe;
UndiffCore * undiffCore;
SharedCache * l2cache;
CoreDynParam coredynp;
//full_decoder inst_decoder;
//clock_network clockNetwork;
Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_);
void set_core_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~Core();
ArrayST* int_inst_window;
ArrayST* fp_inst_window;
ArrayST* ROB;
selection_logic* int_instruction_selection;
selection_logic* fp_instruction_selection;
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double Iw_height, fp_Iw_height, ROB_height;
bool exist;
SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats,
bool exist_ = true);
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~SchedulerU();
};
// Renaming unit component. Derives from McPATComponent and is constructed
// from an XML node, the shared InputParameter, and the enclosing core's
// parameters and statistics.
// NOTE(review): all sub-structures are held through raw pointers; ownership
// is not visible in this header and is presumably handled by ~RENAMINGU() --
// confirm.
class RENAMINGU : public McPATComponent {
public:
// NOTE(review): the i*/f* prefixes presumably denote integer and
// floating-point variants of each structure -- inferred from names only.
ArrayST* iFRAT;
ArrayST* fFRAT;
ArrayST* iRRAT;
ArrayST* fRRAT;
ArrayST* ifreeL;
ArrayST* ffreeL;
dep_resource_conflict_check* idcl;
dep_resource_conflict_check* fdcl;
// Register alias history table.
ArrayST* RAHT;
// These three members are stored by value (not as references or pointers).
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
bool exist;
// exist_ defaults to true.
RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats,
bool exist_ = true);
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~RENAMINGU();
};
// Load/store unit component. Derives from McPATComponent and is constructed
// from an XML node, the shared InputParameter, and the enclosing core's
// parameters and statistics.
// NOTE(review): the data cache and both queues are held through raw
// pointers; ownership is presumably handled by ~LoadStoreU() -- confirm.
class LoadStoreU : public McPATComponent {
public:
CacheUnit* dcache;
// LSQ is actually the store queue, but for in-order processors it serves as
// both the load queue and the store queue.
ArrayST* LSQ;
ArrayST* LoadQ;
// These three members are stored by value (not as references or pointers).
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
enum Cache_policy cache_p;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
// EXECU's constructor takes a `lsq_height_` argument; presumably it is fed
// from this value -- confirm at the call site.
double lsq_height;
bool exist;
// exist_ defaults to true.
LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats,
bool exist_ = true);
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~LoadStoreU();
};
// Plain data holder for the instruction and data TLB configuration of the
// memory management unit. MemManU keeps one instance as its `mem_man_params`
// member. The two TLBs have parallel sets of fields.
// NOTE(review): units of latency/throughput are not stated in this header --
// confirm where the values are parsed.
class MemoryManagementParams {
public:
// Instruction TLB.
int itlb_number_entries;
double itlb_latency;
double itlb_throughput;
int itlb_assoc;
int itlb_nbanks;
// Data TLB.
int dtlb_number_entries;
double dtlb_latency;
double dtlb_throughput;
int dtlb_assoc;
int dtlb_nbanks;
};
// Plain data holder for instruction and data TLB activity counts. MemManU
// keeps one instance as its `mem_man_stats` member. Counts are stored as
// double rather than as an integer type.
class MemoryManagementStats {
public:
// Instruction TLB: only totals are tracked.
double itlb_total_accesses;
double itlb_total_misses;
double itlb_conflicts;
// Data TLB: reads and writes are tracked separately.
double dtlb_read_accesses;
double dtlb_read_misses;
double dtlb_write_accesses;
double dtlb_write_misses;
double dtlb_conflicts;
};
// Memory management unit component holding the instruction and data TLBs.
// Derives from McPATComponent and is constructed from an XML node, the
// shared InputParameter, and the enclosing core's parameters and statistics.
// NOTE(review): both TLB arrays are held through raw pointers; ownership is
// presumably handled by ~MemManU() -- confirm.
class MemManU : public McPATComponent {
public:
ArrayST* itlb;
ArrayST* dtlb;
// The following members are stored by value (not as references or pointers).
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
MemoryManagementParams mem_man_params;
MemoryManagementStats mem_man_stats;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
bool exist;
// exist_ defaults to true.
MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats, bool exist_ = true);
// NOTE(review): presumably fills mem_man_params / mem_man_stats -- the body
// is not visible in this header.
void set_params_stats();
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~MemManU();
};
// Register file unit component. Derives from McPATComponent and is
// constructed from an XML node, the shared InputParameter, and the enclosing
// core's parameters and statistics.
// NOTE(review): IRF / FRF / RFWIN are presumably the integer register file,
// the floating-point register file and a register-window array -- inferred
// from the names only. Ownership of the raw pointers is not visible here.
class RegFU : public McPATComponent {
public:
// Class-wide value; it is only declared here and must be defined in a
// source file.
static int RFWIN_ACCESS_MULTIPLIER;
ArrayST* IRF;
ArrayST* FRF;
ArrayST* RFWIN;
// These three members are stored by value (not as references or pointers).
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double int_regfile_height, fp_regfile_height;
bool exist;
// exist_ defaults to true.
RegFU(XMLNode* _xml_data,
InputParameter* interface_ip_, const CoreParameters & _core_params,
const CoreStatistics & _core_stats,
bool exist_ = true);
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~RegFU();
};
// Execution unit component. It references a register file unit, a scheduler
// unit, three functional units, and six bypass interconnects.
// Derives from McPATComponent; besides the usual XML node / InputParameter /
// core parameters and statistics, the constructor also takes a load-store
// queue height (lsq_height_).
// NOTE(review): all sub-components are held through raw pointers; ownership
// is presumably handled by ~EXECU() -- confirm.
class EXECU : public McPATComponent {
public:
RegFU* rfu;
SchedulerU* scheu;
// NOTE(review): presumably the floating-point unit, the integer ALU and the
// multiplier -- inferred from the names only.
FunctionalUnit* fp_u;
FunctionalUnit* exeu;
FunctionalUnit* mul;
// Bypass interconnects; each data bypass has a matching tag bypass.
Interconnect* int_bypass;
Interconnect* intTagBypass;
Interconnect* int_mul_bypass;
Interconnect* intTag_mul_Bypass;
Interconnect* fp_bypass;
Interconnect* fpTagBypass;
InputParameter interface_ip;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
double lsq_height;
// Stored by value (not as references or pointers).
CoreParameters core_params;
CoreStatistics core_stats;
bool exist;
// exist_ defaults to true.
EXECU(XMLNode* _xml_data, InputParameter* interface_ip_,
double lsq_height_, const CoreParameters & _core_params,
const CoreStatistics & _core_stats, bool exist_ = true);
void computeEnergy();
void displayData(uint32_t indent = 0, int plevel = 100);
~EXECU();
};
// Top-level core component. It references the per-core units (instruction
// fetch, load/store, memory management, execution, renaming), the pipeline
// and undifferentiated-core models, and an L2 cache, and it holds the
// CoreParameters / CoreStatistics by value.
// Unlike the unit classes in this header, Core declares no displayData() of
// its own.
// NOTE(review): all sub-components are held through raw pointers; ownership
// is presumably handled by ~Core() -- confirm.
class Core : public McPATComponent {
public:
InstFetchU* ifu;
LoadStoreU* lsu;
MemManU* mmu;
EXECU* exu;
RENAMINGU* rnu;
Pipeline* corepipe;
UndiffCore* undiffCore;
CacheUnit* l2cache;
// Index of this core; see the TODO on the constructor below.
int ithCore;
InputParameter interface_ip;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
CoreParameters core_params;
CoreStatistics core_stats;
// TODO: Migrate component ID handling into the XML data to remove this
// ithCore variable
Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_);
// NOTE(review): presumably these populate core_params and core_stats
// respectively -- the bodies are not visible in this header.
void initialize_params();
void initialize_stats();
void set_core_param();
void computeEnergy();
~Core();
};
#endif /* CORE_H_ */

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -33,130 +34,190 @@
#include <cassert>
#include <iostream>
#include "globalvar.h"
#include "basic_components.h"
#include "interconnect.h"
#include "wire.h"
interconnect::interconnect(
string name_,
enum Device_ty device_ty_,
double base_w, double base_h,
int data_w, double len,const InputParameter *configure_interface,
int start_wiring_level_,
bool pipelinable_ ,
double route_over_perc_ ,
bool opt_local_,
enum Core_type core_ty_,
enum Wire_type wire_model,
double width_s, double space_s,
TechnologyParameter::DeviceType *dt
)
:name(name_),
device_ty(device_ty_),
in_rise_time(0),
out_rise_time(0),
base_width(base_w),
base_height(base_h),
data_width(data_w),
wt(wire_model),
width_scaling(width_s),
space_scaling(space_s),
start_wiring_level(start_wiring_level_),
length(len),
//interconnect_latency(1e-12),
//interconnect_throughput(1e-12),
opt_local(opt_local_),
core_ty(core_ty_),
pipelinable(pipelinable_),
route_over_perc(route_over_perc_),
deviceType(dt)
{
double Interconnect::width_scaling_threshold = 3.0;
wt = Global;
l_ip=*configure_interface;
local_result = init_interface(&l_ip);
Interconnect::Interconnect(XMLNode* _xml_data, string name_,
enum Device_ty device_ty_, double base_w,
double base_h, int data_w,
double len,
const InputParameter *configure_interface,
int start_wiring_level_, double _clockRate,
bool pipelinable_, double route_over_perc_,
bool opt_local_, enum Core_type core_ty_,
enum Wire_type wire_model,
double width_s, double space_s,
TechnologyParameter::DeviceType *dt)
: McPATComponent(_xml_data), device_ty(device_ty_), in_rise_time(0),
out_rise_time(0), base_width(base_w), base_height(base_h),
data_width(data_w), wt(wire_model), width_scaling(width_s),
space_scaling(space_s), start_wiring_level(start_wiring_level_),
length(len), opt_local(opt_local_), core_ty(core_ty_),
pipelinable(pipelinable_), route_over_perc(route_over_perc_),
deviceType(dt) {
name = name_;
clockRate = _clockRate;
l_ip = *configure_interface;
local_result = init_interface(&l_ip, name);
max_unpipelined_link_delay = 0; //TODO
min_w_nmos = g_tp.min_w_nmos_;
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
max_unpipelined_link_delay = 0;
min_w_nmos = g_tp.min_w_nmos_;
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
latency = l_ip.latency;
throughput = l_ip.throughput;
latency_overflow=false;
throughput_overflow=false;
latency = l_ip.latency;
throughput = l_ip.throughput;
latency_overflow = false;
throughput_overflow = false;
/*
* TODO: Add wiring option from semi-global to global automatically
* And directly jump to global if semi-global cannot satisfy timing
* Fat wires only available for global wires, thus
* if signal wiring layer starts from semi-global,
* the next layer up will be global, i.e., semi-global does
* not have fat wires.
*/
if (pipelinable == false)
//Non-pipelinable wires, such as bypass logic, care latency
{
compute();
if (opt_for_clk && opt_local)
{
while (delay > latency && width_scaling<3.0)
{
width_scaling *= 2;
space_scaling *= 2;
Wire winit(width_scaling, space_scaling);
compute();
}
if (delay > latency)
{
latency_overflow=true;
}
}
}
else //Pipelinable wires, such as bus, does not care latency but throughput
{
/*
* TODO: Add pipe regs power, area, and timing;
* Pipelinable wires optimize latency first.
*/
compute();
if (opt_for_clk && opt_local)
{
while (delay > throughput && width_scaling<3.0)
{
width_scaling *= 2;
space_scaling *= 2;
Wire winit(width_scaling, space_scaling);
compute();
}
if (delay > throughput)
// insert pipeline stages
{
num_pipe_stages = (int)ceil(delay/throughput);
assert(num_pipe_stages>0);
delay = delay/num_pipe_stages + num_pipe_stages*0.05*delay;
}
}
}
if (pipelinable == false) {
//Non-pipelinable wires, such as bypass logic, care latency
calcWireData();
if (opt_for_clk && opt_local) {
while (delay > latency &&
width_scaling < width_scaling_threshold) {
width_scaling *= 2;
space_scaling *= 2;
Wire winit(width_scaling, space_scaling);
calcWireData();
}
if (delay > latency) {
latency_overflow = true;
}
}
} else {
//Pipelinable wires, such as bus, does not care latency but throughput
calcWireData();
if (opt_for_clk && opt_local) {
while (delay > throughput &&
width_scaling < width_scaling_threshold) {
width_scaling *= 2;
space_scaling *= 2;
Wire winit(width_scaling, space_scaling);
calcWireData();
}
if (delay > throughput) {
// insert pipeline stages
num_pipe_stages = (int)ceil(delay / throughput);
assert(num_pipe_stages > 0);
delay = delay / num_pipe_stages + num_pipe_stages * 0.05 * delay;
}
}
}
power_bit = power;
power.readOp.dynamic *= data_width;
power.readOp.leakage *= data_width;
power.readOp.gate_leakage *= data_width;
area.set_area(area.get_area()*data_width);
no_device_under_wire_area.h *= data_width;
if (latency_overflow == true) {
cout << "Warning: " << name
<< " wire structure cannot satisfy latency constraint." << endl;
}
assert(power.readOp.dynamic > 0);
assert(power.readOp.leakage > 0);
assert(power.readOp.gate_leakage > 0);
double long_channel_device_reduction =
longer_channel_device_reduction(device_ty, core_ty);
double sckRation = g_tp.sckt_co_eff;
power.readOp.dynamic *= sckRation;
power.writeOp.dynamic *= sckRation;
power.searchOp.dynamic *= sckRation;
power.readOp.longer_channel_leakage =
power.readOp.leakage * long_channel_device_reduction;
//Only global wires has the option to choose whether routing over or not
if (pipelinable)
area.set_area(area.get_area() * route_over_perc +
no_device_under_wire_area.get_area() *
(1 - route_over_perc));
Wire wreset();
}
void
Interconnect::calcWireData() {
Wire *wtemp1 = 0;
wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
delay = wtemp1->delay;
power.readOp.dynamic = wtemp1->power.readOp.dynamic;
power.readOp.leakage = wtemp1->power.readOp.leakage;
power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage;
area.set_area(wtemp1->area.get_area());
no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing);
no_device_under_wire_area.w = length;
if (wtemp1)
delete wtemp1;
}
void
Interconnect::computeEnergy() {
    // Scale factors handed to set_pppm(); rewritten for each of the two
    // scenarios below.
    double pppm_t[4] = {1, 1, 1, 1};
    const double ports = int_params.active_ports;

    // TDP: first and last factors are active ports times duty cycle, the
    // middle two are the plain port count.
    const double tdp_factor = ports * int_stats.duty_cycle;
    power_t.reset();
    set_pppm(pppm_t, tdp_factor, ports, ports, tdp_factor);
    power_t = power * pppm_t;

    // Runtime: same layout, with the access count in place of the
    // duty-cycle product.
    const double access_count = int_stats.accesses;
    rt_power.reset();
    set_pppm(pppm_t, access_count, ports, ports, access_count);
    rt_power = power * pppm_t;

    // Publish the results. Peak dynamic power is the TDP dynamic figure
    // multiplied by the clock rate; leakage figures come from the TDP
    // scenario and runtime energy from the runtime scenario.
    output_data.peak_dynamic_power = power_t.readOp.dynamic * clockRate;
    output_data.subthreshold_leakage_power = power_t.readOp.leakage;
    output_data.gate_leakage_power = power_t.readOp.gate_leakage;
    output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
}
void
Interconnect::computeArea() {
    // Report the accumulated wire area scaled down by 1e6.
    // NOTE(review): presumably a um^2 -> mm^2 conversion -- confirm units.
    const double wire_area = area.get_area();
    output_data.area = wire_area / 1e6;
}
// Record the activity inputs that computeEnergy() reads:
//   active_ports - stored in int_params
//   duty_cycle   - stored in int_stats
//   accesses     - stored in int_stats
void
Interconnect::set_params_stats(double active_ports,
                               double duty_cycle, double accesses) {
    int_stats.accesses = accesses;
    int_stats.duty_cycle = duty_cycle;
    int_params.active_ports = active_ports;
}
void Interconnect::leakage_feedback(double temperature) {
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
calcWireData();
power_bit = power;
power.readOp.dynamic *= data_width;
power.readOp.leakage *= data_width;
power.readOp.gate_leakage *= data_width;
area.set_area(area.get_area()*data_width);
no_device_under_wire_area.h *= data_width;
if (latency_overflow==true)
cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint." << endl;
assert(power.readOp.dynamic > 0);
assert(power.readOp.leakage > 0);
assert(power.readOp.gate_leakage > 0);
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
double long_channel_device_reduction =
longer_channel_device_reduction(device_ty,core_ty);
double sckRation = g_tp.sckt_co_eff;
power.readOp.dynamic *= sckRation;
@ -164,59 +225,6 @@ interconnect::interconnect(
power.searchOp.dynamic *= sckRation;
power.readOp.longer_channel_leakage =
power.readOp.leakage*long_channel_device_reduction;
if (pipelinable)//Only global wires has the option to choose whether routing over or not
area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc));
Wire wreset();
}
void
interconnect::compute()
{
Wire *wtemp1 = 0;
wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
delay = wtemp1->delay;
power.readOp.dynamic = wtemp1->power.readOp.dynamic;
power.readOp.leakage = wtemp1->power.readOp.leakage;
power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage;
area.set_area(wtemp1->area.get_area());
no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing);
no_device_under_wire_area.w = length;
if (wtemp1)
delete wtemp1;
}
void interconnect::leakage_feedback(double temperature)
{
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
uca_org_t init_result = init_interface(&l_ip); // init_result is dummy
compute();
power_bit = power;
power.readOp.dynamic *= data_width;
power.readOp.leakage *= data_width;
power.readOp.gate_leakage *= data_width;
assert(power.readOp.dynamic > 0);
assert(power.readOp.leakage > 0);
assert(power.readOp.gate_leakage > 0);
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
double sckRation = g_tp.sckt_co_eff;
power.readOp.dynamic *= sckRation;
power.writeOp.dynamic *= sckRation;
power.searchOp.dynamic *= sckRation;
power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
power.readOp.leakage*long_channel_device_reduction;
}

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -42,46 +43,31 @@
#include "subarray.h"
#include "wire.h"
// leakge power includes entire htree in a bank (when uca_tree == false)
// leakge power includes only part to one bank when uca_tree == true
// Plain data holder for interconnect configuration.
// Interconnect::set_params_stats() writes active_ports, and
// Interconnect::computeEnergy() uses it as a multiplier when scaling the
// wire power into TDP and runtime figures.
class InterconnectParameters {
public:
// Stored as double rather than as an integer type.
double active_ports;
};
class interconnect : public Component
{
public:
interconnect(
string name_,
enum Device_ty device_ty_,
double base_w, double base_h, int data_w, double len,
const InputParameter *configure_interface, int start_wiring_level_,
bool pipelinable_ = false,
double route_over_perc_ =0.5,
bool opt_local_=true,
enum Core_type core_ty_=Inorder,
enum Wire_type wire_model=Global,
double width_s=1.0, double space_s=1.0,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
);
// Plain data holder for interconnect activity. Both fields are written by
// Interconnect::set_params_stats() and read by Interconnect::computeEnergy().
class InterconnectStatistics {
public:
// Multiplied by the active port count for the TDP computation.
double duty_cycle;
// Used as the scale factor for the runtime computation.
double accesses;
};
~interconnect() {};
class Interconnect : public McPATComponent {
public:
static double width_scaling_threshold;
void compute();
string name;
enum Device_ty device_ty;
enum Device_ty device_ty;
double in_rise_time, out_rise_time;
InputParameter l_ip;
uca_org_t local_result;
InputParameter l_ip;
uca_org_t local_result;
Area no_device_under_wire_area;
void set_in_rise_time(double rt)
{
in_rise_time = rt;
}
void leakage_feedback(double temperature);
double max_unpipelined_link_delay;
powerDef power_bit;
double wire_bw;
double init_wire_bw; // bus width at root
double init_wire_bw;
double base_width;
double base_height;
int data_width;
@ -92,19 +78,39 @@ class interconnect : public Component
double min_w_nmos;
double min_w_pmos;
double latency, throughput;
bool latency_overflow;
bool throughput_overflow;
double interconnect_latency;
double interconnect_throughput;
bool latency_overflow;
bool throughput_overflow;
double interconnect_latency;
double interconnect_throughput;
bool opt_local;
enum Core_type core_ty;
bool pipelinable;
double route_over_perc;
int num_pipe_stages;
private:
TechnologyParameter::DeviceType *deviceType;
int num_pipe_stages;
TechnologyParameter::DeviceType* deviceType;
InterconnectParameters int_params;
InterconnectStatistics int_stats;
Interconnect(XMLNode* _xml_data, string name_,
enum Device_ty device_ty_, double base_w,
double base_h, int data_w, double len,
const InputParameter *configure_interface,
int start_wiring_level_,
double _clockRate = 0.0f,
bool pipelinable_ = false, double route_over_perc_ = 0.5,
bool opt_local_ = true, enum Core_type core_ty_ = Inorder,
enum Wire_type wire_model = Global, double width_s = 1.0,
double space_s = 1.0,
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
private:
void calcWireData();
public:
void computeArea();
void computeEnergy();
void set_params_stats(double active_ports,
double duty_cycle, double accesses);
void leakage_feedback(double temperature);
~Interconnect() {};
};
#endif

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <algorithm>
@ -34,14 +35,12 @@
#include <iostream>
#include <string>
#include "XML_Parse.h"
#include "basic_circuit.h"
#include "basic_components.h"
#include "common.h"
#include "const.h"
#include "io.h"
#include "iocontrollers.h"
#include "logic.h"
#include "parameter.h"
/*
SUN Niagara 2 I/O power analysis:
@ -69,378 +68,473 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F
*
*/
NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_)
:XML(XML_interface),
interface_ip(*interface_ip_)
{
local_result = init_interface(&interface_ip);
// Construct the NIU component: forward the XML node and interface
// parameters to the McPATComponent base, label the component "NIU", and
// run set_niu_param().
NIUController::NIUController(XMLNode* _xml_data,
                             InputParameter* interface_ip_)
    : McPATComponent(_xml_data, interface_ip_)
{
    name = "NIU";
    set_niu_param();
}
double frontend_area, phy_area, mac_area, SerDer_area;
double frontend_dyn, mac_dyn, SerDer_dyn;
double frontend_gates, mac_gates, SerDer_gates;
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
double NMOS_sizing, PMOS_sizing;
void NIUController::computeArea() {
double mac_area;
double frontend_area;
double SerDer_area;
set_niu_param();
if (niup.type == 0) { //high performance NIU
//Area estimation based on average of die photo from Niagara 2 and
//Cadence ChipEstimate using 65nm.
mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
(interface_ip.F_sz_um / 0.065);
//Area estimation based on average of die photo from Niagara 2, ISSCC
//"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
//and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
//With Robust VCO Tuning Technique" Frontend is PCS
frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
(interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
//Area estimation based on average of die photo from Niagara 2 and
//Cadence ChipEstimate hard IP @65nm.
//SerDer is very hard to scale
SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
0.065);//* (interface_ip.F_sz_um/0.065);
} else {
//Low power implementations are mostly from Cadence ChipEstimator;
//Ignore the multiple IP effect
// ---When there are multiple IP (same kind or not) selected, Cadence
//ChipEstimator results are not a simple summation of all IPs.
//Ignore this effect
mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
(interface_ip.F_sz_um / 0.065);
frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
(interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
(interface_ip.F_sz_um/0.065);
//Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
//Transceiver and XAUI Interface With Robust VCO Tuning Technique"
//and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
//scale perfectly with the technology
}
if (niup.type == 0) //high performance NIU
{
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm.
mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
//and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS
frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
//SerDer is very hard to scale
SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
phy_area = frontend_area + SerDer_area;
//total area
area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
//Power
//Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
//Cadence ChipEstimate using 65nm soft IP;
frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
//according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
//Cadence ChipEstimate using 65nm
mac_gates = 111700;
frontend_gates = 320000;
SerDer_gates = 200000;
NMOS_sizing = 5*g_tp.min_w_nmos_;
PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
}
else
{//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect
// ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not
// a simple summation of all IPs. Ignore this effect
mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer
SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique"
//and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology
//total area
area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
//Power
//Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
//Cadence ChipEstimate using 65nm soft IP;
frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
//SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
mac_gates = 111700;
frontend_gates = 52000;
SerDer_gates = 199260;
NMOS_sizing = g_tp.min_w_nmos_;
PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
}
power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
//total area
output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
}
void NIUController::computeEnergy(bool is_tdp)
{
if (is_tdp)
{
/**
 * Compute the NIU's dynamic energy and leakage power and publish the
 * results through output_data.
 *
 * niup.type == 0 selects the high-performance design; any other value
 * selects the low-power design. MAC and front-end coefficients are energy
 * figures from Cadence ChipEstimate at 65nm; the SerDes coefficient is a
 * power figure and is divided by niup.clockRate to obtain energy per
 * clock cycle. All dynamic terms are scaled by supply voltage squared and
 * by feature size.
 */
void NIUController::computeEnergy() {
    double mac_dyn;
    double frontend_dyn;
    double SerDer_dyn;
    double frontend_gates;
    double mac_gates;
    double SerDer_gates;
    double NMOS_sizing;
    double PMOS_sizing;
    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();

    if (niup.type == 0) { //high performance NIU
        //Power
        //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
        //E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
        //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
        mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
            1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
        //Cadence ChipEstimate using 65nm soft IP;
        frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
        //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
        SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;

        //Cadence ChipEstimate using 65nm
        mac_gates = 111700;
        frontend_gates = 320000;
        SerDer_gates = 200000;
        NMOS_sizing = 5 * g_tp.min_w_nmos_;
        PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
    } else {
        //Power
        //Cadence ChipEstimate using 65nm (mac, front_end are all energy.
        //E=P*T = P/F)
        mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
            / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
        //Cadence ChipEstimate using 65nm soft IP;
        frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
            g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
        //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
        SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;

        mac_gates = 111700;
        frontend_gates = 52000;
        SerDer_gates = 199260;
        NMOS_sizing = g_tp.min_w_nmos_;
        PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
    }

    //convert to energy per clock cycle of whole NIU
    SerDer_dyn /= niup.clockRate;

    power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;

    // Leakage scales with the gate count of all three sub-blocks: MAC,
    // front end (PCS) and SerDes.
    const double total_gates = mac_gates + frontend_gates + SerDer_gates;
    power.readOp.leakage = total_gates *
        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
        g_tp.peri_global.Vdd;//unit W
    double long_channel_device_reduction =
        longer_channel_device_reduction(Uncore_device);
    power.readOp.longer_channel_leakage =
        power.readOp.leakage * long_channel_device_reduction;
    power.readOp.gate_leakage = total_gates *
        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
        g_tp.peri_global.Vdd;//unit W

    // Output power
    output_data.subthreshold_leakage_power =
        longer_channel_device ? power.readOp.longer_channel_leakage :
        power.readOp.leakage;
    output_data.gate_leakage_power = power.readOp.gate_leakage;
    output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
    output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
}
// Fill niup (parameters) and nius (statistics) from the "param" and "stat"
// children of this component's XML node. Entries whose name matches none of
// the handled keys produce an "unrecognized" warning.
// NOTE(review): the ASSIGN_*_IF macros are defined elsewhere; because a bare
// `else` follows them they evidently expand to `else if (...)` arms chained
// onto the `if (!node_name)` test -- keep that ordering intact.
void NIUController::set_niu_param() {
int num_children = xml_data->nChildNode("param");
int i;
for (i = 0; i < num_children; i++) {
// The index is handed over by address; NOTE(review): confirm whether
// getChildNodePtr modifies it.
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
XMLCSTR node_name = paramNode->getAttribute("name");
// `value` is not referenced directly below; presumably the ASSIGN_*_IF
// macros read it -- verify against the macro definitions.
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
ASSIGN_INT_IF("num_units", niup.num_units);
ASSIGN_INT_IF("type", niup.type);
else {
warnUnrecognizedParam(node_name);
}
}
// Change from MHz to Hz
niup.clockRate *= 1e6;
// Second pass: runtime statistics.
num_children = xml_data->nChildNode("stat");
for (i = 0; i < num_children; i++) {
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
ASSIGN_FP_IF("perc_load", nius.perc_load);
else {
warnUnrecognizedStat(node_name);
}
}
}
void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
cout << "NIU:" << endl;
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl;
cout << indent_str<< "Subthreshold Leakage = "
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl;
cout<<endl;
}
else
{
}
// Construct the PCIe controller model: forward the XML node and the
// interface parameters to the McPATComponent base, label the component,
// then parse its parameters and statistics.
PCIeController::PCIeController(XMLNode* _xml_data,
                               InputParameter* interface_ip_)
        : McPATComponent(_xml_data, interface_ip_)
{
    name = "PCIe";
    set_pcie_param();
}
void NIUController::set_niu_param()
{
niup.clockRate = XML->sys.niu.clockrate;
niup.clockRate *= 1e6;
niup.num_units = XML->sys.niu.number_units;
niup.duty_cycle = XML->sys.niu.duty_cycle;
niup.perc_load = XML->sys.niu.total_load_perc;
niup.type = XML->sys.niu.type;
// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
/**
 * Estimate the PCIe controller area and store it in output_data.area.
 *
 * PCIe is assumed to be a bit-slice based architecture: the reference
 * figures are divided by 8 to obtain a per-lane value, which is then
 * multiplied by pciep.num_channels. The SerDes (PHY) area is included
 * only when pciep.withPHY is set.
 */
void PCIeController::computeArea() {
    // Feature size relative to the 65nm reference point.
    const double tech_ratio = interface_ip.F_sz_um / 0.065;
    const bool high_performance = (pciep.type == 0);

    double ctrl_area;
    double SerDer_area;

    if (high_performance) {
        // Average of the Niagara 2 die photo and Cadence ChipEstimate @ 65nm.
        ctrl_area = (5.2 + 0.5) / 2 * tech_ratio * tech_ratio;
        // Average of the Niagara 2 die photo and Cadence ChipEstimate hard
        // IP @ 65nm. SerDes is very hard to scale, hence the linear (not
        // quadratic) dependence on feature size.
        SerDer_area = (3.03 + 0.36) * tech_ratio;
    } else {
        ctrl_area = 0.412 * tech_ratio * tech_ratio;
        // Average of the Niagara 2 die photo and Cadence ChipEstimate @ 65nm.
        SerDer_area = 0.36 * tech_ratio * tech_ratio;
    }

    // Total area
    double phy_area = 0;
    if (pciep.withPHY) {
        phy_area = SerDer_area;
    }
    output_data.area = ((ctrl_area + phy_area) / 8 * pciep.num_channels) * 1e6;
}
PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_)
:XML(XML_interface),
interface_ip(*interface_ip_)
{
local_result = init_interface(&interface_ip);
double frontend_area, phy_area, ctrl_area, SerDer_area;
double ctrl_dyn, frontend_dyn, SerDer_dyn;
double ctrl_gates,frontend_gates, SerDer_gates;
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
double NMOS_sizing, PMOS_sizing;
void PCIeController::computeEnergy() {
double ctrl_dyn;
double SerDer_dyn;
double ctrl_gates;
double SerDer_gates = 0;
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
double NMOS_sizing;
double PMOS_sizing;
/* Assuming PCIe is bit-slice based architecture
* This is the reason for /8 in both area and power calculation
* to get per lane numbers
*/
/* Assuming PCIe is bit-slice based architecture
* This is the reason for /8 in both area and power calculation
* to get per lane numbers
*/
set_pcie_param();
if (pciep.type == 0) //high performance NIU
{
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm.
ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
//SerDer is very hard to scale
SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
phy_area = frontend_area + SerDer_area;
//total area
//Power
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
// //Cadence ChipEstimate using 65nm soft IP;
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
if (pciep.type == 0) { //high performance PCIe
//Power
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
// //Cadence ChipEstimate using 65nm soft IP;
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
//PCIe 2.0 max per lane speed is 4Gb/s
SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
//power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels;
//Cadence ChipEstimate using 65nm
ctrl_gates = 900000/8*pciep.num_channels;
// frontend_gates = 120000/8;
// SerDer_gates = 200000/8;
NMOS_sizing = 5*g_tp.min_w_nmos_;
PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
}
else
{
ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
//total area
//Power
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
// //Cadence ChipEstimate using 65nm soft IP;
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
//Cadence ChipEstimate using 65nm
ctrl_gates = 900000 / 8 * pciep.num_channels;
// frontend_gates = 120000/8;
// SerDer_gates = 200000/8;
NMOS_sizing = 5 * g_tp.min_w_nmos_;
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
} else {
//Power
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
// //Cadence ChipEstimate using 65nm soft IP;
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
//PCIe 2.0 max per lane speed is 4Gb/s
SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
//Cadence ChipEstimate using 65nm
ctrl_gates = 200000/8*pciep.num_channels;
// frontend_gates = 120000/8;
SerDer_gates = 200000/8*pciep.num_channels;
NMOS_sizing = g_tp.min_w_nmos_;
PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
}
area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6);
power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels;
power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
}
void PCIeController::computeEnergy(bool is_tdp)
{
if (is_tdp)
{
power = power_t;
power.readOp.dynamic *= pciep.duty_cycle;
//Cadence ChipEstimate using 65nm
ctrl_gates = 200000 / 8 * pciep.num_channels;
// frontend_gates = 120000/8;
SerDer_gates = 200000 / 8 * pciep.num_channels;
NMOS_sizing = g_tp.min_w_nmos_;
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
}
else
{
rt_power = power_t;
rt_power.readOp.dynamic *= pciep.perc_load;
//covert to energy per clock cycle
SerDer_dyn /= pciep.clockRate;
power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
pciep.num_channels;
power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
g_tp.peri_global.Vdd;//unit W
double long_channel_device_reduction =
longer_channel_device_reduction(Uncore_device);
power.readOp.longer_channel_leakage =
power.readOp.leakage * long_channel_device_reduction;
power.readOp.gate_leakage = (ctrl_gates +
(pciep.withPHY ? SerDer_gates : 0)) *
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
g_tp.peri_global.Vdd;//unit W
// Output power
output_data.subthreshold_leakage_power =
longer_channel_device ? power.readOp.longer_channel_leakage :
power.readOp.leakage;
output_data.gate_leakage_power = power.readOp.gate_leakage;
output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
output_data.runtime_dynamic_energy =
power.readOp.dynamic * pcies.perc_load;
}
// Fill pciep (parameters) and pcies (statistics) from the "param" and "stat"
// children of this component's XML node. Entries whose name matches none of
// the handled keys produce an "unrecognized" warning.
// NOTE(review): the ASSIGN_*_IF macros are defined elsewhere; because a bare
// `else` follows them they evidently expand to `else if (...)` arms chained
// onto the `if (!node_name)` test -- keep that ordering intact.
void PCIeController::set_pcie_param() {
int num_children = xml_data->nChildNode("param");
int i;
for (i = 0; i < num_children; i++) {
// The index is handed over by address; NOTE(review): confirm whether
// getChildNodePtr modifies it.
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
XMLCSTR node_name = paramNode->getAttribute("name");
// `value` is not referenced directly below; presumably the ASSIGN_*_IF
// macros read it -- verify against the macro definitions.
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
ASSIGN_INT_IF("num_units", pciep.num_units);
ASSIGN_INT_IF("num_channels", pciep.num_channels);
ASSIGN_INT_IF("type", pciep.type);
ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);
else {
warnUnrecognizedParam(node_name);
}
}
// Change from MHz to Hz
pciep.clockRate *= 1e6;
// Second pass: runtime statistics.
num_children = xml_data->nChildNode("stat");
for (i = 0; i < num_children; i++) {
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
ASSIGN_FP_IF("perc_load", pcies.perc_load);
else {
warnUnrecognizedStat(node_name);
}
}
}
void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
cout << "PCIe:" << endl;
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl;
cout << indent_str<< "Subthreshold Leakage = "
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl;
cout<<endl;
}
else
{
}
// Construct the flash controller model: forward the XML node and the
// interface parameters to the McPATComponent base, label the component,
// then parse its parameters and statistics.
FlashController::FlashController(XMLNode* _xml_data,
                                 InputParameter* interface_ip_)
        : McPATComponent(_xml_data, interface_ip_)
{
    name = "Flash Controller";
    set_fc_param();
}
void PCIeController::set_pcie_param()
{
pciep.clockRate = XML->sys.pcie.clockrate;
pciep.clockRate *= 1e6;
pciep.num_units = XML->sys.pcie.number_units;
pciep.num_channels = XML->sys.pcie.num_channels;
pciep.duty_cycle = XML->sys.pcie.duty_cycle;
pciep.perc_load = XML->sys.pcie.total_load_perc;
pciep.type = XML->sys.pcie.type;
pciep.withPHY = XML->sys.pcie.withPHY;
// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
/**
 * Estimate the flash controller area and store it in output_data.area.
 *
 * Only the low-power design (fcp.type != 0) is modelled; requesting the
 * high-performance design prints a message and terminates the program.
 * The SerDes (PHY) area is included only when fcp.withPHY is set.
 */
void FlashController::computeArea() {
    double ctrl_area;
    double SerDer_area;

    /* Assuming Flash is bit-slice based architecture
     * This is the reason for /8 in both area and power calculation
     * to get per lane numbers
     */

    if (fcp.type == 0) { //high performance flash controller
        cout << "Current McPAT does not support high performance flash "
             << "controller since even low power designs are enough for "
             << "maintain throughput" <<endl;
        // NOTE(review): terminates with status 0 although the requested
        // configuration is unsupported -- confirm this is intended.
        exit(0);
    } else {
        ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) *
            (interface_ip.F_sz_um / 0.065);
        //Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
        //from CAST
        SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) *
            (interface_ip.F_sz_um / 0.065);
    }

    // Every channel beyond the first adds 20% of a single-channel controller.
    double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
    output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
        1e6 * number_channel;
}
void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
/**
 * Compute the flash controller's dynamic and leakage power and publish
 * the results through output_data.
 *
 * Only the low-power design (fcp.type != 0) is modelled; requesting the
 * high-performance design prints a message and terminates the program.
 * Both dynamic coefficients used here are power figures, not energy.
 * SerDes contributions are included only when fcp.withPHY is set.
 */
void FlashController::computeEnergy() {
    double ctrl_dyn;
    double SerDer_dyn;
    double ctrl_gates;
    double SerDer_gates;
    double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
    double NMOS_sizing;
    double PMOS_sizing;

    /* Assuming Flash is bit-slice based architecture
     * This is the reason for /8 in both area and power calculation
     * to get per lane numbers
     */

    if (fcp.type == 0) { //high performance flash controller
        cout << "Current McPAT does not support high performance flash "
             << "controller since even low power designs are enough for "
             << "maintain throughput" <<endl;
        // NOTE(review): terminates with status 0 although the requested
        // configuration is unsupported -- confirm this is intended.
        // Nothing after this call executes, so no sizing values are set.
        exit(0);
    } else {
        //based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it
        //support 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
        //This is already saturate the 200MB/s of the flash controller core
        //above.
        ctrl_gates = 129267;
        SerDer_gates = 200000 / 8;
        NMOS_sizing = g_tp.min_w_nmos_;
        PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;

        //Power
        //Cadence ChipEstimate using 65nm the controller 125mW for every
        //200MB/s This is power not energy!
        ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
            1.1 * (interface_ip.F_sz_nm / 65.0);
        //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
        SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
            g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
        //max Per controller speed is 1.6Gb/s (200MB/s)
    }

    // Every channel beyond the first adds 20% of a single-channel controller.
    double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
    power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
        number_channel;
    power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) *
                            number_channel) *
        cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
        g_tp.peri_global.Vdd;//unit W
    double long_channel_device_reduction =
        longer_channel_device_reduction(Uncore_device);
    power.readOp.longer_channel_leakage =
        power.readOp.leakage * long_channel_device_reduction;
    power.readOp.gate_leakage =
        ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) *
        cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
        g_tp.peri_global.Vdd;//unit W

    // Output power
    output_data.subthreshold_leakage_power =
        longer_channel_device ? power.readOp.longer_channel_leakage :
        power.readOp.leakage;
    output_data.gate_leakage_power = power.readOp.gate_leakage;
    output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
    output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;
}
// Fill fcp (parameters) and fcs (statistics) from the "param" and "stat"
// children of this component's XML node. Entries whose name matches none of
// the handled keys produce an "unrecognized" warning.
// NOTE(review): the ASSIGN_*_IF macros are defined elsewhere; because a bare
// `else` follows them they evidently expand to `else if (...)` arms chained
// onto the `if (!node_name)` test -- keep that ordering intact.
void FlashController::set_fc_param()
{
    int num_children = xml_data->nChildNode("param");
    int i;
    for (i = 0; i < num_children; i++) {
        // The index is handed over by address; NOTE(review): confirm
        // whether getChildNodePtr modifies it.
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        // `value` is not referenced directly below; presumably the
        // ASSIGN_*_IF macros read it -- verify against their definitions.
        XMLCSTR value = paramNode->getAttribute("value");

        if (!node_name)
            warnMissingParamName(paramNode->getAttribute("id"));

        ASSIGN_INT_IF("num_channels", fcp.num_channels);
        ASSIGN_INT_IF("type", fcp.type);
        ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Second pass: runtime statistics.
    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        if (!node_name)
            warnMissingStatName(statNode->getAttribute("id"));

        ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
        ASSIGN_FP_IF("perc_load", fcs.perc_load);

        else {
            warnUnrecognizedStat(node_name);
        }
    }
}

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,63 +26,52 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef IOCONTROLLERS_H_
#define IOCONTROLLERS_H_
#endif /* IOCONTROLLERS_H_ */
#include "XML_Parse.h"
#include "parameter.h"
//#include "io.h"
#include "array.h"
//#include "Undifferentiated_Core_Area.h"
#include <vector>
#include "array.h"
#include "basic_components.h"
#include "parameter.h"
class NIUController : public Component {
class NIUController : public McPATComponent {
public:
ParseXML *XML;
InputParameter interface_ip;
NIUParam niup;
powerDef power_t;
uca_org_t local_result;
NIUController(ParseXML *XML_interface,InputParameter* interface_ip_);
NIUParameters niup;
NIUStatistics nius;
NIUController(XMLNode* _xml_data, InputParameter* interface_ip_);
void set_niu_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
void computeArea();
void computeEnergy();
~NIUController(){};
};
class PCIeController : public Component {
class PCIeController : public McPATComponent {
public:
ParseXML *XML;
InputParameter interface_ip;
PCIeParam pciep;
powerDef power_t;
uca_org_t local_result;
PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_);
PCIeParameters pciep;
PCIeStatistics pcies;
PCIeController(XMLNode* _xml_data, InputParameter* interface_ip_);
void set_pcie_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
void computeArea();
void computeEnergy();
~PCIeController(){};
};
class FlashController : public Component {
class FlashController : public McPATComponent {
public:
ParseXML *XML;
InputParameter interface_ip;
MCParam fcp;
powerDef power_t;
uca_org_t local_result;
FlashController(ParseXML *XML_interface,InputParameter* interface_ip_);
MCParameters fcp;
MCStatistics fcs;
FlashController(XMLNode* _xml_data, InputParameter* interface_ip_);
void set_fc_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
void computeArea();
void computeEnergy();
~FlashController(){};
};
#endif /* IOCONTROLLERS_H_ */

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,18 +26,16 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef LOGIC_H_
#define LOGIC_H_
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include "XML_Parse.h"
#include "arch_const.h"
#include "basic_circuit.h"
#include "basic_components.h"
@ -49,185 +48,190 @@
using namespace std;
class selection_logic : public Component{
class selection_logic : public McPATComponent {
public:
selection_logic(bool _is_default, int win_entries_,
int issue_width_, const InputParameter *configure_interface,
enum Device_ty device_ty_=Core_device,
enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface);
bool is_default;
InputParameter l_ip;
uca_org_t local_result;
const ParseXML *XML_interface;
int win_entries;
int issue_width;
int num_threads;
enum Device_ty device_ty;
enum Core_type core_ty;
bool is_default;
InputParameter l_ip;
uca_org_t local_result;
int win_entries;
int issue_width;
double accesses;
int num_threads;
enum Device_ty device_ty;
enum Core_type core_ty;
void selection_power();
selection_logic(XMLNode* _xml_data, bool _is_default, int _win_entries,
int issue_width_, const InputParameter* configure_interface,
string _name, double _accesses,
double clockRate_ = 0.0f,
enum Device_ty device_ty_ = Core_device,
enum Core_type core_ty_ = Inorder);
void computeArea();
void computeEnergy();
void leakage_feedback(double temperature); // TODO
// TODO: Add a deconstructor
};
// Declaration of the dep_resource_conflict_check component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below, including both constructors and both destructors.
class dep_resource_conflict_check : public Component{
class dep_resource_conflict_check : public McPATComponent {
public:
// Constructor signature taking CoreDynParam (no XML node, name or clock rate).
dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true);
InputParameter l_ip;
uca_org_t local_result;
double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
CoreDynParam coredynp;
int compare_bits;
bool is_default;
statsDef tdp_stats;
statsDef rtp_stats;
statsDef stats_t;
powerDef power_t;
// Member list of the McPATComponent-based declaration: coredynp is a
// CoreParameters here, and tdp_stats, rtp_stats and power_t are not redeclared.
InputParameter l_ip;
uca_org_t local_result;
double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
CoreParameters coredynp;
int compare_bits;
bool is_default;
statsDef stats_t;
void conflict_check_power();
double compare_cap();
~dep_resource_conflict_check(){
local_result.cleanup();
}
// Constructor taking the XML node and a component name; clockRate_ and
// _is_default are optional (defaults 0.0f and true).
dep_resource_conflict_check(XMLNode* _xml_data, const string _name,
const InputParameter *configure_interface,
const CoreParameters & dyn_p_, int compare_bits_,
double clockRate_ = 0.0f,
bool _is_default = true);
void conflict_check_power();
double compare_cap();
// Intentionally empty override: no work is done in computeEnergy() here.
void computeEnergy() {};
// The destructor releases local_result through its cleanup() method.
~dep_resource_conflict_check() {
local_result.cleanup();
}
void leakage_feedback(double temperature);
};
// Declaration of the instruction decoder component, named inst_decoder in the
// Component-based version and InstructionDecoder in the McPATComponent-based
// version.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// both declarations (and their duplicated members) appear interleaved below.
class inst_decoder: public Component{
class InstructionDecoder: public McPATComponent {
public:
// inst_decoder constructors: a parameterized one and a default one.
inst_decoder(bool _is_default, const InputParameter *configure_interface,
int opcode_length_,
int num_decoders_,
bool x86_,
enum Device_ty device_ty_=Core_device,
enum Core_type core_ty_=Inorder);
inst_decoder();
bool is_default;
int opcode_length;
int num_decoders;
bool x86;
int num_decoder_segments;
int num_decoded_signals;
InputParameter l_ip;
uca_org_t local_result;
enum Device_ty device_ty;
enum Core_type core_ty;
// Pointers to the final-stage decoder and the predecoder objects (declared
// twice because both diff sides are shown; only the spacing differs).
Decoder* final_dec;
Predec* pre_dec;
Decoder * final_dec;
Predec * pre_dec;
bool is_default;
int opcode_length;
int num_decoders;
bool x86;
int num_decoder_segments;
int num_decoded_signals;
InputParameter l_ip;
uca_org_t local_result;
enum Device_ty device_ty;
enum Core_type core_ty;
statsDef stats_t;
statsDef tdp_stats;
statsDef rtp_stats;
statsDef stats_t;
powerDef power_t;
void inst_decoder_delay_power();
~inst_decoder();
// InstructionDecoder constructor taking the XML node and a component name;
// clockRate_, device_ty_ and core_ty_ are optional (defaults 0.0f,
// Core_device and Inorder).
InstructionDecoder(XMLNode* _xml_data, const string _name, bool _is_default,
const InputParameter *configure_interface,
int opcode_length_, int num_decoders_, bool x86_,
double clockRate_ = 0.0f,
enum Device_ty device_ty_ = Core_device,
enum Core_type core_ty_ = Inorder);
InstructionDecoder();
// Intentionally empty override: no work is done in computeEnergy() here.
void computeEnergy() {};
void inst_decoder_delay_power();
~InstructionDecoder();
void leakage_feedback(double temperature);
};
// TODO: This should be defined elsewhere? This isn't a true McPATComponent
// Declaration of DFFCell, which derives from Component rather than
// McPATComponent.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the member list, the method declarations and the closing brace each appear
// twice below (one copy per diff side).
class DFFCell : public Component {
public:
DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load,
const InputParameter *configure_interface);
InputParameter l_ip;
bool is_dram;
double cell_load;
double WdecNANDn;
double WdecNANDp;
double clock_cap;
int model;
int n_switch;
int n_keep_1;
int n_keep_0;
int n_clock;
powerDef e_switch;
powerDef e_keep_1;
powerDef e_keep_0;
powerDef e_clock;
// Second copy of the same member list (other diff side).
InputParameter l_ip;
bool is_dram;
double cell_load;
double WdecNANDn;
double WdecNANDp;
double clock_cap;
int model;
int n_switch;
int n_keep_1;
int n_keep_0;
int n_clock;
powerDef e_switch;
powerDef e_keep_1;
powerDef e_keep_0;
powerDef e_clock;
double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
void compute_DFF_cell(void);
};
// Second copy of the constructor and methods; this side also declares an
// empty inline destructor.
DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, double _cell_load,
const InputParameter *configure_interface);
double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
void compute_DFF_cell(void);
~DFFCell() {};
};
// Declaration of the Pipeline component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below, including both constructors and both destructors.
class Pipeline : public Component{
// TODO: This is a very ambiguous component. Try to refactor it.
class Pipeline : public McPATComponent {
public:
// Constructor signature taking CoreDynParam and no XML node.
Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true);
InputParameter l_ip;
uca_org_t local_result;
CoreDynParam coredynp;
enum Device_ty device_ty;
bool is_core_pipeline, is_default;
double num_piperegs;
// int pipeline_stages;
// int tot_stage_vector, per_stage_vector;
bool process_ind;
double WNANDn ;
double WNANDp;
double load_per_pipeline_stage;
// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
// bool thread_clock_gated;
// bool in_order, multithreaded;
void compute_stage_vector();
void compute();
~Pipeline(){
local_result.cleanup();
};
// Member list of the McPATComponent-based declaration: coredynp is a
// CoreParameters here and the commented-out fields are gone.
InputParameter l_ip;
uca_org_t local_result;
CoreParameters coredynp;
enum Device_ty device_ty;
bool is_core_pipeline, is_default;
double num_piperegs;
bool process_ind;
double WNANDn;
double WNANDp;
double load_per_pipeline_stage;
// Constructor taking the XML node; device_ty_, _is_core_pipeline and
// _is_default are optional (defaults Core_device, true and true).
Pipeline(XMLNode* _xml_data, const InputParameter *configure_interface,
const CoreParameters & dyn_p_,
enum Device_ty device_ty_ = Core_device,
bool _is_core_pipeline = true, bool _is_default = true);
void compute_stage_vector();
/**
* TODO: compute() completes work that should be completed in computeArea()
* and computeEnergy() recursively. Consider shifting these calculations
* around to be consistent with rest of hierarchy
*/
void compute();
// Intentionally empty override; per the TODO above, compute() does the work.
void computeArea() {};
// TODO: Move energy computation to this function to unify hierarchy
void computeEnergy() {};
// The destructor releases local_result through its cleanup() method.
~Pipeline() {
local_result.cleanup();
};
};
//class core_pipeline :public pipeline{
//public:
// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
// bool thread_clock_gated;
// bool in_order, multithreaded;
// core_pipeline(bool _is_default, const InputParameter *configure_interface);
// virtual void compute_stage_vector();
//
//};
// Declaration of the FunctionalUnit component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below.
class FunctionalUnit :public Component{
class FunctionalUnit : public McPATComponent {
public:
// Member list of the ParseXML-based declaration (holds the parsed XML pointer
// and a core index).
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double FU_height;
double clockRate,executionTime;
double num_fu;
double energy, base_energy,per_access_energy, leakage, gate_leakage;
bool is_default;
enum FU_type fu_type;
statsDef tdp_stats;
statsDef rtp_stats;
statsDef stats_t;
powerDef power_t;
// Member list of the XMLNode-based declaration: core parameters and core
// statistics are held as separate CoreParameters / CoreStatistics members.
InputParameter interface_ip;
CoreParameters core_params;
CoreStatistics core_stats;
double FU_height;
double num_fu;
double energy;
double base_energy;
double per_access_energy;
bool is_default;
enum FU_type fu_type;
statsDef stats_t;
// ParseXML-based constructor plus the is_tdp-flavoured compute/display methods.
FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
// XMLNode-based constructor; computeEnergy() takes no is_tdp flag here and no
// displayEnergy() is declared.
FunctionalUnit(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats, enum FU_type fu_type);
void computeEnergy();
void leakage_feedback(double temperature);
~FunctionalUnit() {};
};
// Declaration of the UndiffCore component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below.
class UndiffCore :public Component{
// TODO: This is a very ambiguous component. Try to refactor it.
class UndiffCore : public McPATComponent {
public:
// ParseXML-based constructor and member list (holds the parsed XML pointer
// and a core index).
UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false);
ParseXML *XML;
int ithCore;
InputParameter interface_ip;
CoreDynParam coredynp;
double clockRate,executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
enum Core_type core_ty;
bool opt_performance, embedded;
double pipeline_stage,num_hthreads,issue_width;
bool is_default;
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~UndiffCore(){};
bool exist;
// Member list of the XMLNode-based declaration, one member per line.
InputParameter interface_ip;
CoreParameters coredynp;
double scktRatio;
double chip_PR_overhead;
double macro_PR_overhead;
enum Core_type core_ty;
bool opt_performance;
bool embedded;
double pipeline_stage;
double num_hthreads;
double issue_width;
bool is_default;
bool exist;
// XMLNode-based constructor; exist_ is optional (default true) and there is
// no embedded_ argument.
UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & dyn_p_,
bool exist_ = true);
// Intentionally empty override of computeArea().
void computeArea() {};
// TODO: Move energy computation to this function to unify hierarchy
void computeEnergy() {};
~UndiffCore() {};
};
#endif /* LOGIC_H_ */

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,15 +26,17 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <sys/stat.h>
#include <cassert>
#include <iostream>
#include "XML_Parse.h"
#include "globalvar.h"
#include "basic_components.h"
#include "io.h"
#include "processor.h"
#include "system.h"
#include "version.h"
#include "xmlParser.h"
@ -41,61 +44,68 @@ using namespace std;
void print_usage(char * argv0);
// Program entry point: parses the command line, loads the XML description,
// builds the System hierarchy, computes area and energy, and prints results.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the ParseXML/Processor-based main and the XMLNode/System-based main appear
// interleaved below.
int main(int argc,char *argv[])
{
char * fb ;
bool infile_specified = false;
int plevel = 2;
opt_for_clk =true;
//cout.precision(10);
if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help"))
{
print_usage(argv[0]);
int main(int argc, char *argv[]) {
char* xml_file = NULL;
int plevel = 2;
// Scan every argument (including argv[0]) for the three recognised options;
// each option consumes the argument that follows it.
// NOTE(review): argv[++i] is read without checking i + 1 < argc. When
// -print_level or -opt_for_clk is the last argument, argv[argc] (a null
// pointer) is handed to atoi(). A trailing -infile leaves xml_file NULL,
// which the check below does catch.
for (int32_t i = 0; i < argc; i++) {
if (argv[i] == string("-infile")) {
xml_file = argv[++i];
} else if (argv[i] == string("-print_level")) {
plevel = atoi(argv[++i]);
} else if (argv[i] == string("-opt_for_clk")) {
McPATComponent::opt_for_clk = (bool)atoi(argv[++i]);
}
}
for (int32_t i = 0; i < argc; i++)
{
if (argv[i] == string("-infile"))
{
infile_specified = true;
i++;
fb = argv[ i];
}
// Ensure that the XML file was specified
if (xml_file == NULL) {
cerr << "ERROR: Please specify infile\n\n";
print_usage(argv[0]);
}
if (argv[i] == string("-print_level"))
{
i++;
plevel = atoi(argv[i]);
}
// Ensure that the XML file exists
// stat() returns non-zero on failure; print_usage() exits, so execution only
// continues past this point when the file could be stat'ed.
struct stat file_info;
if (stat(xml_file, &file_info)) {
cerr << "ERROR: File not found: " << xml_file << endl << endl;
print_usage(argv[0]);
}
if (argv[i] == string("-opt_for_clk"))
{
i++;
opt_for_clk = (bool)atoi(argv[i]);
}
}
if (infile_specified == false)
{
print_usage(argv[0]);
}
cout << "McPAT (version " << VER_MAJOR << "." << VER_MINOR
<< " of " << VER_UPDATE << ") is computing the target processor...\n "
<< endl;
// Parse the XML input file
// The document must contain exactly one top-level "component" child and its
// "type" attribute must be "System".
// NOTE(review): both conditions are enforced only with assert(), so they are
// presumably unchecked in builds that define NDEBUG; strcmp() also needs
// <cstring>, which is not among the includes visible in this hunk - confirm.
XMLNode xml_data = XMLNode::openFileHelper(xml_file, "component");
unsigned int num_children = xml_data.nChildNode("component");
assert(num_children == 1);
XMLNode system_xml = xml_data.getChildNode("component");
assert(strcmp(system_xml.getAttribute("type"), "System") == 0);
cout<<"McPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
<< " of " << VER_UPDATE << ") is computing the target processor...\n "<<endl;
// Recursively instantiate the system hierarchy
System* system = new System(&system_xml);
// Recursively compute chip area
system->computeArea();
// Recursively compute the power consumed
system->computeEnergy();
// Recursively output the computed values
system->displayData(2, plevel);
// Clean up
delete system;
return 0;
//parse XML-based interface
ParseXML *p1= new ParseXML();
p1->parse(fb);
Processor proc(p1);
proc.displayEnergy(2, plevel);
delete p1;
return 0;
}
// Writes the command-line usage text to stderr and terminates the process
// with exit status 1; it never returns to the caller. The argv0 parameter is
// not used in the visible body.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the function header and the usage line each appear in two formattings.
void print_usage(char * argv0)
{
void print_usage(char * argv0) {
cerr << "How to use McPAT:" << endl;
cerr << " mcpat -infile <input file name> -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl;
//cerr << " Note:default print level is at processor level, please increase it to see the details" << endl;
cerr << " mcpat -infile <input file name> -print_level < "
<< "level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P "
<< "only)/1 (optimzed for target clock rate)>" << endl;
exit(1);
}

View file

@ -29,13 +29,16 @@ VPATH = cacti
SRCS = \
Ucache.cc \
XML_Parse.cc \
arbiter.cc \
area.cc \
array.cc \
bank.cc \
basic_circuit.cc \
basic_components.cc \
bus_interconnect.cc \
cachearray.cc \
cachecontroller.cc \
cacheunit.cc \
cacti_interface.cc \
component.cc \
core.cc \
@ -52,14 +55,13 @@ SRCS = \
noc.cc \
nuca.cc \
parameter.cc \
processor.cc \
router.cc \
sharedcache.cc \
subarray.cc \
system.cc \
technology.cc \
uca.cc \
wire.cc \
xmlParser.cc
xmlParser.cc
OBJS = $(patsubst %.cc,$(ODIR)/obj_$(TAG)/%.o,$(SRCS))

View file

@ -1,81 +0,0 @@
TARGET = mcpatXeonCore
SHELL = /bin/sh
.PHONY: all depend clean
.SUFFIXES: .cc .o
ifndef NTHREADS
NTHREADS = 4
endif
LIBS =
INCS = -lm
ifeq ($(TAG),dbg)
DBG = -Wall
OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti
else
DBG =
OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti
#OPT = -O0 -DNTHREADS=$(NTHREADS)
endif
#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
CXX = g++ -m32
CC = gcc -m32
VPATH = cacti
SRCS = \
Ucache.cc \
XML_Parse.cc \
arbiter.cc \
area.cc \
array.cc \
bank.cc \
basic_circuit.cc \
basic_components.cc \
cacti_interface.cc \
component.cc \
core.cc \
crossbar.cc \
decoder.cc \
htree2.cc \
interconnect.cc \
io.cc \
iocontrollers.cc \
logic.cc \
main.cc \
mat.cc \
memoryctrl.cc \
noc.cc \
nuca.cc \
parameter.cc \
processor.cc \
router.cc \
sharedcache.cc \
subarray.cc \
technology_xeon_core.cc \
uca.cc \
wire.cc \
xmlParser.cc
OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
all: obj_$(TAG)/$(TARGET)
cp -f obj_$(TAG)/$(TARGET) $(TARGET)
obj_$(TAG)/$(TARGET) : $(OBJS)
$(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread
#obj_$(TAG)/%.o : %.cc
# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<
obj_$(TAG)/%.o : %.cc
$(CXX) $(CXXFLAGS) -c $< -o $@
clean:
-rm -f *.o $(TARGET)

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,89 +26,75 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef MEMORYCTRL_H_
#define MEMORYCTRL_H_
#include "XML_Parse.h"
#include "parameter.h"
//#include "io.h"
#include "array.h"
//#include "Undifferentiated_Core_Area.h"
#include <vector>
#include "basic_components.h"
#include "cachearray.h"
#include "parameter.h"
// Declaration of the MCBackend component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below.
class MCBackend : public Component {
public:
class MCBackend : public McPATComponent {
public:
InputParameter l_ip;
uca_org_t local_result;
enum MemoryCtrl_type mc_type;
MCParam mcp;
statsDef tdp_stats;
statsDef rtp_stats;
// The McPATComponent-based side holds parameters and statistics as separate
// MCParameters / MCStatistics members.
MCParameters mcp;
MCStatistics mcs;
statsDef stats_t;
powerDef power_t;
// MCParam-based constructor plus compute() and the is_tdp-flavoured methods.
MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
void compute();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~MCBackend(){};
// XMLNode-based constructor; this side declares computeArea() and a
// parameterless computeEnergy() instead of compute()/displayEnergy().
MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
const MCParameters & mcp_, const MCStatistics & mcs_);
void computeArea();
void computeEnergy();
~MCBackend() {};
};
// Declaration of the MCPHY component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below.
class MCPHY : public Component {
public:
class MCPHY : public McPATComponent {
public:
InputParameter l_ip;
uca_org_t local_result;
enum MemoryCtrl_type mc_type;
MCParam mcp;
statsDef tdp_stats;
statsDef rtp_stats;
statsDef stats_t;
powerDef power_t;
// MCParam-based constructor plus compute() and the is_tdp-flavoured methods.
MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
void compute();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~MCPHY(){};
// The McPATComponent-based side holds parameters and statistics as separate
// MCParameters / MCStatistics members.
MCParameters mcp;
MCStatistics mcs;
statsDef stats_t;
// XMLNode-based constructor; this side declares computeArea() and a
// parameterless computeEnergy() instead of compute()/displayEnergy().
MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
const MCParameters & mcp_, const MCStatistics & mcs_);
void computeArea();
void computeEnergy();
~MCPHY() {};
};
// Declaration of the MCFrontEnd component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below.
class MCFrontEnd : public Component {
public:
ParseXML *XML;
InputParameter interface_ip;
enum MemoryCtrl_type mc_type;
MCParam mcp;
// Sub-structures on the Component-based side: an arbiter and three ArrayST
// buffers.
selection_logic * MC_arb;
ArrayST * frontendBuffer;
ArrayST * readBuffer;
ArrayST * writeBuffer;
class MCFrontEnd : public McPATComponent {
public:
// Sub-structures on the McPATComponent-based side: the three buffers are
// CacheArray objects; the arbiter is still a selection_logic.
CacheArray* frontendBuffer;
CacheArray* readBuffer;
CacheArray* writeBuffer;
selection_logic* MC_arb;
MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
InputParameter interface_ip;
MCParameters mcp;
MCStatistics mcs;
// XMLNode-based constructor; no computeEnergy()/displayEnergy() is declared
// alongside it in this span.
MCFrontEnd(XMLNode* _xml_data,
InputParameter* interface_ip_, const MCParameters & mcp_,
const MCStatistics & mcs_);
~MCFrontEnd();
};
// Declaration of the MemoryController component.
// NOTE(review): this text is a rendered diff with its +/- markers stripped, so
// the Component-based and McPATComponent-based declarations appear
// interleaved below.
class MemoryController : public Component {
public:
ParseXML *XML;
InputParameter interface_ip;
enum MemoryCtrl_type mc_type;
MCParam mcp;
// Explicit sub-component pointers held by the Component-based side.
MCFrontEnd * frontend;
MCBackend * transecEngine;
MCPHY * PHY;
Pipeline * pipeLogic;
class MemoryController : public McPATComponent {
public:
// The McPATComponent-based side declares no sub-component pointers here, only
// the interface, parameter and statistics members.
InputParameter interface_ip;
MCParameters mcp;
MCStatistics mcs;
//clock_network clockNetwork;
MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_);
// XMLNode-based constructor plus an initialize_params() helper.
MemoryController(XMLNode* _xml_data, InputParameter* interface_ip_);
void initialize_params();
void set_mc_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~MemoryController();
};
#endif /* MEMORYCTRL_H_ */

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,7 +26,7 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
@ -35,321 +36,236 @@
#include <iostream>
#include <string>
#include "XML_Parse.h"
#include "basic_circuit.h"
#include "common.h"
#include "const.h"
#include "io.h"
#include "noc.h"
#include "parameter.h"
NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_)
:XML(XML_interface),
ithNoC(ithNoC_),
interface_ip(*interface_ip_),
router(0),
link_bus(0),
link_bus_exist(false),
router_exist(false),
M_traffic_pattern(M_traffic_pattern_)
{
/*
* initialize, compute and optimize individual components.
*/
// Constructs the on-chip network component from its XML node: names it,
// reads parameters and statistics, initialises the interface, and then builds
// either a router (noc_params.type non-zero) or a link bus (type zero).
// NOTE(review): this text is a rendered diff with its +/- markers stripped;
// the XML->sys.Embedded branch, set_noc_param() and the second
// init_interface()/scktRatio pair below belong to the other diff side.
OnChipNetwork::OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
InputParameter* interface_ip_)
: McPATComponent(_xml_data), router(NULL), link_bus(NULL), ithNoC(ithNoC_),
interface_ip(*interface_ip_), link_bus_exist(false),
router_exist(false) {
name = "On-Chip Network";
// set_param_stats() runs before init_interface(), so interface_ip is
// configured from the XML before it is used.
set_param_stats();
local_result = init_interface(&interface_ip, name);
scktRatio = g_tp.sckt_co_eff;
if (XML->sys.Embedded)
{
interface_ip.wt =Global_30;
interface_ip.wire_is_mat_type = 0;
interface_ip.wire_os_mat_type = 1;
}
else
{
interface_ip.wt =Global;
interface_ip.wire_is_mat_type = 2;
interface_ip.wire_os_mat_type = 2;
}
set_noc_param();
local_result=init_interface(&interface_ip);
scktRatio = g_tp.sckt_co_eff;
// TODO: Routers and links should be children of the NOC component
// Exactly one of the two sub-structures is created by this constructor.
if (noc_params.type) {
init_router();
} else {
init_link_bus();
}
}
if (nocdynp.type)
{/*
* if NOC compute router, router links must be computed separately
* and called from external
* since total chip area must be known first
*/
init_router();
void OnChipNetwork::init_router() {
router = new Router(noc_params.flit_size,
noc_params.virtual_channel_per_port *
noc_params.input_buffer_entries_per_vc,
noc_params.virtual_channel_per_port,
&(g_tp.peri_global),
noc_params.input_ports, noc_params.output_ports,
noc_params.M_traffic_pattern);
// TODO: Make a router class within McPAT that descends from McPATComponent
// children.push_back(router);
area.set_area(area.get_area() + router->area.get_area() *
noc_params.total_nodes);
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
router_exist = true;
}
// Creates the Interconnect that models the network's links (or bus),
// registers it as a child component and sets link_bus_exist.
// link_len is rescaled in place before it is handed to the Interconnect.
void OnChipNetwork::init_link_bus() {
    // NOTE(review): link_name is chosen here, but the Interconnect below is
    // constructed with the literal "Link" - confirm which name is intended.
    link_name = noc_params.type ? "Links" : "Bus";

    // Link latency and throughput are both divided by the NoC clock rate.
    interface_ip.latency = noc_params.link_latency / noc_params.clockRate;
    interface_ip.throughput = noc_params.link_throughput /
        noc_params.clockRate;

    // Divide the link length by the mean of the horizontal and vertical node
    // counts.
    // NOTE(review): if both counts are integers (they are read with
    // ASSIGN_INT_IF elsewhere in this file) the mean truncates and is zero
    // when the counts sum to less than 2 - confirm against the declaration.
    link_len = link_len /
        ((noc_params.horizontal_nodes + noc_params.vertical_nodes) / 2);
    if (noc_params.total_nodes > 1) {
        //All links are shared by neighbors
        link_len = link_len / 2;
    }

    link_bus = new Interconnect(xml_data, "Link", Uncore_device,
                                noc_params.link_base_width,
                                noc_params.link_base_height,
                                noc_params.flit_size, link_len, &interface_ip,
                                noc_params.link_start_wiring_level,
                                noc_params.clockRate, true/*pipelinable*/,
                                noc_params.route_over_perc);
    children.push_back(link_bus);
    link_bus_exist = true;
}
// TODO: This should use the McPATComponent::computeEnergy function to
// recursively calculate energy of routers and links and then add
//
// Prepares TDP and runtime statistics for the router and/or link bus and then
// delegates to McPATComponent::computeEnergy(). The router's power objects
// are updated directly here; the link bus only has its int_params/int_stats
// fields filled in before the delegated call.
// NOTE(review): this text is a rendered diff with its +/- markers stripped;
// the "else { init_link_bus(link_len_);" lines in the middle of the function
// belong to the other diff side.
void OnChipNetwork::computeEnergy() {
double pppm_t[4] = {1, 1, 1, 1};
// Initialize stats for TDP
tdp_stats.reset();
tdp_stats.readAc.access = noc_stats.duty_cycle;
if (router_exist) {
// TODO: Define a regression to exercise routers
// TODO: Clean this up: it is too invasive and breaks abstraction
// Scale the router's power in place by the duty cycle (first mask entry only).
set_pppm(pppm_t, 1 * tdp_stats.readAc.access, 1, 1, 1);
router->power = router->power * pppm_t;
// NOTE(review): the mask written here is not read before pppm_t is
// overwritten further down - looks like a leftover; confirm.
set_pppm(pppm_t, noc_params.total_nodes,
noc_params.total_nodes,
noc_params.total_nodes,
noc_params.total_nodes);
}
if (link_bus_exist) {
// With a router-based network one port fewer than min_ports is counted as
// active; with a bus all min_ports are.
if (noc_params.type) {
link_bus->int_params.active_ports = noc_params.min_ports - 1;
} else {
link_bus->int_params.active_ports = noc_params.min_ports;
}
else
{
init_link_bus(link_len_); //if bus compute bus
link_bus->int_stats.duty_cycle =
noc_params.M_traffic_pattern * noc_stats.duty_cycle;
// TODO: Decide how to roll multiple routers into a single top-level
// NOC module. I would prefer not to, but it might be a nice feature
// NOTE(review): as above, this mask is overwritten before it is read.
set_pppm(pppm_t, noc_params.total_nodes,
noc_params.total_nodes,
noc_params.total_nodes,
noc_params.total_nodes);
}
// Initialize stats for runtime energy and power
rtp_stats.reset();
rtp_stats.readAc.access = noc_stats.total_access;
// Mask that keeps only the first entry (the others are zeroed).
set_pppm(pppm_t, 1, 0 , 0, 0);
if (router_exist) {
// TODO: Move this to a McPATComponent parent class of Router
// Runtime dynamic energy of each router part is its per-access dynamic
// energy times the total access count; the buffer counts read plus write.
router->buffer.rt_power.readOp.dynamic =
(router->buffer.power.readOp.dynamic +
router->buffer.power.writeOp.dynamic) * rtp_stats.readAc.access;
router->crossbar.rt_power.readOp.dynamic =
router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access;
router->arbiter.rt_power.readOp.dynamic =
router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access;
// This relies on router->power having already been scaled in the TDP
// section above; pppm_lkg is defined outside this function.
router->rt_power = router->rt_power +
(router->buffer.rt_power + router->crossbar.rt_power +
router->arbiter.rt_power) * pppm_t +
router->power * pppm_lkg;//TDP power must be calculated first!
}
if (link_bus_exist) {
link_bus->int_stats.accesses = noc_stats.total_access;
}
// Recursively compute energy
McPATComponent::computeEnergy();
}
void OnChipNetwork::set_param_stats() {
// TODO: Remove this or move initialization elsewhere
memset(&noc_params, 0, sizeof(OnChipNetworkParameters));
int num_children = xml_data->nChildNode("param");
int i;
int mat_type;
for (i = 0; i < num_children; i++) {
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
XMLCSTR node_name = paramNode->getAttribute("name");
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_INT_IF("type", noc_params.type);
ASSIGN_FP_IF("clockrate", noc_params.clockRate);
ASSIGN_INT_IF("flit_bits", noc_params.flit_size);
ASSIGN_FP_IF("link_len", link_len);
ASSIGN_FP_IF("link_throughput", noc_params.link_throughput);
ASSIGN_FP_IF("link_latency", noc_params.link_latency);
ASSIGN_INT_IF("input_ports", noc_params.input_ports);
ASSIGN_INT_IF("output_ports", noc_params.output_ports);
ASSIGN_INT_IF("global_linked_ports", noc_params.global_linked_ports);
ASSIGN_INT_IF("horizontal_nodes", noc_params.horizontal_nodes);
ASSIGN_INT_IF("vertical_nodes", noc_params.vertical_nodes);
ASSIGN_FP_IF("chip_coverage", noc_params.chip_coverage);
ASSIGN_FP_IF("link_routing_over_percentage",
noc_params.route_over_perc);
ASSIGN_INT_IF("has_global_link", noc_params.has_global_link);
ASSIGN_INT_IF("virtual_channel_per_port",
noc_params.virtual_channel_per_port);
ASSIGN_INT_IF("input_buffer_entries_per_vc",
noc_params.input_buffer_entries_per_vc);
ASSIGN_FP_IF("M_traffic_pattern", noc_params.M_traffic_pattern);
ASSIGN_FP_IF("link_base_width", noc_params.link_base_width);
ASSIGN_FP_IF("link_base_height", noc_params.link_base_height);
ASSIGN_INT_IF("link_start_wiring_level",
noc_params.link_start_wiring_level);
ASSIGN_INT_IF("wire_mat_type", mat_type);
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
else {
warnUnrecognizedParam(node_name);
}
}
// //clock power
// clockNetwork.init_wire_external(is_default, &interface_ip);
// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
// clockNetwork.end_wiring_level =5;//toplevel metal
// clockNetwork.start_wiring_level =5;//toplevel metal
// clockNetwork.num_regs = corepipe.tot_stage_vector;
// clockNetwork.optimize_wire();
}
// Change from MHz to Hz
noc_params.clockRate *= 1e6;
void NoC::init_router()
{
router = new Router(nocdynp.flit_size,
nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc,
nocdynp.virtual_channel_per_port, &(g_tp.peri_global),
nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern);
//router->print_router();
area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes);
interface_ip.wire_is_mat_type = mat_type;
interface_ip.wire_os_mat_type = mat_type;
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
router_exist = true;
}
num_children = xml_data->nChildNode("stat");
for (i = 0; i < num_children; i++) {
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
void NoC ::init_link_bus(double link_len_)
{
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("duty_cycle", noc_stats.duty_cycle);
ASSIGN_FP_IF("total_accesses", noc_stats.total_access);
// if (nocdynp.min_ports==1 )
if (nocdynp.type)
link_name = "Links";
else
link_name = "Bus";
link_len=link_len_;
assert(link_len>0);
interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate;
interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate;
link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2;
if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors
link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size,
link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc);
link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area()
* nocdynp.global_linked_ports);
area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes);
link_bus_exist = true;
}
// NoC::computeEnergy(bool is_tdp): accumulates router and link power into
// this component. With is_tdp true it fills `power` (TDP); otherwise it fills
// `rt_power` (runtime). The runtime branch reuses router->power as scaled by
// the TDP branch, so the TDP call has to happen first (see the inline note).
void NoC::computeEnergy(bool is_tdp)
{
//power_point_product_masks
double pppm_t[4] = {1,1,1,1};
double M=nocdynp.duty_cycle;
if (is_tdp)
{
//init stats for TDP
stats_t.readAc.access = M;
tdp_stats = stats_t;
if (router_exist)
{
// Scale the router power by the duty cycle in place, then add it to this
// component's power once per node.
set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern
router->power = router->power*pppm_t;
set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes);
power = power + router->power*pppm_t;
}
if (link_bus_exist)
{
// Per-router link power: the first mask entry is scaled by traffic pattern,
// duty cycle and port count; the other entries by the number of globally
// linked ports.
if (nocdynp.type)
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports,
nocdynp.global_linked_ports, nocdynp.global_linked_ports);
//reset traffic pattern; local port do not have router links
else
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports,
nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern
link_bus_tot_per_Router.power = link_bus->power*pppm_t;
// Then scale the per-router link power by the node count and accumulate it.
set_pppm(pppm_t, nocdynp.total_nodes,
nocdynp.total_nodes,
nocdynp.total_nodes,
nocdynp.total_nodes);
power = power + link_bus_tot_per_Router.power*pppm_t;
}
}
else
{
//init stats for runtime power (RTP)
stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses;
rtp_stats = stats_t;
// Mask that keeps only the first entry (the others are zeroed).
set_pppm(pppm_t, 1, 0 , 0, 0);
if (router_exist)
{
router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ;
router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ;
router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ;
router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t +
router->power*pppm_lkg;//TDP power must be calculated first!
rt_power = rt_power + router->rt_power;
}
if (link_bus_exist)
{
// The first and last mask entries are scaled by the access count; the middle
// two are left at 1.
set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access);
link_bus->rt_power = link_bus->power * pppm_t;
rt_power = rt_power + link_bus->rt_power;
}
}
}
void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
double M =M_traffic_pattern*nocdynp.duty_cycle;
/*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc;
* When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to
* be applied together with McPAT's extra traffic pattern.
* */
if (is_tdp)
{
cout << name << endl;
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
cout << indent_str << "Subthreshold Leakage = "
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout<<endl;
if (router_exist)
{
cout << indent_str << "Router: " << endl;
cout << indent_str_next << "Area = " << router->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next<< "Peak Dynamic = " << router->power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->power.readOp.longer_channel_leakage:router->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next<< "Runtime Dynamic = " << router->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout<<endl;
if (plevel >2){
cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl;
cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl;
cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)
*nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl;
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl;
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl;
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< indent_str<< "Crossbar:" << endl;
cout << indent_str<< indent_str_next << "Area = " << router->crossbar.area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->crossbar.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->crossbar.power.readOp.longer_channel_leakage:router->crossbar.power.readOp.leakage) << " W" << endl;
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage << " W" << endl;
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->crossbar.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< indent_str<< "Arbiter:" << endl;
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->arbiter.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->arbiter.power.readOp.longer_channel_leakage:router->arbiter.power.readOp.leakage) << " W" << endl;
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage << " W" << endl;
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->arbiter.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout <<endl;
}
}
if (link_bus_exist)
{
cout << indent_str << (nocdynp.type? "Per Router ":"") << link_name<<": " << endl;
cout << indent_str_next << "Area = " << link_bus_tot_per_Router.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next<< "Peak Dynamic = " << link_bus_tot_per_Router.power.readOp.dynamic*
nocdynp.clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? link_bus_tot_per_Router.power.readOp.longer_channel_leakage:link_bus_tot_per_Router.power.readOp.leakage)
<<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage
<< " W" << endl;
cout << indent_str_next<< "Runtime Dynamic = " << link_bus->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout<<endl;
}
}
else
{
// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
else {
warnUnrecognizedStat(node_name);
}
}
clockRate = noc_params.clockRate;
noc_params.min_ports =
min(noc_params.input_ports, noc_params.output_ports);
if (noc_params.type) {
noc_params.global_linked_ports = (noc_params.input_ports - 1) +
(noc_params.output_ports - 1);
}
noc_params.total_nodes =
noc_params.horizontal_nodes * noc_params.vertical_nodes;
assert(noc_params.chip_coverage <= 1);
assert(noc_params.route_over_perc <= 1);
assert(link_len > 0);
}
void NoC::set_noc_param()
{
nocdynp.type = XML->sys.NoC[ithNoC].type;
nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate;
nocdynp.clockRate *= 1e6;
nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits;
if (nocdynp.type)
{
nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports;
nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1
nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1);
/*
* Except local i/o ports, all ports needs links( global_linked_ports);
* However only min_ports can be fully active simultaneously
* since the fewer number of ports (input or output ) is the bottleneck.
*/
}
else
{
nocdynp.input_ports = 1;
nocdynp.output_ports = 1;
nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
nocdynp.global_linked_ports = 1;
}
nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port;
nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc;
nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes;
nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes;
nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes;
nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle;
nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link;
nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput;
nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency;
nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage;
nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc;
assert (nocdynp.chip_coverage <=1);
assert (nocdynp.route_over_perc <=1);
if (nocdynp.type)
name = "NOC";
else
name = "BUSES";
OnChipNetwork ::~OnChipNetwork() {
}
NoC ::~NoC(){
if(router) {delete router; router = 0;}
if(link_bus) {delete link_bus; link_bus = 0;}
if (router) {
delete router;
router = 0;
}
if (link_bus) {
delete link_bus;
link_bus = 0;
}
}

View file

@ -2,6 +2,7 @@
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,13 +26,13 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#ifndef NOC_H_
#define NOC_H_
#include "XML_Parse.h"
#include "array.h"
#include "basic_components.h"
#include "interconnect.h"
@ -39,37 +40,62 @@
#include "parameter.h"
#include "router.h"
class NoC :public Component {
public:
// Configuration values for one on-chip network instance.
// NOTE(review): the derived-field notes below come from the parameter-setup
// code visible in this diff; units of the raw fields are not shown here.
class OnChipNetworkParameters {
public:
double clockRate;
int flit_size;
int input_ports;
int output_ports;
// Set by the setup code to min(input_ports, output_ports).
int min_ports;
// Set by the setup code to (input_ports - 1) + (output_ports - 1), and only
// when `type` is true.
int global_linked_ports;
int virtual_channel_per_port;
int input_buffer_entries_per_vc;
int horizontal_nodes;
int vertical_nodes;
// Set by the setup code to horizontal_nodes * vertical_nodes.
int total_nodes;
double link_throughput;
double link_latency;
// The setup code asserts chip_coverage <= 1.
double chip_coverage;
// The setup code asserts route_over_perc <= 1.
double route_over_perc;
bool has_global_link;
// presumably true selects a router-based NoC and false a bus — confirm
// against the XML parsing code.
bool type;
double M_traffic_pattern;
double link_base_width;
double link_base_height;
int link_start_wiring_level;
};
ParseXML *XML;
int ithNoC;
InputParameter interface_ip;
double link_len;
double executionTime;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
Router * router;
interconnect * link_bus;
NoCParam nocdynp;
uca_org_t local_result;
statsDef tdp_stats;
statsDef rtp_stats;
statsDef stats_t;
powerDef power_t;
Component link_bus_tot_per_Router;
bool link_bus_exist;
bool router_exist;
string name, link_name;
double M_traffic_pattern;
NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0);
void set_noc_param();
void computeEnergy(bool is_tdp=true);
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
void init_link_bus(double link_len_);
void init_router();
void computeEnergy_link_bus(bool is_tdp=true);
void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
~NoC();
// Activity statistics for one on-chip network instance.
// NOTE(review): how these are populated and consumed is not visible in this
// chunk — confirm the meaning of each field against set_param_stats() and
// computeEnergy().
class OnChipNetworkStatistics {
public:
double duty_cycle;
double total_access;
};
// McPAT component for an on-chip network; derives from McPATComponent.
// Holds an optional router model and an optional link/bus model together with
// the parameters and statistics declared above.
class OnChipNetwork : public McPATComponent {
public:
// Sub-models; which of them is populated is tracked by router_exist and
// link_bus_exist below.
// NOTE(review): the destructor lines visible in this diff appear to delete
// both pointers when non-null — confirm ownership.
Router* router;
Interconnect* link_bus;
Component link_bus_tot_per_Router;
int ithNoC;
InputParameter interface_ip;
// The parameter-setup code asserts link_len > 0.
double link_len;
double scktRatio, chip_PR_overhead, macro_PR_overhead;
OnChipNetworkParameters noc_params;
OnChipNetworkStatistics noc_stats;
uca_org_t local_result;
statsDef stats_t;
bool link_bus_exist;
bool router_exist;
string link_name;
OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
InputParameter* interface_ip_);
void set_param_stats();
void computeEnergy();
void init_link_bus();
void init_router();
~OnChipNetwork();
};
#endif /* NOC_H_ */

View file

@ -1,839 +0,0 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include "XML_Parse.h"
#include "array.h"
#include "basic_circuit.h"
#include "const.h"
#include "parameter.h"
#include "processor.h"
#include "version.h"
/*
 * Build every chip-level component described by the parsed XML and
 * accumulate the chip totals (area, power, rt_power) plus the per-class
 * aggregates (core, l2, l3, l1dir, l2dir, mcs, flashcontrollers, nius,
 * pcies, noc).
 *
 * For each component class the pattern is the same:
 *   1. construct the component and run computeEnergy() (TDP pass) followed by
 *      computeEnergy(false) (runtime pass);
 *   2. scale its power by a pppm_t multiplier vector before adding it to the
 *      aggregates. For cores, caches, directories and NoCs the first
 *      multiplier is clockRate for `power` and 1/executionTime for
 *      `rt_power`.
 *      NOTE(review): the first pppm_t entry scales the dynamic term; the
 *      remaining three presumably scale the leakage-type terms — confirm
 *      against set_pppm().
 *
 * When a class is flagged homogeneous (procdynp.homoXxx) only one instance is
 * modelled and its results are multiplied by the configured instance count.
 *
 * Exits the process with a non-zero status if private L2s are requested but
 * the modelled L2 count does not match the modelled core count.
 */
Processor::Processor(ParseXML *XML_interface)
    : XML(XML_interface),//TODO: using one global copy may have problems.
      mc(0),
      niu(0),
      pcie(0),
      flashcontroller(0)
{
    /*
     * placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
     * There is no point to have heterogeneous memory controller on chip,
     * thus McPAT only support homogeneous memory controllers.
     */
    int i;
    double pppm_t[4] = {1, 1, 1, 1};
    set_proc_param();

    // Number of instances actually modelled per class: a homogeneous class
    // collapses to a single representative instance (or none).
    if (procdynp.homoCore)
        numCore = procdynp.numCore == 0 ? 0 : 1;
    else
        numCore = procdynp.numCore;

    if (procdynp.homoL2)
        numL2 = procdynp.numL2 == 0 ? 0 : 1;
    else
        numL2 = procdynp.numL2;

    if (XML->sys.Private_L2 && numCore != numL2) {
        cout << "Number of private L2 does not match number of cores" << endl;
        // Fatal configuration error: report failure, not success.
        exit(1);
    }

    if (procdynp.homoL3)
        numL3 = procdynp.numL3 == 0 ? 0 : 1;
    else
        numL3 = procdynp.numL3;

    if (procdynp.homoNOC)
        numNOC = procdynp.numNOC == 0 ? 0 : 1;
    else
        numNOC = procdynp.numNOC;

    if (procdynp.homoL1Dir)
        numL1Dir = procdynp.numL1Dir == 0 ? 0 : 1;
    else
        numL1Dir = procdynp.numL1Dir;

    if (procdynp.homoL2Dir)
        numL2Dir = procdynp.numL2Dir == 0 ? 0 : 1;
    else
        numL2Dir = procdynp.numL2Dir;

    // ---- Cores ----
    for (i = 0; i < numCore; i++) {
        cores.push_back(new Core(XML, i, &interface_ip));
        cores[i]->computeEnergy();
        cores[i]->computeEnergy(false);
        if (procdynp.homoCore) {
            core.area.set_area(core.area.get_area() + cores[i]->area.get_area() * procdynp.numCore);
            set_pppm(pppm_t, cores[i]->clockRate * procdynp.numCore, procdynp.numCore, procdynp.numCore, procdynp.numCore);
            core.power = core.power + cores[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / cores[i]->executionTime, procdynp.numCore, procdynp.numCore, procdynp.numCore);
            core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + core.area.get_area());
            power = power + core.power;
            rt_power = rt_power + core.rt_power;
        } else {
            core.area.set_area(core.area.get_area() + cores[i]->area.get_area());
            area.set_area(area.get_area() + cores[i]->area.get_area());
            set_pppm(pppm_t, cores[i]->clockRate, 1, 1, 1);
            core.power = core.power + cores[i]->power * pppm_t;
            power = power + cores[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / cores[i]->executionTime, 1, 1, 1);
            core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t;
            rt_power = rt_power + cores[i]->rt_power * pppm_t;
        }
    }

    // ---- Shared L2 caches (skipped when L2s are private) ----
    if (!XML->sys.Private_L2) {
        for (i = 0; i < numL2; i++) {
            l2array.push_back(new SharedCache(XML, i, &interface_ip));
            l2array[i]->computeEnergy();
            l2array[i]->computeEnergy(false);
            if (procdynp.homoL2) {
                l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area() * procdynp.numL2);
                set_pppm(pppm_t, l2array[i]->cachep.clockRate * procdynp.numL2, procdynp.numL2, procdynp.numL2, procdynp.numL2);
                l2.power = l2.power + l2array[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, procdynp.numL2, procdynp.numL2, procdynp.numL2);
                l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t;
                area.set_area(area.get_area() + l2.area.get_area());
                power = power + l2.power;
                rt_power = rt_power + l2.rt_power;
            } else {
                l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area());
                area.set_area(area.get_area() + l2array[i]->area.get_area());
                set_pppm(pppm_t, l2array[i]->cachep.clockRate, 1, 1, 1);
                l2.power = l2.power + l2array[i]->power * pppm_t;
                power = power + l2array[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, 1, 1, 1);
                l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t;
                rt_power = rt_power + l2array[i]->rt_power * pppm_t;
            }
        }
    }

    // ---- L3 caches ----
    for (i = 0; i < numL3; i++) {
        l3array.push_back(new SharedCache(XML, i, &interface_ip, L3));
        l3array[i]->computeEnergy();
        l3array[i]->computeEnergy(false);
        if (procdynp.homoL3) {
            l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area() * procdynp.numL3);
            set_pppm(pppm_t, l3array[i]->cachep.clockRate * procdynp.numL3, procdynp.numL3, procdynp.numL3, procdynp.numL3);
            l3.power = l3.power + l3array[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, procdynp.numL3, procdynp.numL3, procdynp.numL3);
            l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + l3.area.get_area());
            power = power + l3.power;
            rt_power = rt_power + l3.rt_power;
        } else {
            l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area());
            area.set_area(area.get_area() + l3array[i]->area.get_area());
            set_pppm(pppm_t, l3array[i]->cachep.clockRate, 1, 1, 1);
            l3.power = l3.power + l3array[i]->power * pppm_t;
            power = power + l3array[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, 1, 1, 1);
            l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t;
            rt_power = rt_power + l3array[i]->rt_power * pppm_t;
        }
    }

    // ---- First-level directories ----
    for (i = 0; i < numL1Dir; i++) {
        l1dirarray.push_back(new SharedCache(XML, i, &interface_ip, L1Directory));
        l1dirarray[i]->computeEnergy();
        l1dirarray[i]->computeEnergy(false);
        if (procdynp.homoL1Dir) {
            l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area() * procdynp.numL1Dir);
            set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir);
            l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir);
            l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + l1dir.area.get_area());
            power = power + l1dir.power;
            rt_power = rt_power + l1dir.rt_power;
        } else {
            l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area());
            area.set_area(area.get_area() + l1dirarray[i]->area.get_area());
            set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate, 1, 1, 1);
            l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t;
            // Scale the chip total exactly like the l1dir aggregate and like
            // every other heterogeneous branch; the previous unscaled add put
            // a value without the clock-rate factor into the chip total.
            power = power + l1dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, 1, 1, 1);
            l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t;
            // Same fix for runtime power (1/executionTime factor was missing).
            rt_power = rt_power + l1dirarray[i]->rt_power * pppm_t;
        }
    }

    // ---- Second-level directories ----
    for (i = 0; i < numL2Dir; i++) {
        l2dirarray.push_back(new SharedCache(XML, i, &interface_ip, L2Directory));
        l2dirarray[i]->computeEnergy();
        l2dirarray[i]->computeEnergy(false);
        if (procdynp.homoL2Dir) {
            l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area() * procdynp.numL2Dir);
            set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir);
            l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir);
            l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + l2dir.area.get_area());
            power = power + l2dir.power;
            rt_power = rt_power + l2dir.rt_power;
        } else {
            l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area());
            area.set_area(area.get_area() + l2dirarray[i]->area.get_area());
            set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate, 1, 1, 1);
            l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t;
            power = power + l2dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, 1, 1, 1);
            l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t;
            rt_power = rt_power + l2dirarray[i]->rt_power * pppm_t;
        }
    }

    // ---- Memory controllers (one model, multiplied by number_mcs) ----
    if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0) {
        mc = new MemoryController(XML, &interface_ip, MC);
        mc->computeEnergy();
        mc->computeEnergy(false);
        mcs.area.set_area(mcs.area.get_area() + mc->area.get_area() * XML->sys.mc.number_mcs);
        area.set_area(area.get_area() + mc->area.get_area() * XML->sys.mc.number_mcs);
        set_pppm(pppm_t, XML->sys.mc.number_mcs * mc->mcp.clockRate, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs);
        mcs.power = mc->power * pppm_t;
        power = power + mcs.power;
        set_pppm(pppm_t, 1 / mc->mcp.executionTime, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs);
        mcs.rt_power = mc->rt_power * pppm_t;
        rt_power = rt_power + mcs.rt_power;
    }

    // ---- Flash controllers ----
    if (XML->sys.flashc.number_mcs > 0) {
        flashcontroller = new FlashController(XML, &interface_ip);
        flashcontroller->computeEnergy();
        flashcontroller->computeEnergy(false);
        double number_fcs = flashcontroller->fcp.num_mcs;
        flashcontrollers.area.set_area(flashcontrollers.area.get_area() + flashcontroller->area.get_area() * number_fcs);
        area.set_area(area.get_area() + flashcontrollers.area.get_area());
        // Only the instance count is applied here (no clock-rate or
        // execution-time factor), for both TDP and runtime power.
        set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs);
        flashcontrollers.power = flashcontroller->power * pppm_t;
        power = power + flashcontrollers.power;
        set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs);
        flashcontrollers.rt_power = flashcontroller->rt_power * pppm_t;
        rt_power = rt_power + flashcontrollers.rt_power;
    }

    // ---- Network interface units ----
    if (XML->sys.niu.number_units > 0) {
        niu = new NIUController(XML, &interface_ip);
        niu->computeEnergy();
        niu->computeEnergy(false);
        nius.area.set_area(nius.area.get_area() + niu->area.get_area() * XML->sys.niu.number_units);
        area.set_area(area.get_area() + niu->area.get_area() * XML->sys.niu.number_units);
        set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units);
        nius.power = niu->power * pppm_t;
        power = power + nius.power;
        // NOTE(review): runtime power is scaled by clockRate here rather than
        // by 1/executionTime as for cores and caches — confirm this matches
        // how NIUController reports rt_power.
        set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units);
        nius.rt_power = niu->rt_power * pppm_t;
        rt_power = rt_power + nius.rt_power;
    }

    // ---- PCIe controllers ----
    if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0) {
        pcie = new PCIeController(XML, &interface_ip);
        pcie->computeEnergy();
        pcie->computeEnergy(false);
        pcies.area.set_area(pcies.area.get_area() + pcie->area.get_area() * XML->sys.pcie.number_units);
        area.set_area(area.get_area() + pcie->area.get_area() * XML->sys.pcie.number_units);
        set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units);
        pcies.power = pcie->power * pppm_t;
        power = power + pcies.power;
        // NOTE(review): same clockRate-based runtime scaling as the NIU
        // section above — confirm against PCIeController.
        set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units);
        pcies.rt_power = pcie->rt_power * pppm_t;
        rt_power = rt_power + pcies.rt_power;
    }

    // ---- On-chip networks / buses ----
    if (numNOC > 0) {
        // Pass 1: construct each network and add its area.
        for (i = 0; i < numNOC; i++) {
            if (XML->sys.NoC[i].type) {
                // Router-based NoC: router area is added first; links are
                // sized in pass 2 once the chip area is known.
                nocs.push_back(new NoC(XML, i, &interface_ip, 1));
            } else {
                // Bus-based interconnect: link length is derived from the
                // chip area accumulated so far and the configured coverage.
                nocs.push_back(new NoC(XML, i, &interface_ip, 1, sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)));
            }
            if (procdynp.homoNOC) {
                noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area() * procdynp.numNOC);
                area.set_area(area.get_area() + noc.area.get_area());
            } else {
                noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area());
                area.set_area(area.get_area() + nocs[i]->area.get_area());
            }
        }

        /*
         * Compute global links associated with each NOC, if any. This must be done at the end (even after the NOC router part) since the total chip
         * area must be obtain to decide the link routing
         */
        for (i = 0; i < numNOC; i++) {
            if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) {
                nocs[i]->init_link_bus(sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage));//compute global links
                // Per-router link area times the node count; additionally
                // times the NoC count when NoCs are homogeneous.
                double links_area = nocs[i]->link_bus_tot_per_Router.area.get_area()
                                    * nocs[i]->nocdynp.total_nodes;
                if (procdynp.homoNOC)
                    links_area = links_area * procdynp.numNOC;
                noc.area.set_area(noc.area.get_area() + links_area);
                area.set_area(area.get_area() + links_area);
            }
        }

        //Compute energy of NoC (w or w/o links) or buses
        for (i = 0; i < numNOC; i++) {
            nocs[i]->computeEnergy();
            nocs[i]->computeEnergy(false);
            if (procdynp.homoNOC) {
                set_pppm(pppm_t, procdynp.numNOC * nocs[i]->nocdynp.clockRate, procdynp.numNOC, procdynp.numNOC, procdynp.numNOC);
                noc.power = noc.power + nocs[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, procdynp.numNOC, procdynp.numNOC, procdynp.numNOC);
                noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t;
                power = power + noc.power;
                rt_power = rt_power + noc.rt_power;
            } else {
                set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1);
                noc.power = noc.power + nocs[i]->power * pppm_t;
                power = power + nocs[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1);
                noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t;
                rt_power = rt_power + nocs[i]->rt_power * pppm_t;
            }
        }
    }
}
/*
 * Print a one-line description of a device type code to stdout.
 *
 * device_type_  numeric device type code; 0-4 are recognised.
 * indent        number of leading spaces before the text.
 *
 * An unrecognised code prints "Unknown Device Type" and terminates the
 * process with a non-zero exit status.
 */
void Processor::displayDeviceType(int device_type_, uint32_t indent)
{
    string indent_str(indent, ' ');

    switch (device_type_) {
    case 0:
        cout << indent_str << "Device Type= " << "ITRS high performance device type" << endl;
        break;
    case 1:
        cout << indent_str << "Device Type= " << "ITRS low standby power device type" << endl;
        break;
    case 2:
        cout << indent_str << "Device Type= " << "ITRS low operating power device type" << endl;
        break;
    case 3:
        cout << indent_str << "Device Type= " << "LP-DRAM device type" << endl;
        break;
    case 4:
        cout << indent_str << "Device Type= " << "COMM-DRAM device type" << endl;
        break;
    default:
        cout << indent_str << "Unknown Device Type" << endl;
        // An unknown code is an input error; exit(0) would report success.
        exit(1);
    }
}
/*
 * Print a one-line description of the interconnect projection type to stdout.
 *
 * interconnect_type_  0 = aggressive projection, 1 = conservative projection.
 * indent              number of leading spaces before the text.
 *
 * Any other code prints "Unknown Interconnect Projection Type" and terminates
 * the process with a non-zero exit status.
 */
void Processor::displayInterconnectType(int interconnect_type_, uint32_t indent)
{
    string indent_str(indent, ' ');

    switch (interconnect_type_) {
    case 0:
        cout << indent_str << "Interconnect metal projection= " << "aggressive interconnect technology projection" << endl;
        break;
    case 1:
        cout << indent_str << "Interconnect metal projection= " << "conservative interconnect technology projection" << endl;
        break;
    default:
        cout << indent_str << "Unknown Interconnect Projection Type" << endl;
        // An unknown code is an input error; exit(0) would report success.
        exit(1);
    }
}
void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp)
{
int i;
bool long_channel = XML->sys.longer_channel_device;
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
if (is_tdp)
{
if (plevel<5)
{
cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
<< " of " << VER_UPDATE << ") results (current print level is "<< plevel
<<", please increase print level to see the details in components): "<<endl;
}
else
{
cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
<< " of " << VER_UPDATE << ") results (current print level is 5)"<< endl;
}
cout <<"*****************************************************************************************"<<endl;
cout <<indent_str<<"Technology "<<XML->sys.core_tech_node<<" nm"<<endl;
//cout <<indent_str<<"Device Type= "<<XML->sys.device_type<<endl;
if (long_channel)
cout <<indent_str<<"Using Long Channel Devices When Appropriate"<<endl;
//cout <<indent_str<<"Interconnect metal projection= "<<XML->sys.interconnect_projection_type<<endl;
displayInterconnectType(XML->sys.interconnect_projection_type, indent);
cout <<indent_str<<"Core clock Rate(MHz) "<<XML->sys.core[0].clock_rate<<endl;
cout <<endl;
cout <<"*****************************************************************************************"<<endl;
cout <<"Processor: "<<endl;
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str << "Peak Power = " << power.readOp.dynamic +
(long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
cout << indent_str << "Total Leakage = " <<
(long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;
cout << indent_str << "Subthreshold Leakage = " << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
//cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
if (numCore >0){
cout <<indent_str<<"Total Cores: "<<XML->sys.number_of_cores << " cores "<<endl;
displayDeviceType(XML->sys.device_type,indent);
cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << core.power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (!XML->sys.Private_L2)
{
if (numL2 >0){
cout <<indent_str<<"Total L2s: "<<endl;
displayDeviceType(XML->sys.L2[0].device_type,indent);
cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << l2.power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
}
if (numL3 >0){
cout <<indent_str<<"Total L3s: "<<endl;
displayDeviceType(XML->sys.L3[0].device_type, indent);
cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << l3.power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (numL1Dir >0){
cout <<indent_str<<"Total First Level Directory: "<<endl;
displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << l1dir.power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (numL2Dir >0){
cout <<indent_str<<"Total First Level Directory: "<<endl;
displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << l2dir.power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (numNOC >0){
cout <<indent_str<<"Total NoCs (Network/Bus): "<<endl;
displayDeviceType(XML->sys.device_type, indent);
cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << noc.power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
{
cout <<indent_str<<"Total MCs: "<<XML->sys.mc.number_mcs << " Memory Controllers "<<endl;
displayDeviceType(XML->sys.device_type, indent);
cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (XML->sys.flashc.number_mcs >0)
{
cout <<indent_str<<"Total Flash/SSD Controllers: "<<flashcontroller->fcp.num_mcs << " Flash/SSD Controllers "<<endl;
displayDeviceType(XML->sys.device_type, indent);
cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (XML->sys.niu.number_units >0 )
{
cout <<indent_str<<"Total NIUs: "<<niu->niup.num_units << " Network Interface Units "<<endl;
displayDeviceType(XML->sys.device_type, indent);
cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
{
cout <<indent_str<<"Total PCIes: "<<pcie->pciep.num_units << " PCIe Controllers "<<endl;
displayDeviceType(XML->sys.device_type, indent);
cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl;
cout <<endl;
}
cout <<"*****************************************************************************************"<<endl;
if (plevel >1)
{
for (i = 0;i < numCore; i++)
{
cores[i]->displayEnergy(indent+4,plevel,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
if (!XML->sys.Private_L2)
{
for (i = 0;i < numL2; i++)
{
l2array[i]->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
}
for (i = 0;i < numL3; i++)
{
l3array[i]->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
for (i = 0;i < numL1Dir; i++)
{
l1dirarray[i]->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
for (i = 0;i < numL2Dir; i++)
{
l2dirarray[i]->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
{
mc->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
if (XML->sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0)
{
flashcontroller->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
if (XML->sys.niu.number_units >0 )
{
niu->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
{
pcie->displayEnergy(indent+4,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
for (i = 0;i < numNOC; i++)
{
nocs[i]->displayEnergy(indent+4,plevel,is_tdp);
cout <<"*****************************************************************************************"<<endl;
}
}
}
else
{
}
}
/**
 * Fill in the processor-level dynamic parameters (procdynp) and the shared
 * CACTI input template (interface_ip) from the parsed XML description.
 *
 * Most interface_ip fields receive fixed placeholder values here; per the
 * original notes they are overridden when each component initializes itself.
 */
void Processor::set_proc_param()
{
    // Per-class homogeneity flag followed by the instance count.
    procdynp.homoCore = bool(XML->sys.homogeneous_cores);
    procdynp.numCore = XML->sys.number_of_cores;

    procdynp.homoL2 = bool(XML->sys.homogeneous_L2s);
    procdynp.numL2 = XML->sys.number_of_L2s;

    procdynp.homoL3 = bool(XML->sys.homogeneous_L3s);
    procdynp.numL3 = XML->sys.number_of_L3s;

    procdynp.homoNOC = bool(XML->sys.homogeneous_NoCs);
    procdynp.numNOC = XML->sys.number_of_NoCs;

    procdynp.homoL1Dir = bool(XML->sys.homogeneous_L1Directories);
    procdynp.numL1Dir = XML->sys.number_of_L1Directories;

    procdynp.homoL2Dir = bool(XML->sys.homogeneous_L2Directories);
    procdynp.numL2Dir = XML->sys.number_of_L2Directories;

    procdynp.numMC = XML->sys.mc.number_mcs;
    procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc;

    // Technology selection: every array region uses the system device type.
    interface_ip.data_arr_ram_cell_tech_type = XML->sys.device_type;
    interface_ip.data_arr_peri_global_tech_type = XML->sys.device_type;
    interface_ip.tag_arr_ram_cell_tech_type = XML->sys.device_type;
    interface_ip.tag_arr_peri_global_tech_type = XML->sys.device_type;
    interface_ip.ic_proj_type = XML->sys.interconnect_projection_type;

    // Fixed optimization weights: delay is weighted so timing can be met;
    // the remaining weights steer the exhaustive per-component search.
    interface_ip.delay_wt = 100;
    interface_ip.area_wt = 0;
    interface_ip.dynamic_power_wt = 100;
    interface_ip.leakage_power_wt = 0;
    interface_ip.cycle_time_wt = 0;

    // Fixed allowed deviations for the same objectives.
    interface_ip.delay_dev = 10000;
    interface_ip.area_dev = 10000;
    interface_ip.dynamic_power_dev = 10000;
    interface_ip.leakage_power_dev = 10000;
    interface_ip.cycle_time_dev = 10000;

    interface_ip.ed = 2;
    // Memory-style parameters are fixed for the processor section because
    // main memory is processed separately.
    interface_ip.burst_len = 1;
    interface_ip.int_prefetch_w = 1;
    interface_ip.page_sz_bits = 0;

    // Operating point and feature size (nm as given, um derived from it).
    interface_ip.temp = XML->sys.temperature;
    interface_ip.F_sz_nm = XML->sys.core_tech_node;
    interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;

    // Placeholder values only: they guarantee every field is initialized
    // and are replaced during each component's own initialization.
    interface_ip.cache_sz = 64;
    interface_ip.line_sz = 1;
    interface_ip.assoc = 1;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz * 8;
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = 64;
    interface_ip.access_mode = 2;

    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;

    interface_ip.is_main_mem = false;
    interface_ip.rpters_in_htree = true;
    interface_ip.ver_htree_wires_over_array = 0;
    interface_ip.broadcast_addr_din_over_ver_htrees = 0;

    interface_ip.num_rw_ports = 1;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 1;

    interface_ip.nuca = 0;
    interface_ip.nuca_bank_count = 0;

    interface_ip.is_cache = true;
    interface_ip.pure_ram = false;
    interface_ip.pure_cam = false;
    interface_ip.force_cache_config = false;

    // Wire type and wire material types depend on the Embedded flag.
    if (!XML->sys.Embedded) {
        interface_ip.wt = Global;
        interface_ip.wire_is_mat_type = 2;
        interface_ip.wire_os_mat_type = 2;
    } else {
        interface_ip.wt = Global_30;
        interface_ip.wire_is_mat_type = 0;
        interface_ip.wire_os_mat_type = 0;
    }

    interface_ip.force_wiretype = false;
    interface_ip.print_detail = 1;
    interface_ip.add_ecc_b_ = true;
}
/**
 * Release the components owned by the processor: every element of the
 * cores, l2array, l3array and nocs vectors, plus the four single
 * controller objects.
 *
 * NOTE(review): l1dirarray and l2dirarray are not released here; confirm
 * whether this object owns their elements.
 */
Processor::~Processor()
{
    while (!cores.empty()) {
        delete cores.back();
        cores.pop_back();
    }
    while (!l2array.empty()) {
        delete l2array.back();
        l2array.pop_back();
    }
    while (!l3array.empty()) {
        delete l3array.back();
        l3array.pop_back();
    }
    while (!nocs.empty()) {
        delete nocs.back();
        nocs.pop_back();
    }

    // The previous guards were inverted (`if (!ptr) delete ptr;`), so a
    // live controller was never freed. Deleting a null pointer is a no-op,
    // so no guard is needed at all.
    // NOTE(review): assumes the constructor initializes these pointers to
    // NULL or to a live object - confirm.
    delete mc;
    delete niu;
    delete pcie;
    delete flashcontroller;
}

File diff suppressed because it is too large Load diff

350
ext/mcpat/system.cc Normal file
View file

@ -0,0 +1,350 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Joel Hestness
* Yasuko Eckert
*
***************************************************************************/
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include "array.h"
#include "basic_circuit.h"
#include "common.h"
#include "const.h"
#include "parameter.h"
#include "system.h"
#include "version.h"
// Construct the top-level System component from its XML node.
//
// Sets the component name, reads the system-wide parameters and statistics
// via set_proc_param(), then walks the "component" child nodes of the XML
// node and appends one child object per recognized "type" attribute.
// Cores and on-chip networks receive a running index (currCore / currNOC).
//
// NOTE(review): STRCMP is a project macro defined outside this view; it is
// presumably an else-if string comparison on `type` - confirm before
// restructuring this chain.
// TODO: Fix this constructor to default initialize all pointers to NULL
System::System(XMLNode* _xml_data)
: McPATComponent(_xml_data) {
int i;
int currCore = 0;
int currNOC = 0;
name = "System";
// Parse system-level params/stats first; interface_ip is filled in there
// and handed to every child constructed below.
set_proc_param();
// TODO: This loop can (and should) be called by every component in
// the hierarchy. Consider moving it to McPATComponent
int numChildren = xml_data->nChildNode("component");
for (i = 0; i < numChildren; i++ ) {
// For each child node of the system,
// NOTE(review): `i` is passed by address; whether getChildNodePtr
// modifies it is not visible here - confirm.
XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
XMLCSTR type = childXML->getAttribute("type");
if (!type) {
warnMissingComponentType(childXML->getAttribute("id"));
} STRCMP(type, "Core") {
// TODO: If homogeneous cores, and currCore > 0, just copy core 0
children.push_back(new Core(childXML, currCore, &interface_ip));
currCore++;
} STRCMP(type, "CacheUnit") {
children.push_back(new CacheUnit(childXML, &interface_ip));
} STRCMP(type, "CacheController") {
// TODO: Remove reliance on interface_ip - there should be a better
// way to share global variables than passing, copying
children.push_back(new CacheController(childXML, &interface_ip));
} STRCMP(type, "MemoryController") {
children.push_back(new MemoryController(childXML, &interface_ip));
} STRCMP(type, "FlashController") {
children.push_back(new FlashController(childXML, &interface_ip));
} STRCMP(type, "NIUController") {
children.push_back(new NIUController(childXML, &interface_ip));
} STRCMP(type, "PCIeController") {
children.push_back(new PCIeController(childXML, &interface_ip));
} STRCMP(type, "Memory") {
// TODO:
// No child object is created for "Memory"; only a warning is issued.
warnIncompleteComponentType(type);
} STRCMP(type, "OnChipNetwork") {
// TODO: Many of the parameters to this constructor should be
// handled in another way
children.push_back(new OnChipNetwork(childXML, currNOC,
&interface_ip));
currNOC++;
// The child is created, but the type is still reported as incomplete.
warnIncompleteComponentType(type);
} STRCMP(type, "BusInterconnect") {
// TODO: Many of the parameters to this constructor should be
// handled in another way
children.push_back(new BusInterconnect(childXML, &interface_ip));
// The child is created, but the type is still reported as incomplete.
warnIncompleteComponentType(type);
// TODO: Add a directory data type that can handle the directories
// as defined by certain McScript output
} else {
warnUnrecognizedComponent(type);
}
}
}
void System::displayDeviceType(int device_type_, uint32_t indent) {
string indent_str(indent, ' ');
cout << indent_str << "Device Type = ";
switch ( device_type_ ) {
case 0:
cout << "ITRS high performance device type" << endl;
break;
case 1:
cout << "ITRS low standby power device type" << endl;
break;
case 2:
cout << "ITRS low operating power device type" << endl;
break;
case 3:
cout << "LP-DRAM device type" << endl;
break;
case 4:
cout << "COMM-DRAM device type" << endl;
break;
default:
cout << indent_str << "Unknown!" << endl;
exit(0);
}
}
void System::displayInterconnectType(int interconnect_type_, uint32_t indent) {
string indent_str(indent, ' ');
cout << indent_str << "Interconnect metal projection = ";
switch ( interconnect_type_ ) {
case 0:
cout << "aggressive interconnect technology projection" << endl;
break;
case 1:
cout << "conservative interconnect technology projection" << endl;
break;
default:
cout << indent_str << "Unknown!" << endl;
exit(0);
}
}
// TODO: Migrate this down to the McPATComponent::displayData function
void System::displayData(uint32_t indent, int plevel) {
string indent_str(indent, ' ');
string indent_str_next(indent + 2, ' ');
if (plevel < 5) {
cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
<< " of " << VER_UPDATE << ") results (current print level is "
<< plevel
<< ", please increase print level to see the details in "
<< "components) " << endl;
} else {
cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
<< " of " << VER_UPDATE << ") results (current print level is 5)"
<< endl;
}
cout << "*****************************************************************"
<< "************************" << endl;
cout << indent_str << "Technology " << core_tech_node << " nm" << endl;
if (longer_channel_device)
cout << indent_str << "Using Long Channel Devices When Appropriate" << endl;
displayInterconnectType(interconnect_projection_type, indent);
cout << indent_str << "Target Clock Rate (MHz) " << target_core_clockrate / 1e6 << endl;
cout << endl;
cout << "*****************************************************************"
<< "************************" << endl;
McPATComponent::displayData(indent, plevel);
}
/**
 * Read the system-level "param" and "stat" children of this component's XML
 * node, validate the required ones, and fill in the shared CACTI input
 * template (interface_ip) that is handed to the child components.
 *
 * The ASSIGN_*_IF macros are project macros defined outside this function;
 * each one is chained onto the preceding `if`, which is why that `if` and the
 * trailing `else` must stay in exactly this shape.
 */
void System::set_proc_param() {
    // TODO: Consider creating a SystemParams class that tracks system-wide
    // parameters like these
    // Sentinels that let the checks below detect parameters missing from
    // the XML.
    // NOTE(review): target_core_clockrate, data_path_width and total_cycles
    // get no sentinel here; if the XML omits them, the code below reads
    // whatever value they already held - confirm they are initialized
    // elsewhere.
    longer_channel_device = false;
    core_tech_node = -1;
    temperature = -1;
    interconnect_projection_type = -1;
    device_type = -1;
    physical_address_width = -1;

    int num_children = xml_data->nChildNode("param");
    int i;
    for (i = 0; i < num_children; i++) {
        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
        XMLCSTR node_name = paramNode->getAttribute("name");
        XMLCSTR value = paramNode->getAttribute("value");

        if (!node_name)
            warnMissingParamName(paramNode->getAttribute("id"));

        ASSIGN_FP_IF("core_tech_node", core_tech_node);
        ASSIGN_INT_IF("target_core_clockrate", target_core_clockrate);
        ASSIGN_INT_IF("temperature", temperature);
        ASSIGN_INT_IF("device_type", device_type);
        ASSIGN_INT_IF("longer_channel_device", longer_channel_device);
        ASSIGN_INT_IF("interconnect_projection_type",
                      interconnect_projection_type);
        ASSIGN_INT_IF("machine_bits", data_path_width);
        ASSIGN_INT_IF("virtual_address_width", virtual_address_width);
        ASSIGN_INT_IF("physical_address_width", physical_address_width);
        ASSIGN_INT_IF("virtual_memory_page_size", virtual_memory_page_size);
        ASSIGN_INT_IF("wire_is_mat_type", interface_ip.wire_is_mat_type);
        ASSIGN_INT_IF("wire_os_mat_type", interface_ip.wire_os_mat_type);
        ASSIGN_INT_IF("delay_wt", interface_ip.delay_wt);
        ASSIGN_INT_IF("area_wt", interface_ip.area_wt);
        ASSIGN_INT_IF("dynamic_power_wt", interface_ip.dynamic_power_wt);
        ASSIGN_INT_IF("leakage_power_wt", interface_ip.leakage_power_wt);
        ASSIGN_INT_IF("cycle_time_wt", interface_ip.cycle_time_wt);
        ASSIGN_INT_IF("delay_dev", interface_ip.delay_dev);
        ASSIGN_INT_IF("area_dev", interface_ip.area_dev);
        ASSIGN_INT_IF("dynamic_power_dev", interface_ip.dynamic_power_dev);
        ASSIGN_INT_IF("leakage_power_dev", interface_ip.leakage_power_dev);
        ASSIGN_INT_IF("cycle_time_dev", interface_ip.cycle_time_dev);
        ASSIGN_INT_IF("ed", interface_ip.ed);
        ASSIGN_INT_IF("burst_len", interface_ip.burst_len);
        ASSIGN_INT_IF("int_prefetch_w", interface_ip.int_prefetch_w);
        ASSIGN_INT_IF("page_sz_bits", interface_ip.page_sz_bits);
        ASSIGN_ENUM_IF("rpters_in_htree", interface_ip.rpters_in_htree, bool);
        ASSIGN_INT_IF("ver_htree_wires_over_array",
                      interface_ip.ver_htree_wires_over_array);
        ASSIGN_INT_IF("broadcast_addr_din_over_ver_htrees",
                      interface_ip.broadcast_addr_din_over_ver_htrees);
        ASSIGN_INT_IF("nuca", interface_ip.nuca);
        ASSIGN_INT_IF("nuca_bank_count", interface_ip.nuca_bank_count);
        ASSIGN_ENUM_IF("force_cache_config",
                       interface_ip.force_cache_config, bool);
        ASSIGN_ENUM_IF("wt", interface_ip.wt, Wire_type);
        ASSIGN_INT_IF("force_wiretype", interface_ip.force_wiretype);
        ASSIGN_INT_IF("print_detail", interface_ip.print_detail);
        ASSIGN_ENUM_IF("add_ecc_b_", interface_ip.add_ecc_b_, bool);

        else {
            warnUnrecognizedParam(node_name);
        }
    }

    // Change from MHz to Hz
    target_core_clockrate *= 1e6;

    // Collapse every non-zero projection code to 1.
    // NOTE(review): this also maps the -1 "unspecified" sentinel to 1, so
    // the interconnect_projection_type < 0 check further down cannot fire.
    // Left as is to keep accepting inputs that omit the parameter; confirm
    // the intended behavior.
    interconnect_projection_type =
        (interconnect_projection_type == 0) ? 0 : 1;

    num_children = xml_data->nChildNode("stat");
    for (i = 0; i < num_children; i++) {
        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
        XMLCSTR node_name = statNode->getAttribute("name");
        XMLCSTR value = statNode->getAttribute("value");

        if (!node_name)
            warnMissingStatName(statNode->getAttribute("id"));

        ASSIGN_FP_IF("total_cycles", total_cycles);

        else {
            warnUnrecognizedStat(node_name);
        }
    }

    // Validate required parameters against their sentinels.
    if (temperature < 0) {
        errorUnspecifiedParam("temperature");
    }

    if (core_tech_node < 0) {
        errorUnspecifiedParam("core_tech_node");
    }

    if (interconnect_projection_type < 0) {
        errorUnspecifiedParam("interconnect_projection_type");
    }

    if (device_type < 0) {
        errorUnspecifiedParam("device_type");
    }

    if (physical_address_width <= 0) {
        errorNonPositiveParam("physical_address_width");
    }

    if (data_path_width <= 0) {
        errorNonPositiveParam("machine_bits");
    }

    if (total_cycles <= 0) {
        // The two literals are concatenated into one format string. A comma
        // between them used to pass the second literal as an unused
        // argument, truncating the message and dropping its newline.
        fprintf(stderr, "WARNING: total_cycles <= 0 in system component, "
                "power numbers will be funky...\n");
    }

    clockRate = target_core_clockrate;
    execution_time = total_cycles / (target_core_clockrate);

    /* Basic parameters*/
    interface_ip.data_arr_ram_cell_tech_type = device_type;
    interface_ip.data_arr_peri_global_tech_type = device_type;
    interface_ip.tag_arr_ram_cell_tech_type = device_type;
    interface_ip.tag_arr_peri_global_tech_type = device_type;
    interface_ip.ic_proj_type = interconnect_projection_type;
    interface_ip.temp = temperature;
    interface_ip.F_sz_nm = core_tech_node;
    interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
    interface_ip.is_main_mem = false;

    // These are there just to make CACTI's error_checking() happy.
    // They are either not actually used or overwritten by each component.
    interface_ip.cache_sz = MIN_BUFFER_SIZE;
    interface_ip.nbanks = 1;
    interface_ip.out_w = 0;
    interface_ip.line_sz = 1;
    interface_ip.assoc = 1;
    interface_ip.num_rw_ports = 1;
    interface_ip.num_search_ports = 1;
    interface_ip.is_cache = true;
    interface_ip.pure_ram = false;
    interface_ip.pure_cam = false;

    // This section of code does not have real meaning; it is just to ensure
    // all data will have an initial value to prevent errors.
    // They will be overridden during each component's initialization.
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = 64;
    interface_ip.access_mode = 2;

    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
}
/**
 * Destructor. Intentionally empty for now; it releases nothing itself.
 */
System::~System()
{
    // TODO: Delete children... do this in McPATComponent
}

View file

@ -1,7 +1,7 @@
/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
@ -25,19 +25,23 @@
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Joel Hestness
* Yasuko Eckert
*
***************************************************************************/
#ifndef PROCESSOR_H_
#define PROCESSOR_H_
#include <vector>
#ifndef SYSTEM_H_
#define SYSTEM_H_
#include "XML_Parse.h"
#include "arbiter.h"
#include "area.h"
#include "array.h"
#include "basic_components.h"
#include "bus_interconnect.h"
#include "cachecontroller.h"
#include "cacheunit.h"
#include "core.h"
#include "decoder.h"
#include "iocontrollers.h"
@ -45,35 +49,23 @@
#include "noc.h"
#include "parameter.h"
#include "router.h"
#include "sharedcache.h"
class Processor : public Component
{
public:
ParseXML *XML;
vector<Core *> cores;
vector<SharedCache *> l2array;
vector<SharedCache *> l3array;
vector<SharedCache *> l1dirarray;
vector<SharedCache *> l2dirarray;
vector<NoC *> nocs;
MemoryController * mc;
NIUController * niu;
PCIeController * pcie;
FlashController * flashcontroller;
class System : public McPATComponent {
public:
InputParameter interface_ip;
ProcParam procdynp;
//wire globalInterconnect;
//clock_network globalClock;
Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers;
int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir;
Processor(ParseXML *XML_interface);
void compute();
int device_type;
double core_tech_node;
int interconnect_projection_type;
int temperature;
System(XMLNode* _xml_data);
void set_proc_param();
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
// TODO: make this recursively compute energy on subcomponents
void displayData(uint32_t indent = 0, int plevel = 100);
void displayDeviceType(int device_type_, uint32_t indent = 0);
void displayInterconnectType(int interconnect_type_, uint32_t indent = 0);
~Processor();
~System();
};
#endif /* PROCESSOR_H_ */
#endif /* SYSTEM_H_ */

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -42,6 +42,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Copyright (c) 2002, Business-Insight
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* <a href="http://www.Business-Insight.com">Business-Insight</a>
* All rights reserved.
*
@ -160,33 +161,32 @@
#define XMLDLLENTRY
#ifndef XML_NO_WIDE_CHAR
#include <wchar.h> // to have 'wcsrtombs' for ANSI version
// to have 'mbsrtowcs' for WIDECHAR version
// to have 'mbsrtowcs' for WIDECHAR version
#endif
#endif
// Some common types for char set portable code
#ifdef _XMLWIDECHAR
#define _CXML(c) L ## c
#define XMLCSTR const wchar_t *
#define XMLSTR wchar_t *
#define XMLCHAR wchar_t
#define _CXML(c) L ## c
#define XMLCSTR const wchar_t *
#define XMLSTR wchar_t *
#define XMLCHAR wchar_t
#else
#define _CXML(c) c
#define XMLCSTR const char *
#define XMLSTR char *
#define XMLCHAR char
#define _CXML(c) c
#define XMLCSTR const char *
#define XMLSTR char *
#define XMLCHAR char
#endif
#ifndef FALSE
#define FALSE 0
#define FALSE 0
#endif /* FALSE */
#ifndef TRUE
#define TRUE 1
#define TRUE 1
#endif /* TRUE */
/// Enumeration for XML parse errors.
typedef enum XMLError
{
typedef enum XMLError {
eXMLErrorNone = 0,
eXMLErrorMissingEndTag,
eXMLErrorNoXMLTagFound,
@ -213,30 +213,32 @@ typedef enum XMLError
/// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents
typedef enum XMLElementType
{
eNodeChild=0,
eNodeAttribute=1,
eNodeText=2,
eNodeClear=3,
eNodeNULL=4
typedef enum XMLElementType {
eNodeChild = 0,
eNodeAttribute = 1,
eNodeText = 2,
eNodeClear = 3,
eNodeNULL = 4
} XMLElementType;
/// Structure used to obtain error details if the parse fails.
typedef struct XMLResults
{
typedef struct XMLResults {
enum XMLError error;
int nLine,nColumn;
int nLine;
int nColumn;
} XMLResults;
/// Structure for XML clear (unformatted) node (usually comments)
typedef struct XMLClear {
XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag;
XMLCSTR lpszValue;
XMLCSTR lpszOpenTag;
XMLCSTR lpszCloseTag;
} XMLClear;
/// Structure for XML attribute.
typedef struct XMLAttribute {
XMLCSTR lpszName; XMLCSTR lpszValue;
XMLCSTR lpszName;
XMLCSTR lpszValue;
} XMLAttribute;
/// XMLElementPosition are not interchangeable with simple indexes
@ -256,9 +258,8 @@ struct XMLNodeContents;
* <li> XMLNode::openFileHelper </li>
* <li> XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)</li>
* </ul> */
typedef struct XMLDLLENTRY XMLNode
{
private:
typedef struct XMLDLLENTRY XMLNode {
private:
struct XMLNodeDataTag;
@ -267,7 +268,7 @@ typedef struct XMLDLLENTRY XMLNode
/// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode
XMLNode(struct XMLNodeDataTag *p);
public:
public:
static XMLCSTR getVersion();///< Return the XMLParser library version number
/** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string.
@ -275,7 +276,8 @@ typedef struct XMLDLLENTRY XMLNode
* @{ */
/// Parse an XML string and return the root of a XMLNode tree representing the string.
static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL,
XMLResults *pResults = NULL);
/**< The "parseString" function parse an XML string and return the root of a XMLNode tree. The "opposite" of this function is
* the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the
* "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error.
@ -288,7 +290,8 @@ typedef struct XMLDLLENTRY XMLNode
*/
/// Parse an XML file and return the root of a XMLNode tree representing the file.
static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL,
XMLResults *pResults = NULL);
/**< The "parseFile" function parses an XML file and returns the root of an XMLNode tree. The "opposite" of this function is
* the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the
* "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error.
@ -301,7 +304,7 @@ typedef struct XMLDLLENTRY XMLNode
*/
/// Parse an XML file and return the root of an XMLNode tree representing the file. Only very crude error checking is performed, and an attempt is made to guess the character encoding used in the file.
static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag=NULL);
static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag = NULL);
/**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file.
* This function also tries to guess char Encoding (UTF-8, ASCII or SHIFT-JIS) based on the first 200 bytes of the file. Since each
* application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files
@ -322,7 +325,7 @@ typedef struct XMLDLLENTRY XMLNode
static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error
/// Create an XML string starting from the current XMLNode.
XMLSTR createXMLString(int nFormat=1, int *pnSize=NULL) const;
XMLSTR createXMLString(int nFormat = 1, int *pnSize = NULL) const;
/**< The returned string should be free'd using the "freeXMLString" function.
*
* If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element
@ -330,8 +333,8 @@ typedef struct XMLDLLENTRY XMLNode
/// Save the content of an xmlNode inside a file
XMLError writeToFile(XMLCSTR filename,
const char *encoding=NULL,
char nFormat=1) const;
const char *encoding = NULL,
char nFormat = 1) const;
/**< If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element with appropriate white spaces and carriage returns.
* If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8".
* If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS".
@ -349,14 +352,15 @@ typedef struct XMLDLLENTRY XMLNode
XMLNode getChildNode(int i=0) const; ///< return ith child node
XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name.
XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing)
XMLNode* getChildNodePtr(XMLCSTR name, int *j) const;
XMLNode getChildNodeWithAttribute(XMLCSTR tagName,
XMLCSTR attributeName,
XMLCSTR attributeValue=NULL,
int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing)
XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
///< return the first child node with specific path
///< return the first child node with specific path
XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
///< return the first child node with specific path.
///< return the first child node with specific path.
int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name
int nChildNode() const; ///< nbr of child node
@ -418,12 +422,12 @@ typedef struct XMLDLLENTRY XMLNode
*/
XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name
XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
/** @} */
@ -482,12 +486,12 @@ typedef struct XMLDLLENTRY XMLNode
XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name
XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear_WOSD(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear_WOSD(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
/** @} */
@ -508,15 +512,14 @@ typedef struct XMLDLLENTRY XMLNode
/** @} */
/// Enumeration for XML character encoding.
typedef enum XMLCharEncoding
{
char_encoding_error=0,
char_encoding_UTF8=1,
char_encoding_legacy=2,
char_encoding_ShiftJIS=3,
char_encoding_GB2312=4,
char_encoding_Big5=5,
char_encoding_GBK=6 // this is actually the same as Big5
typedef enum XMLCharEncoding {
char_encoding_error = 0,
char_encoding_UTF8 = 1,
char_encoding_legacy = 2,
char_encoding_ShiftJIS = 3,
char_encoding_GB2312 = 4,
char_encoding_Big5 = 5,
char_encoding_GBK = 6 // this is actually the same as Big5
} XMLCharEncoding;
/** \addtogroup conversions
@ -589,48 +592,46 @@ typedef struct XMLDLLENTRY XMLNode
* If an inconsistency in the encoding is detected, then the return value is "0". */
/** @} */
private:
// these are functions and structures used internally by the XMLNode class (don't bother about them):
private:
// these are functions and structures used internally by the XMLNode class (don't bother about them):
typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
{
XMLCSTR lpszName; // Element name (=NULL if root)
int nChild, // Number of child nodes
nText, // Number of text fields
nClear, // Number of Clear fields (comments)
nAttribute; // Number of attributes
char isDeclaration; // Whether node is an XML declaration - '<?xml ?>'
struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root)
XMLNode *pChild; // Array of child nodes
XMLCSTR *pText; // Array of text fields
XMLClear *pClear; // Array of clear fields
XMLAttribute *pAttribute; // Array of attributes
int *pOrder; // order of the child_nodes,text_fields,clear_fields
int ref_count; // for garbage collection (smart pointers)
} XMLNodeData;
XMLNodeData *d;
typedef struct XMLNodeDataTag { // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
XMLCSTR lpszName; // Element name (=NULL if root)
int nChild, // Number of child nodes
nText, // Number of text fields
nClear, // Number of Clear fields (comments)
nAttribute; // Number of attributes
char isDeclaration; // Whether node is an XML declaration - '<?xml ?>'
struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root)
XMLNode *pChild; // Array of child nodes
XMLCSTR *pText; // Array of text fields
XMLClear *pClear; // Array of clear fields
XMLAttribute *pAttribute; // Array of attributes
int *pOrder; // order of the child_nodes,text_fields,clear_fields
int ref_count; // for garbage collection (smart pointers)
} XMLNodeData;
XMLNodeData *d;
char parseClearTag(void *px, void *pa);
char maybeAddTxT(void *pa, XMLCSTR tokenPStr);
int ParseXMLElement(void *pXML);
void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype);
int indexText(XMLCSTR lpszValue) const;
int indexClear(XMLCSTR lpszValue) const;
XMLNode addChild_priv(int,XMLSTR,char,int);
XMLAttribute *addAttribute_priv(int,XMLSTR,XMLSTR);
XMLCSTR addText_priv(int,XMLSTR,int);
XMLClear *addClear_priv(int,XMLSTR,XMLCSTR,XMLCSTR,int);
void emptyTheNode(char force);
static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype);
static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat);
static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index);
static void exactMemory(XMLNodeData *d);
static int detachFromParent(XMLNodeData *d);
char parseClearTag(void *px, void *pa);
char maybeAddTxT(void *pa, XMLCSTR tokenPStr);
int ParseXMLElement(void *pXML);
void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype);
int indexText(XMLCSTR lpszValue) const;
int indexClear(XMLCSTR lpszValue) const;
XMLNode addChild_priv(int, XMLSTR, char, int);
XMLAttribute *addAttribute_priv(int, XMLSTR, XMLSTR);
XMLCSTR addText_priv(int, XMLSTR, int);
XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int);
void emptyTheNode(char force);
static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype);
static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat);
static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index);
static void exactMemory(XMLNodeData *d);
static int detachFromParent(XMLNodeData *d);
} XMLNode;
/// This structure is given by the function XMLNode::enumContents.
typedef struct XMLNodeContents
{
typedef struct XMLNodeContents {
/// This dictates what's the content of the XMLNodeContent
enum XMLElementType etype;
/**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */
@ -664,12 +665,12 @@ XMLDLLENTRY void freeXMLString(XMLSTR t); // {free(t);}
* delete them without any trouble.
*
* @{ */
XMLDLLENTRY char xmltob(XMLCSTR xmlString,char defautValue=0);
XMLDLLENTRY int xmltoi(XMLCSTR xmlString,int defautValue=0);
XMLDLLENTRY long xmltol(XMLCSTR xmlString,long defautValue=0);
XMLDLLENTRY double xmltof(XMLCSTR xmlString,double defautValue=.0);
XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString,XMLCSTR defautValue=_CXML(""));
XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
XMLDLLENTRY char xmltob(XMLCSTR xmlString, char defautValue=0);
XMLDLLENTRY int xmltoi(XMLCSTR xmlString, int defautValue=0);
XMLDLLENTRY long xmltol(XMLCSTR xmlString, long defautValue=0);
XMLDLLENTRY double xmltof(XMLCSTR xmlString, double defautValue=.0);
XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString, XMLCSTR defautValue=_CXML(""));
XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString, XMLCHAR defautValue=_CXML('\0'));
/** @} */
/** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions.
@ -685,10 +686,9 @@ XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
* \note If you are creating from scratch an XML file using the provided XMLNode class
* you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the
* processing job for you during rendering).*/
typedef struct XMLDLLENTRY ToXMLStringTool
{
typedef struct XMLDLLENTRY ToXMLStringTool {
public:
ToXMLStringTool(): buf(NULL),buflen(0){}
ToXMLStringTool(): buf(NULL), buflen(0){}
~ToXMLStringTool();
void freeBuffer();///<call this function when you have finished using this object to release memory used by the internal buffer.
@ -718,10 +718,9 @@ private:
* b64-encoded text included inside the XML file, use "decode". Alternatively, these
* functions can also be used to "encrypt/decrypt" some critical data contained inside
* the XML (it's not a strong encryption at all, but sometimes it can be useful). */
typedef struct XMLDLLENTRY XMLParserBase64Tool
{
typedef struct XMLDLLENTRY XMLParserBase64Tool {
public:
XMLParserBase64Tool(): buf(NULL),buflen(0){}
XMLParserBase64Tool(): buf(NULL), buflen(0){}
~XMLParserBase64Tool();
void freeBuffer();///< Call this function when you have finished using this object to release memory used by the internal buffer.