gem5/ext/mcpat/cacti/cacti_interface.h
Anthony Gutierrez e553a7bfa7 ext: add McPAT source
this patch adds the source for mcpat, a power, area, and timing modeling
framework.
2014-04-01 12:44:30 -04:00

633 lines
21 KiB
C++

/*****************************************************************************
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
*
***************************************************************************/
#ifndef __CACTI_INTERFACE_H__
#define __CACTI_INTERFACE_H__
#include <iostream>
#include <list>
#include <map>
#include <string>
#include <vector>
#include "const.h"
using namespace std;
class min_values_t;
class mem_array;
class uca_org_t;
class powerComponents
{
public:
double dynamic;
double leakage;
double gate_leakage;
double short_circuit;
double longer_channel_leakage;
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
powerComponents(const powerComponents & obj) { *this = obj; }
powerComponents & operator=(const powerComponents & rhs)
{
dynamic = rhs.dynamic;
leakage = rhs.leakage;
gate_leakage = rhs.gate_leakage;
short_circuit = rhs.short_circuit;
longer_channel_leakage = rhs.longer_channel_leakage;
return *this;
}
void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
friend powerComponents operator*(const powerComponents & x, double const * const y);
};
class powerDef
{
public:
powerComponents readOp;
powerComponents writeOp;
powerComponents searchOp;//Sheng: for CAM and FA
powerDef() : readOp(), writeOp(), searchOp() { }
void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
friend powerDef operator+(const powerDef & x, const powerDef & y);
friend powerDef operator*(const powerDef & x, double const * const y);
};
enum Wire_type
{
Global /* gloabl wires with repeaters */,
Global_5 /* 5% delay penalty */,
Global_10 /* 10% delay penalty */,
Global_20 /* 20% delay penalty */,
Global_30 /* 30% delay penalty */,
Low_swing /* differential low power wires with high area overhead */,
Semi_global /* mid-level wires with repeaters*/,
Transmission /* tranmission lines with high area overhead */,
Optical /* optical wires */,
Invalid_wtype
};
class InputParameter
{
public:
void parse_cfg(const string & infile);
bool error_checking(); // return false if the input parameters are problematic
void display_ip();
unsigned int cache_sz; // in bytes
unsigned int line_sz;
unsigned int assoc;
unsigned int nbanks;
unsigned int out_w;// == nr_bits_out
bool specific_tag;
unsigned int tag_w;
unsigned int access_mode;
unsigned int obj_func_dyn_energy;
unsigned int obj_func_dyn_power;
unsigned int obj_func_leak_power;
unsigned int obj_func_cycle_t;
double F_sz_nm; // feature size in nm
double F_sz_um; // feature size in um
unsigned int num_rw_ports;
unsigned int num_rd_ports;
unsigned int num_wr_ports;
unsigned int num_se_rd_ports; // number of single ended read ports
unsigned int num_search_ports; // Sheng: number of search ports for CAM
bool is_main_mem;
bool is_cache;
bool pure_ram;
bool pure_cam;
bool rpters_in_htree; // if there are repeaters in htree segment
unsigned int ver_htree_wires_over_array;
unsigned int broadcast_addr_din_over_ver_htrees;
unsigned int temp;
unsigned int ram_cell_tech_type;
unsigned int peri_global_tech_type;
unsigned int data_arr_ram_cell_tech_type;
unsigned int data_arr_peri_global_tech_type;
unsigned int tag_arr_ram_cell_tech_type;
unsigned int tag_arr_peri_global_tech_type;
unsigned int burst_len;
unsigned int int_prefetch_w;
unsigned int page_sz_bits;
unsigned int ic_proj_type; // interconnect_projection_type
unsigned int wire_is_mat_type; // wire_inside_mat_type
unsigned int wire_os_mat_type; // wire_outside_mat_type
enum Wire_type wt;
int force_wiretype;
bool print_input_args;
unsigned int nuca_cache_sz; // TODO
int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm;
bool force_cache_config;
int cache_level;
int cores;
int nuca_bank_count;
int force_nuca_bank;
int delay_wt, dynamic_power_wt, leakage_power_wt,
cycle_time_wt, area_wt;
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
cycle_time_wt_nuca, area_wt_nuca;
int delay_dev, dynamic_power_dev, leakage_power_dev,
cycle_time_dev, area_dev;
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
cycle_time_dev_nuca, area_dev_nuca;
int ed; //ED or ED2 optimization
int nuca;
bool fast_access;
unsigned int block_sz; // bytes
unsigned int tag_assoc;
unsigned int data_assoc;
bool is_seq_acc;
bool fully_assoc;
unsigned int nsets; // == number_of_sets
int print_detail;
bool add_ecc_b_;
//parameters for design constraint
double throughput;
double latency;
bool pipelinable;
int pipeline_stages;
int per_stage_vector;
bool with_clock_grid;
};
typedef struct{
int Ndwl;
int Ndbl;
double Nspd;
int deg_bl_muxing;
int Ndsam_lev_1;
int Ndsam_lev_2;
int number_activated_mats_horizontal_direction;
int number_subbanks;
int page_size_in_bits;
double delay_route_to_bank;
double delay_crossbar;
double delay_addr_din_horizontal_htree;
double delay_addr_din_vertical_htree;
double delay_row_predecode_driver_and_block;
double delay_row_decoder;
double delay_bitlines;
double delay_sense_amp;
double delay_subarray_output_driver;
double delay_bit_mux_predecode_driver_and_block;
double delay_bit_mux_decoder;
double delay_senseamp_mux_lev_1_predecode_driver_and_block;
double delay_senseamp_mux_lev_1_decoder;
double delay_senseamp_mux_lev_2_predecode_driver_and_block;
double delay_senseamp_mux_lev_2_decoder;
double delay_input_htree;
double delay_output_htree;
double delay_dout_vertical_htree;
double delay_dout_horizontal_htree;
double delay_comparator;
double access_time;
double cycle_time;
double multisubbank_interleave_cycle_time;
double delay_request_network;
double delay_inside_mat;
double delay_reply_network;
double trcd;
double cas_latency;
double precharge_delay;
powerDef power_routing_to_bank;
powerDef power_addr_input_htree;
powerDef power_data_input_htree;
powerDef power_data_output_htree;
powerDef power_addr_horizontal_htree;
powerDef power_datain_horizontal_htree;
powerDef power_dataout_horizontal_htree;
powerDef power_addr_vertical_htree;
powerDef power_datain_vertical_htree;
powerDef power_row_predecoder_drivers;
powerDef power_row_predecoder_blocks;
powerDef power_row_decoders;
powerDef power_bit_mux_predecoder_drivers;
powerDef power_bit_mux_predecoder_blocks;
powerDef power_bit_mux_decoders;
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
powerDef power_senseamp_mux_lev_1_decoders;
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
powerDef power_senseamp_mux_lev_2_decoders;
powerDef power_bitlines;
powerDef power_sense_amps;
powerDef power_prechg_eq_drivers;
powerDef power_output_drivers_at_subarray;
powerDef power_dataout_vertical_htree;
powerDef power_comparators;
powerDef power_crossbar;
powerDef total_power;
double area;
double all_banks_height;
double all_banks_width;
double bank_height;
double bank_width;
double subarray_memory_cell_area_height;
double subarray_memory_cell_area_width;
double mat_height;
double mat_width;
double routing_area_height_within_bank;
double routing_area_width_within_bank;
double area_efficiency;
// double perc_power_dyn_routing_to_bank;
// double perc_power_dyn_addr_horizontal_htree;
// double perc_power_dyn_datain_horizontal_htree;
// double perc_power_dyn_dataout_horizontal_htree;
// double perc_power_dyn_addr_vertical_htree;
// double perc_power_dyn_datain_vertical_htree;
// double perc_power_dyn_row_predecoder_drivers;
// double perc_power_dyn_row_predecoder_blocks;
// double perc_power_dyn_row_decoders;
// double perc_power_dyn_bit_mux_predecoder_drivers;
// double perc_power_dyn_bit_mux_predecoder_blocks;
// double perc_power_dyn_bit_mux_decoders;
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
// double perc_power_dyn_senseamp_mux_lev_1_decoders;
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
// double perc_power_dyn_senseamp_mux_lev_2_decoders;
// double perc_power_dyn_bitlines;
// double perc_power_dyn_sense_amps;
// double perc_power_dyn_prechg_eq_drivers;
// double perc_power_dyn_subarray_output_drivers;
// double perc_power_dyn_dataout_vertical_htree;
// double perc_power_dyn_comparators;
// double perc_power_dyn_crossbar;
// double perc_power_dyn_spent_outside_mats;
// double perc_power_leak_routing_to_bank;
// double perc_power_leak_addr_horizontal_htree;
// double perc_power_leak_datain_horizontal_htree;
// double perc_power_leak_dataout_horizontal_htree;
// double perc_power_leak_addr_vertical_htree;
// double perc_power_leak_datain_vertical_htree;
// double perc_power_leak_row_predecoder_drivers;
// double perc_power_leak_row_predecoder_blocks;
// double perc_power_leak_row_decoders;
// double perc_power_leak_bit_mux_predecoder_drivers;
// double perc_power_leak_bit_mux_predecoder_blocks;
// double perc_power_leak_bit_mux_decoders;
// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
// double perc_power_leak_senseamp_mux_lev_1_decoders;
// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
// double perc_power_leak_senseamp_mux_lev_2_decoders;
// double perc_power_leak_bitlines;
// double perc_power_leak_sense_amps;
// double perc_power_leak_prechg_eq_drivers;
// double perc_power_leak_subarray_output_drivers;
// double perc_power_leak_dataout_vertical_htree;
// double perc_power_leak_comparators;
// double perc_power_leak_crossbar;
// double perc_leak_mats;
// double perc_active_mats;
double refresh_power;
double dram_refresh_period;
double dram_array_availability;
double dyn_read_energy_from_closed_page;
double dyn_read_energy_from_open_page;
double leak_power_subbank_closed_page;
double leak_power_subbank_open_page;
double leak_power_request_and_reply_networks;
double activate_energy;
double read_energy;
double write_energy;
double precharge_energy;
} results_mem_array;
class uca_org_t
{
public:
mem_array * tag_array2;
mem_array * data_array2;
double access_time;
double cycle_time;
double area;
double area_efficiency;
powerDef power;
double leak_power_with_sleep_transistors_in_mats;
double cache_ht;
double cache_len;
char file_n[100];
double vdd_periph_global;
bool valid;
results_mem_array tag_array;
results_mem_array data_array;
uca_org_t();
void find_delay();
void find_energy();
void find_area();
void find_cyc();
void adjust_area();//for McPAT only to adjust routing overhead
void cleanup();
~uca_org_t(){};
};
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
uca_org_t cacti_interface(const string & infile_name);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(InputParameter * const local_interface);
//McPAT's plain interface, please keep !!!
uca_org_t init_interface(InputParameter * const local_interface);
//McPAT's plain interface, please keep !!!
uca_org_t cacti_interface(
int cache_size,
int line_size,
int associativity,
int rw_ports,
int excl_read_ports,
int excl_write_ports,
int single_ended_read_ports,
int search_ports,
int banks,
double tech_node,
int output_width,
int specific_tag,
int tag_width,
int access_mode,
int cache,
int main_mem,
int obj_func_delay,
int obj_func_dynamic_power,
int obj_func_leakage_power,
int obj_func_cycle_time,
int obj_func_area,
int dev_func_delay,
int dev_func_dynamic_power,
int dev_func_leakage_power,
int dev_func_area,
int dev_func_cycle_time,
int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
int temp,
int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
int data_arr_ram_cell_tech_flavor_in,
int data_arr_peri_global_tech_flavor_in,
int tag_arr_ram_cell_tech_flavor_in,
int tag_arr_peri_global_tech_flavor_in,
int interconnect_projection_type_in,
int wire_inside_mat_type_in,
int wire_outside_mat_type_in,
int REPEATERS_IN_HTREE_SEGMENTS_in,
int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
int PAGE_SIZE_BITS_in,
int BURST_LENGTH_in,
int INTERNAL_PREFETCH_WIDTH_in,
int force_wiretype,
int wiretype,
int force_config,
int ndwl,
int ndbl,
int nspd,
int ndcm,
int ndsam1,
int ndsam2,
int ecc);
// int cache_size,
// int line_size,
// int associativity,
// int rw_ports,
// int excl_read_ports,
// int excl_write_ports,
// int single_ended_read_ports,
// int banks,
// double tech_node,
// int output_width,
// int specific_tag,
// int tag_width,
// int access_mode,
// int cache,
// int main_mem,
// int obj_func_delay,
// int obj_func_dynamic_power,
// int obj_func_leakage_power,
// int obj_func_area,
// int obj_func_cycle_time,
// int dev_func_delay,
// int dev_func_dynamic_power,
// int dev_func_leakage_power,
// int dev_func_area,
// int dev_func_cycle_time,
// int temp,
// int data_arr_ram_cell_tech_flavor_in,
// int data_arr_peri_global_tech_flavor_in,
// int tag_arr_ram_cell_tech_flavor_in,
// int tag_arr_peri_global_tech_flavor_in,
// int interconnect_projection_type_in,
// int wire_inside_mat_type_in,
// int wire_outside_mat_type_in,
// int REPEATERS_IN_HTREE_SEGMENTS_in,
// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
//// double MAXAREACONSTRAINT_PERC_in,
//// double MAXACCTIMECONSTRAINT_PERC_in,
//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
// int PAGE_SIZE_BITS_in,
// int BURST_LENGTH_in,
// int INTERNAL_PREFETCH_WIDTH_in);
//Naveen's interface
uca_org_t cacti_interface(
int cache_size,
int line_size,
int associativity,
int rw_ports,
int excl_read_ports,
int excl_write_ports,
int single_ended_read_ports,
int banks,
double tech_node,
int page_sz,
int burst_length,
int pre_width,
int output_width,
int specific_tag,
int tag_width,
int access_mode, //0 normal, 1 seq, 2 fast
int cache, //scratch ram or cache
int main_mem,
int obj_func_delay,
int obj_func_dynamic_power,
int obj_func_leakage_power,
int obj_func_area,
int obj_func_cycle_time,
int dev_func_delay,
int dev_func_dynamic_power,
int dev_func_leakage_power,
int dev_func_area,
int dev_func_cycle_time,
int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
int temp,
int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
int data_arr_ram_cell_tech_flavor_in,
int data_arr_peri_global_tech_flavor_in,
int tag_arr_ram_cell_tech_flavor_in,
int tag_arr_peri_global_tech_flavor_in,
int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
int wire_inside_mat_type_in,
int wire_outside_mat_type_in,
int is_nuca, // 0 - UCA, 1 - NUCA
int core_count,
int cache_level, // 0 - L2, 1 - L3
int nuca_bank_count,
int nuca_obj_func_delay,
int nuca_obj_func_dynamic_power,
int nuca_obj_func_leakage_power,
int nuca_obj_func_area,
int nuca_obj_func_cycle_time,
int nuca_dev_func_delay,
int nuca_dev_func_dynamic_power,
int nuca_dev_func_leakage_power,
int nuca_dev_func_area,
int nuca_dev_func_cycle_time,
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
int p_input);
class mem_array
{
public:
int Ndcm;
int Ndwl;
int Ndbl;
double Nspd;
int deg_bl_muxing;
int Ndsam_lev_1;
int Ndsam_lev_2;
double access_time;
double cycle_time;
double multisubbank_interleave_cycle_time;
double area_ram_cells;
double area;
powerDef power;
double delay_senseamp_mux_decoder;
double delay_before_subarray_output_driver;
double delay_from_subarray_output_driver_to_output;
double height;
double width;
double mat_height;
double mat_length;
double subarray_length;
double subarray_height;
double delay_route_to_bank,
delay_input_htree,
delay_row_predecode_driver_and_block,
delay_row_decoder,
delay_bitlines,
delay_sense_amp,
delay_subarray_output_driver,
delay_dout_htree,
delay_comparator,
delay_matchlines;
double all_banks_height,
all_banks_width,
area_efficiency;
powerDef power_routing_to_bank;
powerDef power_addr_input_htree;
powerDef power_data_input_htree;
powerDef power_data_output_htree;
powerDef power_htree_in_search;
powerDef power_htree_out_search;
powerDef power_row_predecoder_drivers;
powerDef power_row_predecoder_blocks;
powerDef power_row_decoders;
powerDef power_bit_mux_predecoder_drivers;
powerDef power_bit_mux_predecoder_blocks;
powerDef power_bit_mux_decoders;
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
powerDef power_senseamp_mux_lev_1_decoders;
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
powerDef power_senseamp_mux_lev_2_decoders;
powerDef power_bitlines;
powerDef power_sense_amps;
powerDef power_prechg_eq_drivers;
powerDef power_output_drivers_at_subarray;
powerDef power_dataout_vertical_htree;
powerDef power_comparators;
powerDef power_cam_bitline_precharge_eq_drv;
powerDef power_searchline;
powerDef power_searchline_precharge;
powerDef power_matchlines;
powerDef power_matchline_precharge;
powerDef power_matchline_to_wordline_drv;
min_values_t *arr_min;
enum Wire_type wt;
// dram stats
double activate_energy, read_energy, write_energy, precharge_energy,
refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
leak_power_request_and_reply_networks;
double precharge_delay;
static bool lt(const mem_array * m1, const mem_array * m2);
};
#endif