diff --git a/ext/mcpat/XML_Parse.cc b/ext/mcpat/XML_Parse.cc deleted file mode 100644 index ae3ee6f17..000000000 --- a/ext/mcpat/XML_Parse.cc +++ /dev/null @@ -1,1798 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - - -#include -#include - -#include "XML_Parse.h" -#include "xmlParser.h" - -using namespace std; - -void ParseXML::parse(char* filepath) -{ - unsigned int i,j,k,m,n; - unsigned int NumofCom_4; - unsigned int itmp; - //Initialize all structures - ParseXML::initialize(); - - // this open and parse the XML file: - XMLNode xMainNode=XMLNode::openFileHelper(filepath,"component"); //the 'component' in the first layer - - XMLNode xNode2=xMainNode.getChildNode("component"); // the 'component' in the second layer - //get all params in the second layer - itmp=xNode2.nChildNode("param"); - for(i=0; iOrderofComponents_3layer) - { - //___________________________get all system.core0-n________________________________________________ - if (sys.homogeneous_cores==1) OrderofComponents_3layer=0; - else OrderofComponents_3layer=sys.number_of_cores-1; - for (i=0; i<=OrderofComponents_3layer; i++) - { - xNode3=xNode2.getChildNode("component",i); - if (xNode3.isEmpty()==1) { - printf("The value of homogeneous_cores or number_of_cores is not correct!"); - exit(0); - } - else{ - if (strstr(xNode3.getAttribute("name"),"core")!=NULL) - { - { //For cpu0-cpui - //Get all params with system.core? - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.mem")!=NULL) - { - - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.mc")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.niu")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.pcie")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.flashc")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k -#include - -#include - -#include "xmlParser.h" -using namespace std; - -/* -void myfree(char *t); // {free(t);} -ToXMLStringTool tx,tx2; -*/ -//all subnodes at the level of system.core(0-n) -//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - -typedef struct{ - int prediction_width; - char prediction_scheme[20]; - int predictor_size; - int predictor_entries; - int local_predictor_size[20]; - int local_predictor_entries; - int global_predictor_entries; - int global_predictor_bits; - int chooser_predictor_entries; - int chooser_predictor_bits; - double predictor_accesses; -} predictor_systemcore; -typedef struct{ - int number_entries; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - double total_hits; - double total_accesses; - double total_misses; - double conflicts; -} itlb_systemcore; -typedef struct{ - //params - double icache_config[20]; - int buffer_sizes[20]; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double read_misses; - double replacements; - double read_hits; - double total_hits; - double total_misses; - double miss_buffer_access; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double conflicts; -} icache_systemcore; -typedef struct{ - //params - int number_entries; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double write_hits; - double read_hits; - double read_misses; - double write_misses; - double total_hits; - double total_misses; - double conflicts; -} dtlb_systemcore; -typedef struct{ - //params - double dcache_config[20]; - int buffer_sizes[20]; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_access; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; -} dcache_systemcore; -typedef struct{ - //params - int BTB_config[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; -} BTB_systemcore; -typedef struct{ - //all params at the level of system.core(0-n) - int clock_rate; - bool opt_local; - bool x86; - int machine_bits; - int virtual_address_width; - int physical_address_width; - int opcode_width; - int micro_opcode_width; - int instruction_length; - int machine_type; - int internal_datapath_width; - int number_hardware_threads; - int fetch_width; - int number_instruction_fetch_ports; - int decode_width; - int issue_width; - int peak_issue_width; - int commit_width; - int pipelines_per_core[20]; - int pipeline_depth[20]; - char FPU[20]; - char divider_multiplier[20]; - int ALU_per_core; - double FPU_per_core; - int MUL_per_core; - int instruction_buffer_size; - int decoded_stream_buffer_size; - int instruction_window_scheme; - int instruction_window_size; - int fp_instruction_window_size; - int ROB_size; - int archi_Regs_IRF_size; - int archi_Regs_FRF_size; - int phy_Regs_IRF_size; - int phy_Regs_FRF_size; - int rename_scheme; - int register_windows_size; - char LSU_order[20]; - int store_buffer_size; - int load_buffer_size; - int memory_ports; - char Dcache_dual_pump[20]; - int RAS_size; - int fp_issue_width; - int prediction_width; - int number_of_BTB; - int number_of_BPT; - - //all stats at the level of system.core(0-n) - double total_instructions; - double int_instructions; - double fp_instructions; - double branch_instructions; - double branch_mispredictions; - double committed_instructions; - double committed_int_instructions; - double committed_fp_instructions; - double load_instructions; - double store_instructions; - double total_cycles; - double idle_cycles; - double busy_cycles; - double instruction_buffer_reads; - double instruction_buffer_write; - double ROB_reads; - double ROB_writes; - double rename_accesses; - double fp_rename_accesses; - double rename_reads; - double rename_writes; - double fp_rename_reads; - double fp_rename_writes; - double inst_window_reads; - double inst_window_writes; - double inst_window_wakeup_accesses; - double inst_window_selections; - double fp_inst_window_reads; - double fp_inst_window_writes; - double fp_inst_window_wakeup_accesses; - double fp_inst_window_selections; - double archi_int_regfile_reads; - double archi_float_regfile_reads; - double phy_int_regfile_reads; - double phy_float_regfile_reads; - double phy_int_regfile_writes; - double phy_float_regfile_writes; - double archi_int_regfile_writes; - double archi_float_regfile_writes; - double int_regfile_reads; - double float_regfile_reads; - double int_regfile_writes; - double float_regfile_writes; - double windowed_reg_accesses; - double windowed_reg_transports; - double function_calls; - double context_switches; - double ialu_accesses; - double fpu_accesses; - double mul_accesses; - double cdb_alu_accesses; - double cdb_mul_accesses; - double cdb_fpu_accesses; - double load_buffer_reads; - double load_buffer_writes; - double load_buffer_cams; - double store_buffer_reads; - double store_buffer_writes; - double store_buffer_cams; - double store_buffer_forwards; - double main_memory_access; - double main_memory_read; - double main_memory_write; - double pipeline_duty_cycle; - - double IFU_duty_cycle ; - double BR_duty_cycle ; - double LSU_duty_cycle ; - double MemManU_I_duty_cycle; - double MemManU_D_duty_cycle ; - double ALU_duty_cycle ; - double MUL_duty_cycle ; - double FPU_duty_cycle ; - double ALU_cdb_duty_cycle ; - double MUL_cdb_duty_cycle ; - double FPU_cdb_duty_cycle ; - - //all subnodes at the level of system.core(0-n) - predictor_systemcore predictor; - itlb_systemcore itlb; - icache_systemcore icache; - dtlb_systemcore dtlb; - dcache_systemcore dcache; - BTB_systemcore BTB; - -} system_core; -typedef struct{ - //params - int Directory_type; - double Dir_config[20]; - int buffer_sizes[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double read_misses; - double write_misses; - double conflicts; - double duty_cycle; -} system_L1Directory; -typedef struct{ - //params - int Directory_type; - double Dir_config[20]; - int buffer_sizes[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double read_misses; - double write_misses; - double conflicts; - double duty_cycle; -} system_L2Directory; -typedef struct{ - //params - double L2_config[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - int buffer_sizes[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_accesses; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; - double duty_cycle; - - bool merged_dir; - double homenode_read_accesses; - double homenode_write_accesses; - double homenode_read_hits; - double homenode_write_hits; - double homenode_read_misses; - double homenode_write_misses; - double dir_duty_cycle; -} system_L2; -typedef struct{ - //params - double L3_config[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - int buffer_sizes[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_accesses; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; - double duty_cycle; - - bool merged_dir; - double homenode_read_accesses; - double homenode_write_accesses; - double homenode_read_hits; - double homenode_write_hits; - double homenode_read_misses; - double homenode_write_misses; - double dir_duty_cycle; -} system_L3; -typedef struct{ - //params - int number_of_inputs_of_crossbars; - int number_of_outputs_of_crossbars; - int flit_bits; - int input_buffer_entries_per_port; - int ports_of_input_buffer[20]; - //stats - double crossbar_accesses; -} xbar0_systemNoC; -typedef struct{ - //params - int clockrate; - bool type; - bool has_global_link; - char topology[20]; - int horizontal_nodes; - int vertical_nodes; - int link_throughput; - int link_latency; - int input_ports; - int output_ports; - int virtual_channel_per_port; - int flit_bits; - int input_buffer_entries_per_vc; - int ports_of_input_buffer[20]; - int dual_pump; - int number_of_crossbars; - char crossbar_type[20]; - char crosspoint_type[20]; - xbar0_systemNoC xbar0; - int arbiter_type; - double chip_coverage; - //stats - double total_accesses; - double duty_cycle; - double route_over_perc; -} system_NoC; -typedef struct{ - //params - int mem_tech_node; - int device_clock; - int peak_transfer_rate; - int internal_prefetch_of_DRAM_chip; - int capacity_per_channel; - int number_ranks; - int num_banks_of_DRAM_chip; - int Block_width_of_DRAM_chip; - int output_width_of_DRAM_chip; - int page_size_of_DRAM_chip; - int burstlength_of_DRAM_chip; - //stats - double memory_accesses; - double memory_reads; - double memory_writes; -} system_mem; -typedef struct{ - //params - //Common Param for mc and fc - double peak_transfer_rate; - int number_mcs; - bool withPHY; - int type; - - //FCParam - //stats - double duty_cycle; - double total_load_perc; - - //McParam - int mc_clock; - int llc_line_length; - int memory_channels_per_mc; - int number_ranks; - int req_window_size_per_channel; - int IO_buffer_size_per_channel; - int databus_width; - int addressbus_width; - bool LVDS; - - //stats - double memory_accesses; - double memory_reads; - double memory_writes; -} system_mc; - -typedef struct{ - //params - int clockrate; - int number_units; - int type; - //stats - double duty_cycle; - double total_load_perc; -} system_niu; - -typedef struct{ - //params - int clockrate; - int number_units; - int num_channels; - int type; - bool withPHY; - //stats - double duty_cycle; - double total_load_perc; -} system_pcie; - -typedef struct{ - //All number_of_* at the level of 'system' Ying 03/21/2009 - int number_of_cores; - int number_of_L1Directories; - int number_of_L2Directories; - int number_of_L2s; - bool Private_L2; - int number_of_L3s; - int number_of_NoCs; - int number_of_dir_levels; - int domain_size; - int first_level_dir; - // All params at the level of 'system' - int homogeneous_cores; - int homogeneous_L1Directories; - int homogeneous_L2Directories; - double core_tech_node; - int target_core_clockrate; - int target_chip_area; - int temperature; - int number_cache_levels; - int L1_property; - int L2_property; - int homogeneous_L2s; - int L3_property; - int homogeneous_L3s; - int homogeneous_NoCs; - int homogeneous_ccs; - int Max_area_deviation; - int Max_power_deviation; - int device_type; - bool longer_channel_device; - bool Embedded; - bool opt_dynamic_power; - bool opt_lakage_power; - bool opt_clockrate; - bool opt_area; - int interconnect_projection_type; - int machine_bits; - int virtual_address_width; - int physical_address_width; - int virtual_memory_page_size; - double total_cycles; - //system.core(0-n):3rd level - system_core core[64]; - system_L1Directory L1Directory[64]; - system_L2Directory L2Directory[64]; - system_L2 L2[64]; - system_L3 L3[64]; - system_NoC NoC[64]; - system_mem mem; - system_mc mc; - system_mc flashc; - system_niu niu; - system_pcie pcie; -} root_system; - -class ParseXML -{ -public: - void parse(char* filepath); - void initialize(); -public: - root_system sys; -}; - - -#endif /* XML_PARSE_H_ */ - - - - diff --git a/ext/mcpat/array.cc b/ext/mcpat/array.cc index 975f82fad..0e46afe03 100644 --- a/ext/mcpat/array.cc +++ b/ext/mcpat/array.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,232 +26,242 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ -#define GLOBALVAR -#include -#include #include +#include #include "area.h" #include "array.h" +#include "common.h" #include "decoder.h" -#include "globalvar.h" #include "parameter.h" using namespace std; -ArrayST::ArrayST(const InputParameter *configure_interface, - string _name, - enum Device_ty device_ty_, - bool opt_local_, - enum Core_type core_ty_, - bool _is_default) -:l_ip(*configure_interface), - name(_name), - device_ty(device_ty_), - opt_local(opt_local_), - core_ty(core_ty_), - is_default(_is_default) - { - - if (l_ip.cache_sz<64) l_ip.cache_sz=64; - l_ip.error_checking();//not only do the error checking but also fill some missing parameters - optimize_array(); - -} +double ArrayST::area_efficiency_threshold = 20.0; +int ArrayST::ed = 0; +//Fixed number, make sure timing can be satisfied. +int ArrayST::delay_wt = 100; +int ArrayST::cycle_time_wt = 1000; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::area_wt = 10; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::dynamic_power_wt = 10; +int ArrayST::leakage_power_wt = 10; +//Fixed number, make sure timing can be satisfied. +int ArrayST::delay_dev = 1000000; +int ArrayST::cycle_time_dev = 100; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::area_dev = 1000000; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::dynamic_power_dev = 1000000; +int ArrayST::leakage_power_dev = 1000000; +int ArrayST::cycle_time_dev_threshold = 10; -void ArrayST::compute_base_power() - { - //l_ip.out_w =l_ip.line_sz*8; - local_result=cacti_interface(&l_ip); +ArrayST::ArrayST(XMLNode* _xml_data, + const InputParameter *configure_interface, string _name, + enum Device_ty device_ty_, double _clockRate, + bool opt_local_, enum Core_type core_ty_, bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_), + is_default(_is_default) { + name = _name; + clockRate = _clockRate; + if (l_ip.cache_sz < MIN_BUFFER_SIZE) + l_ip.cache_sz = MIN_BUFFER_SIZE; + if (!l_ip.error_checking(name)) { + exit(1); } -void ArrayST::optimize_array() -{ - list candidate_solutions(0); - list::iterator candidate_iter, min_dynamic_energy_iter; + output_data.reset(); - uca_org_t * temp_res = 0; - local_result.valid=false; + computeEnergy(); + computeArea(); +} - double throughput=l_ip.throughput, latency=l_ip.latency; - double area_efficiency_threshold = 20.0; - bool throughput_overflow=true, latency_overflow=true; - compute_base_power(); +void ArrayST::compute_base_power() { + local_result = cacti_interface(&l_ip); +} - if ((local_result.cycle_time - throughput) <= 1e-10 ) - throughput_overflow=false; - if ((local_result.access_time - latency)<= 1e-10) - latency_overflow=false; +void ArrayST::computeArea() { + area.set_area(local_result.area); + output_data.area = local_result.area / 1e6; +} - if (opt_for_clk && opt_local) - { - if (throughput_overflow || latency_overflow) - { - l_ip.ed=0; +void ArrayST::computeEnergy() { + list candidate_solutions(0); + list::iterator candidate_iter, min_dynamic_energy_iter; - l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. - l_ip.cycle_time_wt = 1000; + uca_org_t* temp_res = NULL; + local_result.valid = false; - l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components. - l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components. - l_ip.leakage_power_wt = 10; + double throughput = l_ip.throughput; + double latency = l_ip.latency; + bool throughput_overflow = true; + bool latency_overflow = true; + compute_base_power(); - l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied. - l_ip.cycle_time_dev = 100; + if ((local_result.cycle_time - throughput) <= 1e-10 ) + throughput_overflow = false; + if ((local_result.access_time - latency) <= 1e-10) + latency_overflow = false; - l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components. - l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components. - l_ip.leakage_power_dev = 1000000; + if (opt_for_clk && opt_local) { + if (throughput_overflow || latency_overflow) { + l_ip.ed = ed; - throughput_overflow=true; //Reset overflow flag before start optimization iterations - latency_overflow=true; + l_ip.delay_wt = delay_wt; + l_ip.cycle_time_wt = cycle_time_wt; - temp_res = &local_result; //Clean up the result for optimized for ED^2P - temp_res->cleanup(); - } + l_ip.area_wt = area_wt; + l_ip.dynamic_power_wt = dynamic_power_wt; + l_ip.leakage_power_wt = leakage_power_wt; + l_ip.delay_dev = delay_dev; + l_ip.cycle_time_dev = cycle_time_dev; - while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10 - { - compute_base_power(); + l_ip.area_dev = area_dev; + l_ip.dynamic_power_dev = dynamic_power_dev; + l_ip.leakage_power_dev = leakage_power_dev; - l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration + //Reset overflow flag before start optimization iterations + throughput_overflow = true; + latency_overflow = true; - // from best area to worst area -->worst timing to best timing - if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)|| - (local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0)) - { //if no satisfiable solution is found,the most aggressive one is left - candidate_solutions.push_back(local_result); - //output_data_csv(candidate_solutions.back()); - if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10)) - //ensure stop opt not because of cam - { - throughput_overflow=false; - latency_overflow=false; - } - - } - else - { - //TODO: whether checking the partial satisfied results too, or just change the mark??? - if ((local_result.cycle_time - throughput) <= 1e-10) - throughput_overflow=false; - if ((local_result.access_time - latency)<= 1e-10) - latency_overflow=false; - - if (l_ip.cycle_time_dev > 10) - { //if not >10 local_result is the last result, it cannot be cleaned up - temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up - temp_res->cleanup(); - } - } -// l_ip.cycle_time_dev-=10; -// l_ip.delay_dev-=10; - - } - - - if (l_ip.assoc > 0) - { - //For array structures except CAM and FA, Give warning but still provide a result with best timing found - if (throughput_overflow==true) - cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl; - if (latency_overflow==true) - cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl; + //Clean up the result for optimized for ED^2P + temp_res = &local_result; + temp_res->cleanup(); } -// else -// { -// /*According to "Content-Addressable Memory (CAM) Circuits and -// Architectures": A Tutorial and Survey -// by Kostas Pagiamtzis et al. -// CAM structures can be heavily pipelined and use look-ahead techniques, -// therefore timing can be relaxed. But McPAT does not model the advanced -// techniques. If continue optimizing, the area efficiency will be too low -// */ -// //For CAM and FA, stop opt if area efficiency is too low -// if (throughput_overflow==true) -// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name -// <<" array structure because its area efficiency is below "< (candidate_iter)->power.readOp.dynamic) - { - min_dynamic_energy = (candidate_iter)->power.readOp.dynamic; - min_dynamic_energy_iter = candidate_iter; - local_result = *(min_dynamic_energy_iter); - //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match. - - } - else - { - candidate_iter->cleanup() ; - } - - } + while ((throughput_overflow || latency_overflow) && + l_ip.cycle_time_dev > cycle_time_dev_threshold) { + compute_base_power(); + //This is the time_dev to be used for next iteration + l_ip.cycle_time_dev -= cycle_time_dev_threshold; + // from best area to worst area -->worst timing to best timing + if ((((local_result.cycle_time - throughput) <= 1e-10 ) && + (local_result.access_time - latency) <= 1e-10) || + (local_result.data_array2->area_efficiency < + area_efficiency_threshold && l_ip.assoc == 0)) { + //if no satisfiable solution is found,the most aggressive one + //is left + candidate_solutions.push_back(local_result); + if (((local_result.cycle_time - throughput) <= 1e-10) && + ((local_result.access_time - latency) <= 1e-10)) { + //ensure stop opt not because of cam + throughput_overflow = false; + latency_overflow = false; } + + } else { + if ((local_result.cycle_time - throughput) <= 1e-10) + throughput_overflow = false; + if ((local_result.access_time - latency) <= 1e-10) + latency_overflow = false; + + //if not >10 local_result is the last result, it cannot be + //cleaned up + if (l_ip.cycle_time_dev > cycle_time_dev_threshold) { + //Only solutions not saved in the list need to be + //cleaned up + temp_res = &local_result; + temp_res->cleanup(); + } + } + } + + + if (l_ip.assoc > 0) { + //For array structures except CAM and FA, Give warning but still + //provide a result with best timing found + if (throughput_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy throughput constraint." + << endl; + if (latency_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy latency constraint." + << endl; + } + + double min_dynamic_energy = BIGNUM; + if (candidate_solutions.empty() == false) { + local_result.valid = true; + for (candidate_iter = candidate_solutions.begin(); + candidate_iter != candidate_solutions.end(); + ++candidate_iter) { + if (min_dynamic_energy > + (candidate_iter)->power.readOp.dynamic) { + min_dynamic_energy = + (candidate_iter)->power.readOp.dynamic; + min_dynamic_energy_iter = candidate_iter; + local_result = *(min_dynamic_energy_iter); + } else { + candidate_iter->cleanup() ; + } + + } + + + } candidate_solutions.clear(); - } + } - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); - double macro_layout_overhead = g_tp.macro_layout_overhead; - double chip_PR_overhead = g_tp.chip_layout_overhead; - double total_overhead = macro_layout_overhead*chip_PR_overhead; - local_result.area *= total_overhead; + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + double total_overhead = macro_layout_overhead * chip_PR_overhead; + local_result.area *= total_overhead; - //maintain constant power density - double pppm_t[4] = {total_overhead,1,1,total_overhead}; + //maintain constant power density + double pppm_t[4] = {total_overhead, 1, 1, total_overhead}; - double sckRation = g_tp.sckt_co_eff; - local_result.power.readOp.dynamic *= sckRation; - local_result.power.writeOp.dynamic *= sckRation; - local_result.power.searchOp.dynamic *= sckRation; - local_result.power.readOp.leakage *= l_ip.nbanks; - local_result.power.readOp.longer_channel_leakage = - local_result.power.readOp.leakage*long_channel_device_reduction; - local_result.power = local_result.power* pppm_t; + double sckRation = g_tp.sckt_co_eff; + local_result.power.readOp.dynamic *= sckRation; + local_result.power.writeOp.dynamic *= sckRation; + local_result.power.searchOp.dynamic *= sckRation; + local_result.power.readOp.leakage *= l_ip.nbanks; + local_result.power.readOp.longer_channel_leakage = + local_result.power.readOp.leakage * long_channel_device_reduction; + local_result.power = local_result.power * pppm_t; - local_result.data_array2->power.readOp.dynamic *= sckRation; - local_result.data_array2->power.writeOp.dynamic *= sckRation; - local_result.data_array2->power.searchOp.dynamic *= sckRation; - local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.data_array2->power.readOp.longer_channel_leakage = - local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; - local_result.data_array2->power = local_result.data_array2->power* pppm_t; + local_result.data_array2->power.readOp.dynamic *= sckRation; + local_result.data_array2->power.writeOp.dynamic *= sckRation; + local_result.data_array2->power.searchOp.dynamic *= sckRation; + local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.data_array2->power.readOp.longer_channel_leakage = + local_result.data_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.data_array2->power = local_result.data_array2->power * pppm_t; - if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) - { - local_result.tag_array2->power.readOp.dynamic *= sckRation; - local_result.tag_array2->power.writeOp.dynamic *= sckRation; - local_result.tag_array2->power.searchOp.dynamic *= sckRation; - local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.tag_array2->power.readOp.longer_channel_leakage = - local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; - local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; - } + if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { + local_result.tag_array2->power.readOp.dynamic *= sckRation; + local_result.tag_array2->power.writeOp.dynamic *= sckRation; + local_result.tag_array2->power.searchOp.dynamic *= sckRation; + local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.tag_array2->power.readOp.longer_channel_leakage = + local_result.tag_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.tag_array2->power = + local_result.tag_array2->power * pppm_t; + } + power = local_result.power; + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; } void ArrayST::leakage_feedback(double temperature) @@ -296,7 +307,6 @@ void ArrayST::leakage_feedback(double temperature) } } -ArrayST:: ~ArrayST() -{ - local_result.cleanup(); +ArrayST::~ArrayST() { + local_result.cleanup(); } diff --git a/ext/mcpat/array.h b/ext/mcpat/array.h index 8c6124d46..6a4c0b6cb 100644 --- a/ext/mcpat/array.h +++ b/ext/mcpat/array.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -43,59 +44,42 @@ using namespace std; -class ArrayST :public Component{ - public: - ArrayST(){}; - ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true); - - InputParameter l_ip; - string name; - enum Device_ty device_ty; - bool opt_local; - enum Core_type core_ty; - bool is_default; - uca_org_t local_result; - - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - - virtual void optimize_array(); - virtual void compute_base_power(); - virtual ~ArrayST(); - - void leakage_feedback(double temperature); -}; - -class InstCache :public Component{ +class ArrayST : public McPATComponent { public: - ArrayST* caches; - ArrayST* missb; - ArrayST* ifb; - ArrayST* prefetchb; - powerDef power_t;//temp value holder for both (max) power and runtime power - InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;}; - ~InstCache(){ - if (caches) {//caches->local_result.cleanup(); - delete caches; caches=0;} - if (missb) {//missb->local_result.cleanup(); - delete missb; missb=0;} - if (ifb) {//ifb->local_result.cleanup(); - delete ifb; ifb=0;} - if (prefetchb) {//prefetchb->local_result.cleanup(); - delete prefetchb; prefetchb=0;} - }; + static double area_efficiency_threshold; + + // These are used for the CACTI interface. + static int ed; + static int delay_wt; + static int cycle_time_wt; + static int area_wt; + static int dynamic_power_wt; + static int leakage_power_wt; + static int delay_dev; + static int cycle_time_dev; + static int area_dev; + static int dynamic_power_dev; + static int leakage_power_dev; + static int cycle_time_dev_threshold; + + InputParameter l_ip; + enum Device_ty device_ty; + bool opt_local; + enum Core_type core_ty; + bool is_default; + uca_org_t local_result; + statsDef stats_t; + + ArrayST(XMLNode* _xml_data, const InputParameter *configure_interface, + string _name, enum Device_ty device_ty_, double _clockRate = 0.0f, + bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, bool _is_default = true); + void computeArea(); + void computeEnergy(); + void compute_base_power(); + ~ArrayST(); + + void leakage_feedback(double temperature); }; -class DataCache :public InstCache{ -public: - ArrayST* wbb; - DataCache(){wbb=0;}; - ~DataCache(){ - if (wbb) {//wbb->local_result.cleanup(); - delete wbb; wbb=0;} - }; -}; - -#endif /* TLB_H_ */ +#endif /* ARRAY_H_ */ diff --git a/ext/mcpat/basic_components.cc b/ext/mcpat/basic_components.cc index f288d7479..3835460f3 100644 --- a/ext/mcpat/basic_components.cc +++ b/ext/mcpat/basic_components.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -34,94 +35,327 @@ #include #include "basic_components.h" +#include "cacheunit.h" +#include "common.h" + +// Turn this to true to get debugging messages +bool McPATComponent::debug = false; + +bool McPATComponent::opt_for_clk = true; +int McPATComponent::longer_channel_device = 0; +// Number of cycles per second, 2GHz = 2e9 +double McPATComponent::target_core_clockrate = 2e9; +double McPATComponent::total_cycles = 0.0f; +double McPATComponent::execution_time = 0.0f; +int McPATComponent::physical_address_width = 0; +int McPATComponent::virtual_address_width = 0; +int McPATComponent::virtual_memory_page_size = 0; +int McPATComponent::data_path_width = 0; + +void McPATOutput::reset() { + storage = 0.0; + area = 0.0; + peak_dynamic_power = 0.0; + subthreshold_leakage_power = 0.0; + gate_leakage_power = 0.0; + runtime_dynamic_energy = 0.0; +} + +McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs) { + McPATOutput to_return; + to_return.storage = lhs.storage + rhs.storage; + to_return.area = lhs.area + rhs.area; + to_return.peak_dynamic_power = lhs.peak_dynamic_power + + rhs.peak_dynamic_power; + to_return.subthreshold_leakage_power = lhs.subthreshold_leakage_power + + rhs.subthreshold_leakage_power; + to_return.gate_leakage_power = lhs.gate_leakage_power + + rhs.gate_leakage_power; + to_return.runtime_dynamic_energy = lhs.runtime_dynamic_energy + + rhs.runtime_dynamic_energy; + return to_return; +} + +void McPATOutput::operator+=(const McPATOutput &rhs) { + storage += rhs.storage; + area += rhs.area; + peak_dynamic_power += rhs.peak_dynamic_power; + subthreshold_leakage_power += rhs.subthreshold_leakage_power; + gate_leakage_power += rhs.gate_leakage_power; + runtime_dynamic_energy += rhs.runtime_dynamic_energy; +} + +McPATComponent::McPATComponent() + : xml_data(NULL), name("") { +} + +McPATComponent::McPATComponent(XMLNode* _xml_data) + : xml_data(_xml_data), name("") { +} + +McPATComponent::McPATComponent(XMLNode* _xml_data, + InputParameter* _interface_ip) + : xml_data(_xml_data), interface_ip(*_interface_ip), name("") { +} + +McPATComponent::~McPATComponent() { +} + +void McPATComponent::recursiveInstantiate() { + if (debug) { + fprintf(stderr, "WARNING: Called recursiveInstantiate from %s, with ", + "'type' %s\n", name.c_str(), xml_data->getAttribute("type")); + } + int i; + int numChildren = xml_data->nChildNode("component"); + for (i = 0; i < numChildren; i++ ) { + // For each child node of the system, + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "Core") + warnIncompleteComponentType(type); + STRCMP(type, "CacheUnit") + children.push_back(new CacheUnit(childXML, &interface_ip)); + STRCMP(type, "CacheController") + warnIncompleteComponentType(type); + STRCMP(type, "MemoryController") + warnIncompleteComponentType(type); + STRCMP(type, "Memory") + warnIncompleteComponentType(type); + STRCMP(type, "OnChipNetwork") + warnIncompleteComponentType(type); + STRCMP(type, "BusInterconnect") + warnIncompleteComponentType(type); + STRCMP(type, "Directory") + warnIncompleteComponentType(type); + + else + warnUnrecognizedComponent(type); + } +} + +void McPATComponent::computeArea() { + if (debug) { + fprintf(stderr, "WARNING: Called computeArea from %s, with 'type' ", + "%s\n", name.c_str(), xml_data->getAttribute("type")); + } + + // TODO: This calculation is incorrect and is overwritten by computeEnergy + // Fix it up so that the values are available at the correct times + int i; + int numChildren = children.size(); + area.set_area(0.0); + output_data.area = 0.0; + for (i = 0; i < numChildren; i++) { + children[i]->computeArea(); + output_data.area += area.get_area(); + } +} + +void McPATComponent::computeEnergy() { + if (debug) { + fprintf(stderr, "WARNING: Called computeEnergy from %s, with 'type' ", + "%s\n", name.c_str(), xml_data->getAttribute("type")); + } + + power.reset(); + rt_power.reset(); + memset(&output_data, 0, sizeof(McPATOutput)); + int i; + int numChildren = children.size(); + for (i = 0; i < numChildren; i++) { + children[i]->computeEnergy(); + output_data += children[i]->output_data; + } +} + +void McPATComponent::displayData(uint32_t indent, int plevel) { + if (debug) { + fprintf(stderr, "WARNING: Called displayData from %s, with 'type' ", + "%s\n", name.c_str(), xml_data->getAttribute("type")); + } + + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + + double leakage_power = output_data.subthreshold_leakage_power + + output_data.gate_leakage_power; + double total_runtime_energy = output_data.runtime_dynamic_energy + + leakage_power * execution_time; + cout << indent_str << name << ":" << endl; + cout << indent_str_next << "Area = " << output_data.area << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic Power = " + << output_data.peak_dynamic_power << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage Power = " + << output_data.subthreshold_leakage_power << " W" << endl; + cout << indent_str_next << "Gate Leakage Power = " + << output_data.gate_leakage_power << " W" << endl; + cout << indent_str_next << "Runtime Dynamic Power = " + << (output_data.runtime_dynamic_energy / execution_time) << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic Energy = " + << output_data.runtime_dynamic_energy << " J" << endl; + cout << indent_str_next << "Total Runtime Energy = " + << total_runtime_energy << " J" << endl; + cout << endl; + + // Recursively print children + int i; + int numChildren = children.size(); + for (i = 0; i < numChildren; i++) { + children[i]->displayData(indent + 4, plevel); + } +} + +void McPATComponent::errorUnspecifiedParam(string param) { + fprintf(stderr, "ERROR: Parameter must be specified in %s: %s\n", + name.c_str(), param.c_str()); + exit(1); +} + +void McPATComponent::errorNonPositiveParam(string param) { + fprintf(stderr, "ERROR: Parameter must be positive in %s: %s\n", + name.c_str(), param.c_str()); + exit(1); +} + +void McPATComponent::warnUnrecognizedComponent(XMLCSTR component) { + fprintf(stderr, "WARNING: Component type not recognized in %s: %s\n", + name.c_str(), component); +} + +void McPATComponent::warnUnrecognizedParam(XMLCSTR param) { + fprintf(stderr, "WARNING: Parameter not recognized in %s: %s\n", + name.c_str(), param); +} + +void McPATComponent::warnUnrecognizedStat(XMLCSTR stat) { + fprintf(stderr, "WARNING: Statistic not recognized in %s: %s\n", + name.c_str(), stat); +} + +void McPATComponent::warnIncompleteComponentType(XMLCSTR type) { + fprintf(stderr, " WARNING: %s handling not yet complete\n", type); +} + +void McPATComponent::warnMissingComponentType(XMLCSTR id) { + if (id) { + fprintf(stderr, + "WARNING: Ignoring a component due to the missing type: %s\n", + id); + } else { + fprintf(stderr, + "WARNING: Ignoring a component in %s due to the missing type\n", + name.c_str()); + } +} + +void McPATComponent::warnMissingParamName(XMLCSTR id) { + if (id) { + fprintf(stderr, + "WARNING: Ignoring a parameter due to the missing name: %s\n", + id); + } else { + fprintf(stderr, + "WARNING: Ignoring a parameter in %s due to the missing name\n", + name.c_str()); + } +} + +void McPATComponent::warnMissingStatName(XMLCSTR id) { + if (id) { + fprintf(stderr, + "WARNING: Ignoring a statistic due to the missing name: %s\n", + id); + } else { + fprintf(stderr, + "WARNING: Ignoring a statistic in %s due to the missing name\n", + name.c_str()); + } +} double longer_channel_device_reduction( - enum Device_ty device_ty, - enum Core_type core_ty) -{ + enum Device_ty device_ty, + enum Core_type core_ty) { - double longer_channel_device_percentage_core; - double longer_channel_device_percentage_uncore; - double longer_channel_device_percentage_llc; + double longer_channel_device_percentage_core; + double longer_channel_device_percentage_uncore; + double longer_channel_device_percentage_llc; - double long_channel_device_reduction; + double long_channel_device_reduction; - longer_channel_device_percentage_llc = 1.0; - longer_channel_device_percentage_uncore = 0.82; - if (core_ty==OOO) - { - longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam - //longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam + longer_channel_device_percentage_llc = 1.0; + longer_channel_device_percentage_uncore = 0.82; + if (core_ty == OOO) { + //0.54 Xeon Tulsa //0.58 Nehelam + longer_channel_device_percentage_core = 0.56; + } else { + //0.8;//Niagara + longer_channel_device_percentage_core = 0.8; + } - } - else - { - longer_channel_device_percentage_core = 0.8;//0.8;//Niagara - //longer_channel_device_percentage_uncore = 0.9;//Niagara - } + if (device_ty == Core_device) { + long_channel_device_reduction = + (1 - longer_channel_device_percentage_core) + + longer_channel_device_percentage_core * + g_tp.peri_global.long_channel_leakage_reduction; + } else if (device_ty == Uncore_device) { + long_channel_device_reduction = + (1 - longer_channel_device_percentage_uncore) + + longer_channel_device_percentage_uncore * + g_tp.peri_global.long_channel_leakage_reduction; + } else if (device_ty == LLC_device) { + long_channel_device_reduction = + (1 - longer_channel_device_percentage_llc) + + longer_channel_device_percentage_llc * + g_tp.peri_global.long_channel_leakage_reduction; + } else { + cout << "ERROR: Unknown device category: " << device_ty << endl; + exit(0); + } - if (device_ty==Core_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_core) - + longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction; - } - else if (device_ty==Uncore_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_uncore) - + longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction; - } - else if (device_ty==LLC_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_llc) - + longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction; - } - else - { - cout<<"unknown device category"< -#include "XML_Parse.h" +#include "component.h" #include "parameter.h" +#include "xmlParser.h" +/** + * TODO: Since revisions to McPAT aim to make the component hierarchy more + * modular, many of the parameter and statistics classes/structs included in + * this file should be moved to the files for their respective components. + */ const double cdb_overhead = 1.1; enum FU_type { @@ -46,21 +53,28 @@ enum FU_type { }; enum Core_type { - OOO, - Inorder + OOO, + Inorder }; enum Renaming_type { RAMbased, - CAMbased + CAMbased }; enum Scheduler_type { PhysicalRegFile, - ReservationStation + ReservationStation }; -enum cache_level { +enum Cache_type { + DATA_CACHE, + INSTRUCTION_CACHE, + MIXED +}; + +enum CacheLevel { + L1, L2, L3, L1Directory, @@ -68,198 +82,408 @@ enum cache_level { }; enum MemoryCtrl_type { - MC, //memory controller - FLASHC //flash controller + MC, //memory controller + FLASHC //flash controller }; enum Dir_type { - ST,//shadowed tag - DC,//directory cache - SBT,//static bank tag - NonDir + ST,//shadowed tag + DC,//directory cache + SBT,//static bank tag + NonDir }; enum Cache_policy { - Write_through, - Write_back + Write_through, + Write_back }; enum Device_ty { - Core_device, - Uncore_device, - LLC_device + Core_device, + Uncore_device, + LLC_device }; -class statsComponents -{ - public: +enum Access_mode { + Normal, + Sequential, + Fast +}; + +class statsComponents { +public: double access; double hit; double miss; statsComponents() : access(0), hit(0), miss(0) {} - statsComponents(const statsComponents & obj) { *this = obj; } - statsComponents & operator=(const statsComponents & rhs) - { - access = rhs.access; - hit = rhs.hit; - miss = rhs.miss; - return *this; + statsComponents(const statsComponents & obj) { + *this = obj; + } + statsComponents & operator=(const statsComponents & rhs) { + access = rhs.access; + hit = rhs.hit; + miss = rhs.miss; + return *this; + } + void reset() { + access = 0; + hit = 0; + miss = 0; } - void reset() { access = 0; hit = 0; miss = 0;} - friend statsComponents operator+(const statsComponents & x, const statsComponents & y); - friend statsComponents operator*(const statsComponents & x, double const * const y); + friend statsComponents operator+(const statsComponents & x, + const statsComponents & y); + friend statsComponents operator*(const statsComponents & x, + double const * const y); }; -class statsDef -{ - public: +class statsDef { +public: statsComponents readAc; statsComponents writeAc; statsComponents searchAc; + statsComponents dataReadAc; + statsComponents dataWriteAc; + statsComponents tagReadAc; + statsComponents tagWriteAc; - statsDef() : readAc(), writeAc(),searchAc() { } - void reset() { readAc.reset(); writeAc.reset();searchAc.reset();} + statsDef() : readAc(), writeAc(), searchAc() { } + void reset() { + readAc.reset(); + writeAc.reset(); + searchAc.reset(); + } friend statsDef operator+(const statsDef & x, const statsDef & y); friend statsDef operator*(const statsDef & x, double const * const y); }; +/** + * An object to store the computed data that will be output from McPAT on a + * per-component-instance basis. Currently, this includes the amount of storage + * that the component comprises, its chip area, and power and energy + * calculations. + */ +class McPATOutput { +public: + // Storage is in bytes (B) + double storage; + // Area is in mm^2 + double area; + // Peak Dynamic Power is in W + double peak_dynamic_power; + // Subthreshold Leakage Power is in W + double subthreshold_leakage_power; + // Gate Leakage Power is in W + double gate_leakage_power; + // Runtime Dynamic Energy is in J + double runtime_dynamic_energy; + + void reset(); + + friend McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs); + void operator+=(const McPATOutput &rhs); +}; + +/** + * A McPATComponent encompasses all the parts that are common to any component + * for which McPAT may compute and print power, area, and timing data. It + * includes a pointer to the XML data from which the component gathers its + * input parameters, it stores the variables that are commonly used in all + * components, and it maintains the hierarchical structure to recursively + * compute and print output. This is a base class from which all components + * should inherit these functionality (possibly through other descended + * classes. +*/ +class McPATComponent : public Component { +public: + static bool debug; + + // Variables shared across the system by all McPATComponents + static bool opt_for_clk; + static int longer_channel_device; + static double execution_time; + static int physical_address_width; + static int virtual_address_width; + static int virtual_memory_page_size; + static int data_path_width; + + // Although these two variables are static right now, they need to be + // modulated on a per-frequency-domain basis eventually. + static double target_core_clockrate; + static double total_cycles; + + XMLNode* xml_data; + InputParameter interface_ip; + string name; + // Number of cycles per second (consider changing name) + double clockRate; + vector children; + // The data structure that is printed in displayData + McPATOutput output_data; + // Set this to contain the stats to calculate peak dynamic power + statsDef tdp_stats; + // Set this to contain the stats to calculate runtime dynamic energy/power + statsDef rtp_stats; + // Holds the peak dynamic power calculation + powerDef power_t; + // Holds the runtime dynamic power calculation + powerDef rt_power; + + McPATComponent(); + // Which of these is a better way of doing things?! + McPATComponent(XMLNode* _xml_data); + McPATComponent(XMLNode* _xml_data, InputParameter* _interface_ip); + virtual void recursiveInstantiate(); + virtual void computeArea(); + // This function should probably be pure virtual, but it's too early in + // the modifying process to know for sure. Note that each component has + // to calculate it's own power consumption + virtual void computeEnergy(); + virtual void displayData(uint32_t indent, int plevel); + ~McPATComponent(); + + protected: + void errorUnspecifiedParam(string param); + void errorNonPositiveParam(string param); + void warnUnrecognizedComponent(XMLCSTR component); + void warnUnrecognizedParam(XMLCSTR param); + void warnUnrecognizedStat(XMLCSTR stat); + void warnIncompleteComponentType(XMLCSTR type); + void warnMissingComponentType(XMLCSTR id); + void warnMissingParamName(XMLCSTR id); + void warnMissingStatName(XMLCSTR id); +}; + double longer_channel_device_reduction( - enum Device_ty device_ty=Core_device, - enum Core_type core_ty=Inorder); + enum Device_ty device_ty = Core_device, + enum Core_type core_ty = Inorder); -class CoreDynParam { +class CoreParameters { public: - CoreDynParam(){}; - CoreDynParam(ParseXML *XML_interface, int ithCore_); - // :XML(XML_interface), - // ithCore(ithCore_) - // core_ty(inorder), - // rm_ty(CAMbased), - // scheu_ty(PhysicalRegFile), - // clockRate(1e9),//1GHz - // arch_ireg_width(32), - // arch_freg_width(32), - // phy_ireg_width(128), - // phy_freg_width(128), - // perThreadState(8), - // globalCheckpoint(32), - // instructionLength(32){}; - //ParseXML * XML; - bool opt_local; - bool x86; - bool Embedded; - enum Core_type core_ty; - enum Renaming_type rm_ty; + bool opt_local; + bool x86; + bool Embedded; + enum Core_type core_ty; + enum Renaming_type rm_ty; enum Scheduler_type scheu_ty; - double clockRate,executionTime; - int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width; - int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries; - int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW; - int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length; - int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines; - int num_alus, num_muls; + double clockRate; + int arch_ireg_width; + int arch_freg_width; + int archi_Regs_IRF_size; + int archi_Regs_FRF_size; + int phy_ireg_width; + int phy_freg_width; + int num_IRF_entry; + int num_FRF_entry; + int num_ifreelist_entries; + int num_ffreelist_entries; + int fetchW; + int decodeW; + int issueW; + int peak_issueW; + int commitW; + int peak_commitW; + int predictionW; + int fp_issueW; + int fp_decodeW; + int perThreadState; + int globalCheckpoint; + int instruction_length; + int pc_width; + int opcode_width; + int micro_opcode_length; + int num_hthreads; + int pipeline_stages; + int fp_pipeline_stages; + int num_pipelines; + int num_fp_pipelines; + int num_alus; + int num_muls; double num_fpus; - int int_data_width, fp_data_width,v_address_width, p_address_width; - double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles; - bool regWindowing,multithreaded; + int int_data_width; + int fp_data_width; + int v_address_width; + int p_address_width; + bool regWindowing; + bool multithreaded; double pppm_lkg_multhread[4]; - double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle, - MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle, - FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle, - FPU_cdb_duty_cycle; - ~CoreDynParam(){}; + int ROB_size; + int ROB_assoc; + int ROB_nbanks; + int ROB_tag_width; + int scheduler_assoc; + int scheduler_nbanks; + int register_window_size; + double register_window_throughput; + double register_window_latency; + int register_window_assoc; + int register_window_nbanks; + int register_window_tag_width; + int register_window_rw_ports; + int phy_Regs_IRF_size; + int phy_Regs_IRF_assoc; + int phy_Regs_IRF_nbanks; + int phy_Regs_IRF_tag_width; + int phy_Regs_IRF_rd_ports; + int phy_Regs_IRF_wr_ports; + int phy_Regs_FRF_size; + int phy_Regs_FRF_assoc; + int phy_Regs_FRF_nbanks; + int phy_Regs_FRF_tag_width; + int phy_Regs_FRF_rd_ports; + int phy_Regs_FRF_wr_ports; + int front_rat_nbanks; + int front_rat_rw_ports; + int retire_rat_nbanks; + int retire_rat_rw_ports; + int freelist_nbanks; + int freelist_rw_ports; + int memory_ports; + int load_buffer_size; + int load_buffer_assoc; + int load_buffer_nbanks; + int store_buffer_size; + int store_buffer_assoc; + int store_buffer_nbanks; + int instruction_window_size; + int fp_instruction_window_size; + int instruction_buffer_size; + int instruction_buffer_assoc; + int instruction_buffer_nbanks; + int instruction_buffer_tag_width; + int number_instruction_fetch_ports; + int RAS_size; + int execu_int_bypass_ports; + int execu_mul_bypass_ports; + int execu_fp_bypass_ports; + Wire_type execu_bypass_wire_type; + Wire_type execu_broadcast_wt; + int execu_wire_mat_type; + double execu_bypass_base_width; + double execu_bypass_base_height; + int execu_bypass_start_wiring_level; + double execu_bypass_route_over_perc; + double broadcast_numerator; }; -class CacheDynParam { +class CoreStatistics { public: - CacheDynParam(){}; - CacheDynParam(ParseXML *XML_interface, int ithCache_); - string name; - enum Dir_type dir_ty; - double clockRate,executionTime; - double capacity, blockW, assoc, nbanks; - double throughput, latency; - double duty_cycle, dir_duty_cycle; - //double duty_cycle; - int missb_size, fu_size, prefetchb_size, wbb_size; - ~CacheDynParam(){}; + double pipeline_duty_cycle; + double total_cycles; + double busy_cycles; + double idle_cycles; + double IFU_duty_cycle; + double BR_duty_cycle; + double LSU_duty_cycle; + double MemManU_I_duty_cycle; + double MemManU_D_duty_cycle; + double ALU_duty_cycle; + double MUL_duty_cycle; + double FPU_duty_cycle; + double ALU_cdb_duty_cycle; + double MUL_cdb_duty_cycle; + double FPU_cdb_duty_cycle; + double ROB_reads; + double ROB_writes; + double total_instructions; + double int_instructions; + double fp_instructions; + double branch_instructions; + double branch_mispredictions; + double load_instructions; + double store_instructions; + double committed_instructions; + double committed_int_instructions; + double committed_fp_instructions; + double rename_reads; + double rename_writes; + double fp_rename_reads; + double fp_rename_writes; + double inst_window_reads; + double inst_window_writes; + double inst_window_wakeup_accesses; + double fp_inst_window_reads; + double fp_inst_window_writes; + double fp_inst_window_wakeup_accesses; + double int_regfile_reads; + double float_regfile_reads; + double int_regfile_writes; + double float_regfile_writes; + double context_switches; + double ialu_accesses; + double fpu_accesses; + double mul_accesses; + double cdb_alu_accesses; + double cdb_fpu_accesses; + double cdb_mul_accesses; + double function_calls; }; -class MCParam { +class MCParameters { public: - MCParam(){}; - MCParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate,num_mcs, peakDataTransferRate, num_channels; - // double mcTEPowerperGhz; - // double mcPHYperGbit; - // double area; - int llcBlockSize, dataBusWidth, addressBusWidth; - int opcodeW; - int memAccesses; - int memRank; - int type; - double frontend_duty_cycle, duty_cycle, perc_load; - double executionTime, reads, writes; - bool LVDS, withPHY; - - ~MCParam(){}; + double clockRate; + enum MemoryCtrl_type mc_type; + double num_mcs; + int num_channels; + int llcBlockSize; + int dataBusWidth; + int databus_width; + int llc_line_length; + int req_window_size_per_channel; + int IO_buffer_size_per_channel; + int addressbus_width; + int opcodeW; + int type; + bool LVDS; + bool withPHY; + int peak_transfer_rate; + int number_ranks; + int reorder_buffer_assoc; + int reorder_buffer_nbanks; + int read_buffer_assoc; + int read_buffer_nbanks; + int read_buffer_tag_width; + int write_buffer_assoc; + int write_buffer_nbanks; + int write_buffer_tag_width; }; -class NoCParam { +class MCStatistics { public: - NoCParam(){}; - NoCParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate; - int flit_size; - int input_ports, output_ports, min_ports, global_linked_ports; - int virtual_channel_per_port,input_buffer_entries_per_vc; - int horizontal_nodes,vertical_nodes, total_nodes; - double executionTime, total_access, link_throughput,link_latency, - duty_cycle, chip_coverage, route_over_perc; - bool has_global_link, type; - - ~NoCParam(){}; + double duty_cycle; + double perc_load; + double reads; + double writes; }; -class ProcParam { -public: - ProcParam(){}; - ProcParam(ParseXML *XML_interface, int ithCache_); - string name; - int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel; - bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir; - - ~ProcParam(){}; +class NIUParameters { + public: + double clockRate; + int num_units; + int type; }; -class NIUParam { -public: - NIUParam(){}; - NIUParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate; - int num_units; - int type; - double duty_cycle, perc_load; - ~NIUParam(){}; +class NIUStatistics { + public: + double duty_cycle; + double perc_load; }; -class PCIeParam { -public: - PCIeParam(){}; - PCIeParam(ParseXML *XML_interface, int ithCache_); - string name; - double clockRate; - int num_channels, num_units; - bool withPHY; - int type; - double duty_cycle, perc_load; - ~PCIeParam(){}; +class PCIeParameters { + public: + double clockRate; + int num_channels; + int num_units; + bool withPHY; + int type; +}; + +class PCIeStatistics { + public: + double duty_cycle; + double perc_load; }; #endif /* BASIC_COMPONENTS_H_ */ diff --git a/ext/mcpat/bus_interconnect.cc b/ext/mcpat/bus_interconnect.cc new file mode 100644 index 000000000..1dee2c338 --- /dev/null +++ b/ext/mcpat/bus_interconnect.cc @@ -0,0 +1,179 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Author: Joel Hestness + * + ***************************************************************************/ + +#include +#include +#include +#include +#include + +#include "basic_circuit.h" +#include "bus_interconnect.h" +#include "common.h" +#include "const.h" +#include "io.h" +#include "parameter.h" + +BusInterconnect::BusInterconnect(XMLNode* _xml_data, + InputParameter* interface_ip_) + : McPATComponent(_xml_data), link_bus(NULL), interface_ip(*interface_ip_) { + name = "Bus Interconnect"; + set_param_stats(); + local_result = init_interface(&interface_ip, name); + scktRatio = g_tp.sckt_co_eff; + + interface_ip.throughput = bus_params.link_throughput / bus_params.clockRate; + interface_ip.latency = bus_params.link_latency / bus_params.clockRate; + + link_len /= bus_params.total_nodes; + if (bus_params.total_nodes > 1) { + //All links are shared by neighbors + link_len /= 2; + } + + link_bus = new Interconnect(xml_data, "Link", Uncore_device, + bus_params.link_base_width, + bus_params.link_base_height, + bus_params.flit_size, link_len, &interface_ip, + bus_params.link_start_wiring_level, + bus_params.clockRate, + bus_params.pipelinable, + bus_params.route_over_perc); + children.push_back(link_bus); +} + +void BusInterconnect::computeEnergy() { + // Initialize stats for TDP + tdp_stats.reset(); + tdp_stats.readAc.access = bus_stats.duty_cycle; + link_bus->int_params.active_ports = bus_params.min_ports - 1; + link_bus->int_stats.duty_cycle = + bus_params.M_traffic_pattern * bus_stats.duty_cycle; + + // Initialize stats for runtime energy and power + rtp_stats.reset(); + rtp_stats.readAc.access = bus_stats.total_access; + link_bus->int_stats.accesses = bus_stats.total_access; + + // Recursively compute energy + McPATComponent::computeEnergy(); +} + +void BusInterconnect::set_param_stats() { + memset(&bus_params, 0, sizeof(BusInterconnectParameters)); + + int num_children = xml_data->nChildNode("param"); + int i; + int mat_type; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("clockrate", bus_params.clockRate); + ASSIGN_INT_IF("flit_bits", bus_params.flit_size); + ASSIGN_FP_IF("link_throughput", bus_params.link_throughput); + ASSIGN_FP_IF("link_latency", bus_params.link_latency); + ASSIGN_INT_IF("total_nodes", bus_params.total_nodes); + ASSIGN_INT_IF("input_ports", bus_params.input_ports); + ASSIGN_INT_IF("output_ports", bus_params.output_ports); + ASSIGN_INT_IF("global_linked_ports", bus_params.global_linked_ports); + ASSIGN_FP_IF("chip_coverage", bus_params.chip_coverage); + ASSIGN_INT_IF("pipelinable", bus_params.pipelinable); + ASSIGN_FP_IF("link_routing_over_percentage", + bus_params.route_over_perc); + ASSIGN_INT_IF("virtual_channel_per_port", + bus_params.virtual_channel_per_port); + ASSIGN_FP_IF("M_traffic_pattern", bus_params.M_traffic_pattern); + ASSIGN_FP_IF("link_len", link_len); + ASSIGN_FP_IF("link_base_width", bus_params.link_base_width); + ASSIGN_FP_IF("link_base_height", bus_params.link_base_height); + ASSIGN_FP_IF("link_start_wiring_level", + bus_params.link_start_wiring_level); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + bus_params.clockRate *= 1e6; + + interface_ip.wire_is_mat_type = mat_type; + interface_ip.wire_os_mat_type = mat_type; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("duty_cycle", bus_stats.duty_cycle); + ASSIGN_FP_IF("total_accesses", bus_stats.total_access); + + else { + warnUnrecognizedStat(node_name); + } + } + + clockRate = bus_params.clockRate; + bus_params.min_ports = + min(bus_params.input_ports, bus_params.output_ports); + + assert(bus_params.chip_coverage <= 1); + assert(bus_params.route_over_perc <= 1); + assert(link_len > 0); +} + +void +BusInterconnect::set_duty_cycle(double duty_cycle) { + bus_stats.duty_cycle = duty_cycle; +} + +void +BusInterconnect::set_number_of_accesses(double total_accesses) { + bus_stats.total_access = total_accesses; +} + +BusInterconnect::~BusInterconnect() { + delete link_bus; + link_bus = NULL; +} diff --git a/ext/mcpat/sharedcache.h b/ext/mcpat/bus_interconnect.h similarity index 52% rename from ext/mcpat/sharedcache.h rename to ext/mcpat/bus_interconnect.h index 923408482..5c8b00420 100644 --- a/ext/mcpat/sharedcache.h +++ b/ext/mcpat/bus_interconnect.h @@ -1,7 +1,7 @@ /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,65 +25,71 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Author: Joel Hestness * ***************************************************************************/ -#ifndef SHAREDCACHE_H_ -#define SHAREDCACHE_H_ -#include +#ifndef BUS_INTERCONNECT_H_ +#define BUS_INTERCONNECT_H_ -#include "XML_Parse.h" -#include "area.h" #include "array.h" #include "basic_components.h" +#include "interconnect.h" #include "logic.h" #include "parameter.h" -class SharedCache :public Component{ - public: - ParseXML * XML; - int ithCache; - InputParameter interface_ip; - enum cache_level cacheL; - DataCache unicache;//Shared cache - CacheDynParam cachep; - statsDef homenode_tdp_stats; - statsDef homenode_rtp_stats; - statsDef homenode_stats_t; - double dir_overhead; - // cache_processor llCache,directory, directory1, inv_dir; - - //pipeline pipeLogicCache, pipeLogicDirectory; - //clock_network clockNetwork; - double scktRatio, executionTime; - // Component L2Tot, cc, cc1, ccTot; - - SharedCache(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_,enum cache_level cacheL_ =L2); - void set_cache_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,bool is_tdp=true); - ~SharedCache(){}; +class BusInterconnectParameters { +public: + double clockRate; + int flit_size; + int input_ports; + int output_ports; + int min_ports; + int global_linked_ports; + int virtual_channel_per_port; + int input_buffer_entries_per_vc; + int total_nodes; + double link_throughput; + double link_latency; + double chip_coverage; + bool pipelinable; + double route_over_perc; + bool has_global_link; + bool type; + double M_traffic_pattern; + double link_base_width; + double link_base_height; + int link_start_wiring_level; }; -class CCdir :public Component{ - public: - ParseXML * XML; - int ithCache; - InputParameter interface_ip; - DataCache dc;//Shared cache - ArrayST * shadow_dir; -// cache_processor llCache,directory, directory1, inv_dir; - - //pipeline pipeLogicCache, pipeLogicDirectory; - //clock_network clockNetwork; - double scktRatio, clockRate, executionTime; - Component L2Tot, cc, cc1, ccTot; - - CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,bool is_tdp=true); - ~CCdir(); +class BusInterconnectStatistics { +public: + double duty_cycle; + double total_access; }; -#endif /* SHAREDCACHE_H_ */ +class BusInterconnect : public McPATComponent { +public: + Interconnect* link_bus; + + int ithNoC; + InputParameter interface_ip; + double link_len; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + BusInterconnectParameters bus_params; + BusInterconnectStatistics bus_stats; + uca_org_t local_result; + statsDef stats_t; + double M_traffic_pattern; + + BusInterconnect(XMLNode* _xml_data, InputParameter* interface_ip_); + void set_param_stats(); + void set_duty_cycle(double duty_cycle); + void set_number_of_accesses(double total_accesses); + void computeEnergy(); + ~BusInterconnect(); +}; + +#endif /* BUS_INTERCONNECT_H_ */ diff --git a/ext/mcpat/cachearray.cc b/ext/mcpat/cachearray.cc new file mode 100644 index 000000000..cebea289e --- /dev/null +++ b/ext/mcpat/cachearray.cc @@ -0,0 +1,321 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#include +#include + +#include "area.h" +#include "cachearray.h" +#include "common.h" +#include "decoder.h" +#include "parameter.h" + +using namespace std; + +double CacheArray::area_efficiency_threshold = 20.0; +int CacheArray::ed = 0; +//Fixed number, make sure timing can be satisfied. +int CacheArray::delay_wt = 100; +int CacheArray::cycle_time_wt = 1000; +//Fixed number, This is used to exhaustive search for individual components. +int CacheArray::area_wt = 10; +//Fixed number, This is used to exhaustive search for individual components. +int CacheArray::dynamic_power_wt = 10; +int CacheArray::leakage_power_wt = 10; +//Fixed number, make sure timing can be satisfied. +int CacheArray::delay_dev = 1000000; +int CacheArray::cycle_time_dev = 100; +//Fixed number, This is used to exhaustive search for individual components. +int CacheArray::area_dev = 1000000; +//Fixed number, This is used to exhaustive search for individual components. +int CacheArray::dynamic_power_dev = 1000000; +int CacheArray::leakage_power_dev = 1000000; +int CacheArray::cycle_time_dev_threshold = 10; + +CacheArray::CacheArray(XMLNode* _xml_data, + const InputParameter *configure_interface, string _name, + enum Device_ty device_ty_, double _clockRate, + bool opt_local_, enum Core_type core_ty_, bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_), + is_default(_is_default), sbt_dir_overhead(0) { + name = _name; + clockRate = _clockRate; + if (l_ip.cache_sz < MIN_BUFFER_SIZE) { + l_ip.cache_sz = MIN_BUFFER_SIZE; + } + + if (!l_ip.error_checking(name)) { + exit(1); + } + + sbt_tdp_stats.reset(); + sbt_rtp_stats.reset(); + + // Compute initial search point + local_result.valid = false; + compute_base_power(); + + // Set up the cache by searching design space with cacti + list candidate_solutions(0); + list::iterator candidate_iter, min_dynamic_energy_iter; + uca_org_t* temp_res = NULL; + double throughput = l_ip.throughput; + double latency = l_ip.latency; + bool throughput_overflow = true; + bool latency_overflow = true; + + if ((local_result.cycle_time - throughput) <= 1e-10 ) + throughput_overflow = false; + if ((local_result.access_time - latency) <= 1e-10) + latency_overflow = false; + + if (opt_for_clk && opt_local) { + if (throughput_overflow || latency_overflow) { + l_ip.ed = ed; + + l_ip.delay_wt = delay_wt; + l_ip.cycle_time_wt = cycle_time_wt; + + l_ip.area_wt = area_wt; + l_ip.dynamic_power_wt = dynamic_power_wt; + l_ip.leakage_power_wt = leakage_power_wt; + + l_ip.delay_dev = delay_dev; + l_ip.cycle_time_dev = cycle_time_dev; + + l_ip.area_dev = area_dev; + l_ip.dynamic_power_dev = dynamic_power_dev; + l_ip.leakage_power_dev = leakage_power_dev; + + //Reset overflow flag before start optimization iterations + throughput_overflow = true; + latency_overflow = true; + + //Clean up the result for optimized for ED^2P + temp_res = &local_result; + temp_res->cleanup(); + } + + + while ((throughput_overflow || latency_overflow) && + l_ip.cycle_time_dev > cycle_time_dev_threshold) { + compute_base_power(); + + //This is the time_dev to be used for next iteration + l_ip.cycle_time_dev -= cycle_time_dev_threshold; + + // from best area to worst area -->worst timing to best timing + if ((((local_result.cycle_time - throughput) <= 1e-10 ) && + (local_result.access_time - latency) <= 1e-10) || + (local_result.data_array2->area_efficiency < + area_efficiency_threshold && l_ip.assoc == 0)) { + //if no satisfiable solution is found,the most aggressive one + //is left + candidate_solutions.push_back(local_result); + if (((local_result.cycle_time - throughput) <= 1e-10) && + ((local_result.access_time - latency) <= 1e-10)) { + //ensure stop opt not because of cam + throughput_overflow = false; + latency_overflow = false; + } + + } else { + if ((local_result.cycle_time - throughput) <= 1e-10) + throughput_overflow = false; + if ((local_result.access_time - latency) <= 1e-10) + latency_overflow = false; + + //if not >10 local_result is the last result, it cannot be + //cleaned up + if (l_ip.cycle_time_dev > cycle_time_dev_threshold) { + //Only solutions not saved in the list need to be + //cleaned up + temp_res = &local_result; + temp_res->cleanup(); + } + } + } + + + if (l_ip.assoc > 0) { + //For array structures except CAM and FA, Give warning but still + //provide a result with best timing found + if (throughput_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy throughput constraint." + << endl; + if (latency_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy latency constraint." + << endl; + } + + double min_dynamic_energy = BIGNUM; + if (candidate_solutions.empty() == false) { + local_result.valid = true; + for (candidate_iter = candidate_solutions.begin(); + candidate_iter != candidate_solutions.end(); + ++candidate_iter) { + if (min_dynamic_energy > + (candidate_iter)->power.readOp.dynamic) { + min_dynamic_energy = + (candidate_iter)->power.readOp.dynamic; + min_dynamic_energy_iter = candidate_iter; + local_result = *(min_dynamic_energy_iter); + + } else { + candidate_iter->cleanup() ; + } + + } + + + } + candidate_solutions.clear(); + } + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + double total_overhead = macro_layout_overhead * chip_PR_overhead; + local_result.area *= total_overhead; + + //maintain constant power density + double pppm_t[4] = {total_overhead, 1, 1, total_overhead}; + + double sckRation = g_tp.sckt_co_eff; + local_result.power.readOp.dynamic *= sckRation; + local_result.power.writeOp.dynamic *= sckRation; + local_result.power.searchOp.dynamic *= sckRation; + local_result.power.readOp.leakage *= l_ip.nbanks; + local_result.power.readOp.longer_channel_leakage = + local_result.power.readOp.leakage * long_channel_device_reduction; + local_result.power = local_result.power * pppm_t; + + local_result.data_array2->power.readOp.dynamic *= sckRation; + local_result.data_array2->power.writeOp.dynamic *= sckRation; + local_result.data_array2->power.searchOp.dynamic *= sckRation; + local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.data_array2->power.readOp.longer_channel_leakage = + local_result.data_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.data_array2->power = local_result.data_array2->power * pppm_t; + + + if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { + local_result.tag_array2->power.readOp.dynamic *= sckRation; + local_result.tag_array2->power.writeOp.dynamic *= sckRation; + local_result.tag_array2->power.searchOp.dynamic *= sckRation; + local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.tag_array2->power.readOp.longer_channel_leakage = + local_result.tag_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.tag_array2->power = + local_result.tag_array2->power * pppm_t; + } +} + +void CacheArray::compute_base_power() { + local_result = cacti_interface(&l_ip); +} + +void CacheArray::computeArea() { + area.set_area(local_result.area); + output_data.area = local_result.area / 1e6; +} + +void CacheArray::computeEnergy() { + // Set the leakage power numbers + output_data.subthreshold_leakage_power = local_result.power.readOp.leakage; + output_data.gate_leakage_power = local_result.power.readOp.gate_leakage; + + if (l_ip.assoc && l_ip.is_cache) { + // This is a standard cache array with data and tags + // Calculate peak dynamic power + output_data.peak_dynamic_power = + (local_result.tag_array2->power.readOp.dynamic + + local_result.data_array2->power.readOp.dynamic) * + tdp_stats.readAc.hit + + (local_result.tag_array2->power.readOp.dynamic) * + tdp_stats.readAc.miss + + (local_result.tag_array2->power.readOp.dynamic + + local_result.data_array2->power.writeOp.dynamic) * + tdp_stats.writeAc.hit + + (local_result.tag_array2->power.readOp.dynamic) * + tdp_stats.writeAc.miss; + output_data.peak_dynamic_power *= clockRate; + + // Calculate the runtime dynamic power + output_data.runtime_dynamic_energy = + local_result.data_array2->power.readOp.dynamic * + rtp_stats.dataReadAc.access + + local_result.data_array2->power.writeOp.dynamic * + rtp_stats.dataWriteAc.access + + (local_result.tag_array2->power.readOp.dynamic * + rtp_stats.tagReadAc.access + + local_result.tag_array2->power.writeOp.dynamic * + rtp_stats.tagWriteAc.access) * l_ip.assoc; + } else { + // Calculate peak dynamic power + output_data.peak_dynamic_power = + local_result.power.readOp.dynamic * tdp_stats.readAc.access + + local_result.power.writeOp.dynamic * tdp_stats.writeAc.access + + local_result.power.searchOp.dynamic * tdp_stats.searchAc.access; + output_data.peak_dynamic_power *= clockRate; + + // Calculate the runtime dynamic power + output_data.runtime_dynamic_energy = + local_result.power.readOp.dynamic * rtp_stats.readAc.access + + local_result.power.writeOp.dynamic * rtp_stats.writeAc.access + + local_result.power.searchOp.dynamic * rtp_stats.searchAc.access; + } + + // An SBT directory has more dynamic power + if (sbt_dir_overhead > 0) { + // Calculate peak dynamic power + output_data.peak_dynamic_power += + (computeSBTDynEnergy(&sbt_tdp_stats) * clockRate); + + // Calculate the runtime dynamic power + output_data.runtime_dynamic_energy += + computeSBTDynEnergy(&sbt_rtp_stats); + } +} + +CacheArray::~CacheArray() { + local_result.cleanup(); +} diff --git a/ext/mcpat/cachearray.h b/ext/mcpat/cachearray.h new file mode 100644 index 000000000..ba55ffcd1 --- /dev/null +++ b/ext/mcpat/cachearray.h @@ -0,0 +1,117 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#ifndef CACHEARRAY_H_ +#define CACHEARRAY_H_ + +#include +#include + +#include "basic_components.h" +#include "cacti_interface.h" +#include "component.h" +#include "const.h" +#include "parameter.h" + +class CacheArray : public McPATComponent { +public: + static double area_efficiency_threshold; + + // These are used for the CACTI interface. + static int ed; + static int delay_wt; + static int cycle_time_wt; + static int area_wt; + static int dynamic_power_wt; + static int leakage_power_wt; + static int delay_dev; + static int cycle_time_dev; + static int area_dev; + static int dynamic_power_dev; + static int leakage_power_dev; + static int cycle_time_dev_threshold; + + InputParameter l_ip; + enum Device_ty device_ty; + bool opt_local; + enum Core_type core_ty; + bool is_default; + uca_org_t local_result; + + // These are only used for static bank tag (SBT) directory type. + double sbt_dir_overhead; + // Set this to contain SBT peak power stats + statsDef sbt_tdp_stats; + // Set this to contain SBT runtime power stats + statsDef sbt_rtp_stats; + + CacheArray(XMLNode* _xml_data, const InputParameter *configure_interface, + string _name, enum Device_ty device_ty_, double _clockRate = 0.0f, + bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, bool _is_default = true); + void computeArea(); + void computeEnergy(); + void compute_base_power(); + void setSBTDirOverhead(double overhead) { sbt_dir_overhead = overhead; } + ~CacheArray(); + + private: + double computeSBTDynEnergy(statsDef *sbt_stats_ptr); +}; + +extern inline +double CacheArray::computeSBTDynEnergy(statsDef *sbt_stats_p) { + if (sbt_dir_overhead == 0) { + return 0; + } + + // Write miss on dynamic home node will generate a replacement write on + // whole cache block + double dynamic = + sbt_stats_p->readAc.hit * + (local_result.data_array2->power.readOp.dynamic * sbt_dir_overhead + + local_result.tag_array2->power.readOp.dynamic) + + sbt_stats_p->readAc.miss * + local_result.tag_array2->power.readOp.dynamic + + sbt_stats_p->writeAc.miss * + local_result.tag_array2->power.readOp.dynamic + + sbt_stats_p->writeAc.hit * + (local_result.data_array2->power.writeOp.dynamic * sbt_dir_overhead + + local_result.tag_array2->power.readOp.dynamic+ + sbt_stats_p->writeAc.miss * + local_result.power.writeOp.dynamic); + return dynamic; +} + +#endif /* CACHEARRAY_H_ */ diff --git a/ext/mcpat/cachecontroller.cc b/ext/mcpat/cachecontroller.cc new file mode 100644 index 000000000..6b505aac3 --- /dev/null +++ b/ext/mcpat/cachecontroller.cc @@ -0,0 +1,42 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Author: Joel Hestness + * + ***************************************************************************/ + +#include "cachecontroller.h" + +CacheController::CacheController(XMLNode* _xml_data, + InputParameter* _interface_ip) + : McPATComponent(_xml_data, _interface_ip) { + name = "Cache Controller"; + clockRate = target_core_clockrate; + McPATComponent::recursiveInstantiate(); +} diff --git a/ext/mcpat/globalvar.h b/ext/mcpat/cachecontroller.h similarity index 82% rename from ext/mcpat/globalvar.h rename to ext/mcpat/cachecontroller.h index 953257653..26eccb6de 100644 --- a/ext/mcpat/globalvar.h +++ b/ext/mcpat/cachecontroller.h @@ -1,7 +1,7 @@ /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,24 +25,21 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Author: Joel Hestness * ***************************************************************************/ +#ifndef CACHECONTROLLER_H_ +#define CACHECONTROLLER_H_ -#ifndef GLOBALVAR_H_ -#define GLOBALVAR_H_ - -#ifdef GLOBALVAR -#define EXTERN -#else -#define EXTERN extern -#endif - -EXTERN bool opt_for_clk; - -#endif /* GLOBALVAR_H_ */ - - +#include "basic_components.h" +class CacheController : public McPATComponent { +public: + CacheController(XMLNode* _xml_data, InputParameter* _interface_ip); + ~CacheController(); +}; +#endif /* CACHECONTROLLER_H_ */ diff --git a/ext/mcpat/cacheunit.cc b/ext/mcpat/cacheunit.cc new file mode 100644 index 000000000..3b9e84749 --- /dev/null +++ b/ext/mcpat/cacheunit.cc @@ -0,0 +1,647 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#include +#include +#include +#include + +#include "arbiter.h" +#include "array.h" +#include "basic_circuit.h" +#include "cachearray.h" +#include "cacheunit.h" +#include "common.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +bool CacheUnit::is_cache = true; +bool CacheUnit::pure_cam = false; +bool CacheUnit::opt_local = true; +bool CacheUnit::force_cache_config = false; + +CacheUnit::CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip) + : dir_overhead(0), McPATComponent(_xml_data, _interface_ip) { + + int tag; + int data; + + name = "Cache Unit"; + CacheArray* arrayPtr = NULL; + + set_cache_param_from_xml_data(); + + //All lower level cache are physically indexed and tagged. + double size; + double line; + double assoc; + double banks; + size = cache_params.capacity; + line = cache_params.blockW; + assoc = cache_params.assoc; + banks = cache_params.nbanks; + if ((cache_params.dir_ty == ST && + cache_params.cache_level == L1Directory) || + (cache_params.dir_ty == ST && + cache_params.cache_level == L2Directory)) { + tag = physical_address_width + EXTRA_TAG_BITS; + } else { + tag = physical_address_width - int(ceil(log2(size / line / assoc))) - + int(ceil(log2(line))) + EXTRA_TAG_BITS; + + if (cache_params.dir_ty == SBT) { + dir_overhead = ceil(cache_params.num_cores / BITS_PER_BYTE) * + BITS_PER_BYTE / (line * BITS_PER_BYTE); + line *= (1 + dir_overhead); + size *= (1 + dir_overhead); + } + } + + interface_ip.cache_sz = (int)size; + interface_ip.line_sz = (int)line; + interface_ip.assoc = (int)assoc; + interface_ip.nbanks = (int)banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.cache_access_mode; + interface_ip.throughput= cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_ram = cache_params.pure_ram; + interface_ip.pure_cam = pure_cam; + interface_ip.num_rw_ports = cache_params.cache_rw_ports; + interface_ip.num_rd_ports = cache_params.cache_rd_ports; + interface_ip.num_wr_ports = cache_params.cache_wr_ports; + interface_ip.num_se_rd_ports = cache_params.cache_se_rd_ports; + interface_ip.num_search_ports = cache_params.cache_search_ports; + + arrayPtr = new CacheArray(xml_data, &interface_ip, "Data and Tag Arrays", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + // This is for calculating TDP, which depends on the number of + // available ports + int num_tdp_ports = arrayPtr->l_ip.num_rw_ports + + arrayPtr->l_ip.num_rd_ports + arrayPtr->l_ip.num_wr_ports; + + // Set new array stats for calculating TDP and runtime power + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = cache_stats.tdp_read_access_scalar * + num_tdp_ports * cache_stats.duty_cycle * + cache_stats.homenode_access_scalar; + arrayPtr->tdp_stats.readAc.miss = 0; + arrayPtr->tdp_stats.readAc.hit = arrayPtr->tdp_stats.readAc.access - + arrayPtr->tdp_stats.readAc.miss; + arrayPtr->tdp_stats.writeAc.access = cache_stats.tdp_write_access_scalar * + num_tdp_ports * cache_stats.duty_cycle * + cache_stats.homenode_access_scalar; + arrayPtr->tdp_stats.writeAc.miss = 0; + arrayPtr->tdp_stats.writeAc.hit = arrayPtr->tdp_stats.writeAc.access - + arrayPtr->tdp_stats.writeAc.miss; + arrayPtr->tdp_stats.searchAc.access = 0; + arrayPtr->tdp_stats.searchAc.miss = 0; + arrayPtr->tdp_stats.searchAc.hit = 0; + + arrayPtr->rtp_stats.reset(); + if (cache_stats.use_detailed_stats) { + arrayPtr->rtp_stats.dataReadAc.access = + cache_stats.num_data_array_reads; + arrayPtr->rtp_stats.dataWriteAc.access = + cache_stats.num_data_array_writes; + arrayPtr->rtp_stats.tagReadAc.access = + cache_stats.num_tag_array_reads; + arrayPtr->rtp_stats.tagWriteAc.access = + cache_stats.num_tag_array_writes; + } else { + // This code makes assumptions. For instance, it assumes that + // tag and data arrays are accessed in parallel on a read request and + // this is a write-allocate cache. It also ignores any coherence + // requests. Using detailed stats as above can avoid the ambiguity + // that is introduced here + arrayPtr->rtp_stats.dataReadAc.access = + cache_stats.read_accesses + cache_stats.write_misses; + arrayPtr->rtp_stats.dataWriteAc.access = + cache_stats.write_accesses + cache_stats.read_misses; + arrayPtr->rtp_stats.tagReadAc.access = + cache_stats.read_accesses + cache_stats.write_accesses; + arrayPtr->rtp_stats.tagWriteAc.access = + cache_stats.read_misses + cache_stats.write_misses; + } + + // Set SBT stats if this is an SBT directory type + if (dir_overhead > 0) { + arrayPtr->setSBTDirOverhead(dir_overhead); + + // TDP stats + arrayPtr->sbt_tdp_stats.readAc.access = + cache_stats.tdp_read_access_scalar * + num_tdp_ports * cache_stats.dir_duty_cycle * + (1 - cache_stats.homenode_access_scalar); + arrayPtr->sbt_tdp_stats.readAc.miss = 0; + arrayPtr->sbt_tdp_stats.readAc.hit = + arrayPtr->sbt_tdp_stats.readAc.access - + arrayPtr->sbt_tdp_stats.readAc.miss; + arrayPtr->sbt_tdp_stats.writeAc.access = + cache_stats.tdp_sbt_write_access_scalar * + num_tdp_ports * cache_stats.dir_duty_cycle * + (1 - cache_stats.homenode_access_scalar); + arrayPtr->sbt_tdp_stats.writeAc.miss = 0; + arrayPtr->sbt_tdp_stats.writeAc.hit = + arrayPtr->sbt_tdp_stats.writeAc.access - + arrayPtr->sbt_tdp_stats.writeAc.miss; + + // Runtime power stats + arrayPtr->sbt_rtp_stats.readAc.access = + cache_stats.homenode_read_accesses; + arrayPtr->sbt_rtp_stats.readAc.miss = + cache_stats.homenode_read_misses; + arrayPtr->sbt_rtp_stats.readAc.access = + cache_stats.homenode_read_accesses - + cache_stats.homenode_read_misses; + arrayPtr->sbt_rtp_stats.writeAc.access = + cache_stats.homenode_write_accesses; + arrayPtr->sbt_rtp_stats.writeAc.miss = + cache_stats.homenode_write_misses; + arrayPtr->sbt_rtp_stats.writeAc.hit = + cache_stats.homenode_write_accesses - + cache_stats.homenode_write_misses; + } + + interface_ip.force_cache_config = force_cache_config; + if (!((cache_params.dir_ty == ST && + cache_params.cache_level == L1Directory) || + (cache_params.dir_ty == ST && + cache_params.cache_level== L2Directory))) { + // Miss Buffer + tag = physical_address_width + EXTRA_TAG_BITS; + data = (physical_address_width) + + int(ceil(log2(size / cache_params.blockW))) + + (cache_params.blockW * BITS_PER_BYTE); + line = int(ceil(data / BITS_PER_BYTE)); + size = cache_params.missb_size * line; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = cache_params.missb_assoc; + interface_ip.nbanks = cache_params.missb_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = line * BITS_PER_BYTE; + } else { + interface_ip.out_w = line * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.miss_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_ram = cache_params.pure_ram; + interface_ip.pure_cam = pure_cam; + interface_ip.throughput = cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.num_rw_ports = cache_params.miss_buff_rw_ports; + interface_ip.num_rd_ports = cache_params.miss_buff_rd_ports; + interface_ip.num_wr_ports = cache_params.miss_buff_wr_ports; + interface_ip.num_se_rd_ports = cache_params.miss_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.miss_buff_search_ports; + + arrayPtr = new CacheArray(xml_data, &interface_ip, "Miss Buffer", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.writeAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + + // Fill Buffer + tag = physical_address_width + EXTRA_TAG_BITS; + data = cache_params.blockW; + + interface_ip.cache_sz = data * cache_params.fu_size; + interface_ip.line_sz = data; + interface_ip.assoc = cache_params.fu_assoc; + interface_ip.nbanks = cache_params.fu_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.fetch_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_cam = pure_cam; + interface_ip.throughput = cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.num_rw_ports = cache_params.fetch_buff_rw_ports; + interface_ip.num_rd_ports = cache_params.fetch_buff_rd_ports; + interface_ip.num_wr_ports = cache_params.fetch_buff_wr_ports; + interface_ip.num_se_rd_ports = cache_params.fetch_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.fetch_buff_search_ports; + arrayPtr = new CacheArray(xml_data, &interface_ip, "Fill Buffer", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.writeAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + + // Prefetch Buffer + tag = physical_address_width + EXTRA_TAG_BITS; + line = cache_params.blockW; + + interface_ip.cache_sz = cache_params.prefetchb_size * line; + interface_ip.line_sz = line; + interface_ip.assoc = cache_params.prefetchb_assoc; + interface_ip.nbanks = cache_params.prefetchb_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.prefetch_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_ram = cache_params.pure_ram; + interface_ip.pure_cam = pure_cam; + interface_ip.throughput = cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.num_rw_ports = cache_params.pf_buff_rw_ports; + interface_ip.num_rd_ports = cache_params.pf_buff_rd_ports; + interface_ip.num_wr_ports = cache_params.pf_buff_wr_ports; + interface_ip.num_se_rd_ports = cache_params.pf_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.pf_buff_search_ports; + arrayPtr = new CacheArray(xml_data, &interface_ip, "Prefetch Buffer", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = cache_stats.read_misses; + arrayPtr->rtp_stats.writeAc.access = cache_stats.read_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + + // Writeback Buffer + if (cache_params.wbb_size > 0) { + tag = physical_address_width + EXTRA_TAG_BITS; + line = cache_params.blockW; + + interface_ip.cache_sz = cache_params.wbb_size * line; + interface_ip.line_sz = line; + interface_ip.assoc = cache_params.wbb_assoc; + interface_ip.nbanks = cache_params.wbb_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.writeback_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_ram = cache_params.pure_ram; + interface_ip.pure_cam = pure_cam; + interface_ip.throughput = cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.num_rw_ports = cache_params.wb_buff_rw_ports; + interface_ip.num_rd_ports = cache_params.wb_buff_rd_ports; + interface_ip.num_wr_ports = cache_params.wb_buff_wr_ports; + interface_ip.num_se_rd_ports = cache_params.wb_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.wb_buff_search_ports; + arrayPtr = new CacheArray(xml_data, &interface_ip, + "Writeback Buffer", + cache_params.device_ty, clockRate, + opt_local, cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = + arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = + arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = cache_stats.write_misses; + arrayPtr->rtp_stats.writeAc.access = cache_stats.write_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + } + } +} + +void CacheUnit::computeEnergy() { + McPATComponent::computeEnergy(); +} + +void CacheUnit::set_cache_param_from_xml_data() { + int level, type; + + // Initialization... move this? + memset(&cache_params, 0, sizeof(CacheParameters)); + memset(&cache_stats, 0, sizeof(CacheStatistics)); + + // By default, use the core clock frequency. This can be changed by + // setting the clockrate param in the XML definition of the CacheUnit + clockRate = target_core_clockrate; + XMLCSTR comp_name = xml_data->getAttribute("name"); + if (comp_name) { + name = comp_name; + } + + int num_children = xml_data->nChildNode("param"); + int i; + int tech_type; + int mat_type; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("level", level); + ASSIGN_FP_IF("size", cache_params.capacity); + ASSIGN_FP_IF("block_size", cache_params.blockW); + ASSIGN_FP_IF("assoc", cache_params.assoc); + ASSIGN_FP_IF("num_banks", cache_params.nbanks); + ASSIGN_FP_IF("latency", cache_params.latency); + ASSIGN_FP_IF("throughput", cache_params.throughput); + ASSIGN_INT_IF("miss_buffer_size", cache_params.missb_size); + ASSIGN_INT_IF("fetch_buffer_size", cache_params.fu_size); + ASSIGN_INT_IF("prefetch_buffer_size", cache_params.prefetchb_size); + ASSIGN_INT_IF("writeback_buffer_size", cache_params.wbb_size); + ASSIGN_INT_IF("miss_buffer_assoc", cache_params.missb_assoc); + ASSIGN_INT_IF("fetch_buffer_assoc", cache_params.fu_assoc); + ASSIGN_INT_IF("prefetch_buffer_assoc", cache_params.prefetchb_assoc); + ASSIGN_INT_IF("writeback_buffer_assoc", cache_params.wbb_assoc); + ASSIGN_INT_IF("miss_buffer_banks", cache_params.missb_banks); + ASSIGN_INT_IF("fetch_buffer_banks", cache_params.fu_banks); + ASSIGN_INT_IF("prefetch_buffer_banks", cache_params.prefetchb_banks); + ASSIGN_INT_IF("writeback_buffer_banks", cache_params.wbb_banks); + ASSIGN_ENUM_IF("cache_access_mode", + cache_params.cache_access_mode, Access_mode); + ASSIGN_ENUM_IF("miss_buff_access_mode", + cache_params.miss_buff_access_mode, Access_mode); + ASSIGN_ENUM_IF("fetch_buff_access_mode", + cache_params.fetch_buff_access_mode, Access_mode); + ASSIGN_ENUM_IF("prefetch_buff_access_mode", + cache_params.prefetch_buff_access_mode, Access_mode); + ASSIGN_ENUM_IF("writeback_buff_access_mode", + cache_params.writeback_buff_access_mode, Access_mode); + ASSIGN_INT_IF("cache_rw_ports", cache_params.cache_rw_ports); + ASSIGN_INT_IF("cache_rd_ports", cache_params.cache_rd_ports); + ASSIGN_INT_IF("cache_wr_ports", cache_params.cache_wr_ports); + ASSIGN_INT_IF("cache_se_rd_ports", cache_params.cache_se_rd_ports); + ASSIGN_INT_IF("cache_search_ports", cache_params.cache_search_ports); + ASSIGN_INT_IF("miss_buff_rw_ports", cache_params.miss_buff_rw_ports); + ASSIGN_INT_IF("miss_buff_rd_ports", cache_params.miss_buff_rd_ports); + ASSIGN_INT_IF("miss_buff_wr_ports", cache_params.miss_buff_wr_ports); + ASSIGN_INT_IF("miss_buff_se_rd_ports" , + cache_params.miss_buff_se_rd_ports); + ASSIGN_INT_IF("miss_buff_search_ports", + cache_params.miss_buff_search_ports); + ASSIGN_INT_IF("fetch_buff_rw_ports", cache_params.fetch_buff_rw_ports); + ASSIGN_INT_IF("fetch_buff_rd_ports", cache_params.fetch_buff_rd_ports); + ASSIGN_INT_IF("fetch_buff_wr_ports", cache_params.fetch_buff_wr_ports); + ASSIGN_INT_IF("fetch_buff_se_rd_ports", + cache_params.fetch_buff_se_rd_ports); + ASSIGN_INT_IF("fetch_buff_search_ports", + cache_params.fetch_buff_search_ports); + ASSIGN_INT_IF("pf_buff_rw_ports", cache_params.pf_buff_rw_ports); + ASSIGN_INT_IF("pf_buff_rd_ports", cache_params.pf_buff_rd_ports); + ASSIGN_INT_IF("pf_buff_wr_ports", cache_params.pf_buff_wr_ports); + ASSIGN_INT_IF("pf_buff_se_rd_ports", cache_params.pf_buff_se_rd_ports); + ASSIGN_INT_IF("pf_buff_search_ports", + cache_params.pf_buff_search_ports); + ASSIGN_INT_IF("wb_buff_rw_ports", cache_params.wb_buff_rw_ports); + ASSIGN_INT_IF("wb_buff_rd_ports", cache_params.wb_buff_rd_ports); + ASSIGN_INT_IF("wb_buff_wr_ports", cache_params.wb_buff_wr_ports); + ASSIGN_INT_IF("wb_buff_se_rd_ports", cache_params.wb_buff_se_rd_ports); + ASSIGN_INT_IF("wb_buff_search_ports", + cache_params.wb_buff_search_ports); + ASSIGN_FP_IF("clockrate", cache_params.clockRate); + ASSIGN_INT_IF("pure_ram", cache_params.pure_ram); + ASSIGN_INT_IF("tech_type", tech_type); + ASSIGN_ENUM_IF("Directory_type", cache_params.dir_ty, Dir_type); + ASSIGN_ENUM_IF("device_type", cache_params.device_ty, Device_ty); + ASSIGN_ENUM_IF("core_type", cache_params.core_ty, Core_type); + ASSIGN_INT_IF("num_cores", cache_params.num_cores); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + cache_params.clockRate *= 1e6; + if (cache_params.clockRate > 0) { + clockRate = cache_params.clockRate; + } + + interface_ip.data_arr_ram_cell_tech_type = tech_type; + interface_ip.data_arr_peri_global_tech_type = tech_type; + interface_ip.tag_arr_ram_cell_tech_type = tech_type; + interface_ip.tag_arr_peri_global_tech_type = tech_type; + + interface_ip.wire_is_mat_type = mat_type; + interface_ip.wire_os_mat_type = mat_type; + + switch(level) { + case 1: + cache_params.cache_level = L1; + break; + case 2: + cache_params.cache_level = L2; + break; + case 3: + cache_params.cache_level = L3; + break; + case 4: + cache_params.cache_level = L1Directory; + break; + case 5: + cache_params.cache_level = L2Directory; + break; + + default: + fprintf(stderr, "ERROR: Unrecognized cache level in %s: %d\n", + name.c_str(), level); + exit(1); + } + + cache_stats.use_detailed_stats = false; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("num_data_array_reads", cache_stats.num_data_array_reads); + ASSIGN_FP_IF("num_data_array_writes", + cache_stats.num_data_array_writes); + ASSIGN_FP_IF("num_tag_array_reads", cache_stats.num_tag_array_reads); + ASSIGN_FP_IF("num_tag_array_writes", cache_stats.num_tag_array_writes); + ASSIGN_FP_IF("duty_cycle", cache_stats.duty_cycle); + ASSIGN_FP_IF("read_accesses", cache_stats.read_accesses); + ASSIGN_FP_IF("write_accesses", cache_stats.write_accesses); + ASSIGN_FP_IF("read_misses", cache_stats.read_misses); + ASSIGN_FP_IF("write_misses", cache_stats.write_misses); + ASSIGN_FP_IF("conflicts", cache_stats.conflicts); + ASSIGN_INT_IF("homenode_read_accesses", + cache_stats.homenode_read_accesses); + ASSIGN_INT_IF("homenode_write_accesses", + cache_stats.homenode_write_accesses); + ASSIGN_INT_IF("homenode_read_misses", + cache_stats.homenode_read_misses); + ASSIGN_INT_IF("homenode_write_misses", + cache_stats.homenode_write_misses); + ASSIGN_FP_IF("homenode_access_scalar", + cache_stats.homenode_access_scalar); + ASSIGN_FP_IF("tdp_read_access_scalar", + cache_stats.tdp_read_access_scalar); + ASSIGN_FP_IF("tdp_write_access_scalar", + cache_stats.tdp_write_access_scalar); + ASSIGN_FP_IF("tdp_sbt_write_access_scalar", + cache_stats.tdp_sbt_write_access_scalar); + ASSIGN_FP_IF("dir_duty_cycle", + cache_stats.dir_duty_cycle); + + else { + warnUnrecognizedStat(node_name); + } + } + + if (cache_stats.num_data_array_reads > 0 || + cache_stats.num_data_array_writes > 0 || + cache_stats.num_tag_array_reads > 0 || + cache_stats.num_tag_array_writes > 0) { + cache_stats.use_detailed_stats = true; + calculate_runtime_data_and_tag = true; + } +} diff --git a/ext/mcpat/cacheunit.h b/ext/mcpat/cacheunit.h new file mode 100644 index 000000000..e4429e74b --- /dev/null +++ b/ext/mcpat/cacheunit.h @@ -0,0 +1,167 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#ifndef CACHEUNIT_H_ +#define CACHEUNIT_H_ + +#include "area.h" +#include "array.h" +#include "basic_components.h" +#include "logic.h" +#include "parameter.h" + +class CacheParameters { +public: + enum Dir_type dir_ty; + double clockRate; + double capacity; + double blockW; + double assoc; + double nbanks; + double throughput; + double latency; + int missb_size; + int fu_size; + int prefetchb_size; + int wbb_size; + int missb_assoc; + int fu_assoc; + int prefetchb_assoc; + int wbb_assoc; + int missb_banks; + int fu_banks; + int prefetchb_banks; + int wbb_banks; + enum Access_mode cache_access_mode; + enum Access_mode miss_buff_access_mode; + enum Access_mode fetch_buff_access_mode; + enum Access_mode prefetch_buff_access_mode; + enum Access_mode writeback_buff_access_mode; + int cache_rw_ports; + int cache_rd_ports; + int cache_wr_ports; + int cache_se_rd_ports; + int cache_search_ports; + int miss_buff_rw_ports; + int miss_buff_rd_ports; + int miss_buff_wr_ports; + int miss_buff_se_rd_ports; + int miss_buff_search_ports; + int fetch_buff_rw_ports; + int fetch_buff_rd_ports; + int fetch_buff_wr_ports; + int fetch_buff_se_rd_ports; + int fetch_buff_search_ports; + int pf_buff_rw_ports; + int pf_buff_rd_ports; + int pf_buff_wr_ports; + int pf_buff_se_rd_ports; + int pf_buff_search_ports; + int wb_buff_rw_ports; + int wb_buff_rd_ports; + int wb_buff_wr_ports; + int wb_buff_se_rd_ports; + int wb_buff_search_ports; + bool pure_ram; + enum CacheLevel cache_level; + enum Device_ty device_ty; + enum Core_type core_ty; + int num_cores; +}; + +class CacheStatistics { +public: + // Duty cycle is used for estimating TDP. It should reflect the highest + // sustainable rate of access to the cache unit in execution of a benchmark + // Default should be 1.0: one access per cycle + double duty_cycle; + // This duty cycle is only used for SBT directory types + double dir_duty_cycle; + // The following two stats are also used for estimating TDP. + double tdp_read_access_scalar; + double tdp_write_access_scalar; + // There are 2 ways to calculate dynamic power from activity statistics: + // Default is false + bool use_detailed_stats; + // 1) Count the number and type of accesses to each cache array + // splitting data and tag arrays (use_detailed_stats = true). + // These are extremely detailed statistics. + // read_misses and write_misses are still required for this method for + // various buffers associated with this cache. + double num_data_array_reads; + double num_data_array_writes; + double num_tag_array_reads; + double num_tag_array_writes; + // 2) Count the number and type of access to the cache unit and + // use them to extrapolate the number of accesses to the other + // subcomponents (cache arrays and buffers) + double read_accesses; + double write_accesses; + double read_misses; + double write_misses; + double conflicts; + // The following is only used for SBT directory types + int homenode_read_accesses; + int homenode_write_accesses; + int homenode_read_misses; + int homenode_write_misses; + double homenode_access_scalar; + double tdp_sbt_write_access_scalar; +}; + +class CacheUnit : public McPATComponent { +public: + static bool is_cache; + static bool pure_cam; + // This is used for CacheArray objects + static bool opt_local; + static bool force_cache_config; + + int ithCache; + CacheParameters cache_params; + CacheStatistics cache_stats; + Cache_type cacheType; + bool calculate_runtime_data_and_tag; + double dir_overhead; + + double scktRatio; + + // TODO: REMOVE _interface_ip... It promotes a mess. Find a better way... + CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip); + void set_cache_param_from_xml_data(); + void computeEnergy(); + ~CacheUnit() {}; +}; + +#endif /* CACHEUNIT_H_ */ diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc index f3e1227df..ada9c5aa1 100644 --- a/ext/mcpat/cacti/Ucache.cc +++ b/ext/mcpat/cacti/Ucache.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -54,176 +55,163 @@ using namespace std; const uint32_t nthreads = NTHREADS; -void min_values_t::update_min_values(const min_values_t * val) -{ - min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; - min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; - min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; - min_area = (min_area > val->min_area) ? val->min_area : min_area; - min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; +void min_values_t::update_min_values(const min_values_t * val) { + min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; + min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; + min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; + min_area = (min_area > val->min_area) ? val->min_area : min_area; + min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; } -void min_values_t::update_min_values(const uca_org_t & res) -{ - min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; - min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage; - min_area = (min_area > res.area) ? res.area : min_area; - min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; +void min_values_t::update_min_values(const uca_org_t & res) { + min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; + min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage; + min_area = (min_area > res.area) ? res.area : min_area; + min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; } -void min_values_t::update_min_values(const nuca_org_t * res) -{ - min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; - min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; - min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area; - min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; +void min_values_t::update_min_values(const nuca_org_t * res) { + min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; + min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; + min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area; + min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; } -void min_values_t::update_min_values(const mem_array * res) -{ - min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; - min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; - min_area = (min_area > res->area) ? res->area : min_area; - min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc; +void min_values_t::update_min_values(const mem_array * res) { + min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; + min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; + min_area = (min_area > res->area) ? res->area : min_area; + min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc; } -void * calc_time_mt_wrapper(void * void_obj) -{ - calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; - uint32_t tid = calc_obj->tid; - list & data_arr = calc_obj->data_arr; - list & tag_arr = calc_obj->tag_arr; - bool is_tag = calc_obj->is_tag; - bool pure_ram = calc_obj->pure_ram; - bool pure_cam = calc_obj->pure_cam; - bool is_main_mem = calc_obj->is_main_mem; - double Nspd_min = calc_obj->Nspd_min; - min_values_t * data_res = calc_obj->data_res; - min_values_t * tag_res = calc_obj->tag_res; +void * calc_time_mt_wrapper(void * void_obj) { + calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; + uint32_t tid = calc_obj->tid; + list & data_arr = calc_obj->data_arr; + list & tag_arr = calc_obj->tag_arr; + bool is_tag = calc_obj->is_tag; + bool pure_ram = calc_obj->pure_ram; + bool pure_cam = calc_obj->pure_cam; + bool is_main_mem = calc_obj->is_main_mem; + double Nspd_min = calc_obj->Nspd_min; + min_values_t * data_res = calc_obj->data_res; + min_values_t * tag_res = calc_obj->tag_res; - data_arr.clear(); - data_arr.push_back(new mem_array); - tag_arr.clear(); - tag_arr.push_back(new mem_array); + data_arr.clear(); + data_arr.push_back(new mem_array); + tag_arr.clear(); + tag_arr.push_back(new mem_array); - uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; - uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; - uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; - uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; + uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; + uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; + uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; + uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; - bool is_valid_partition; - int wt_min, wt_max; + bool is_valid_partition; + int wt_min, wt_max; - if (g_ip->force_wiretype) { - if (g_ip->wt == 0) { - wt_min = Low_swing; - wt_max = Low_swing; - } - else { - wt_min = Global; - wt_max = Low_swing-1; - } - } - else { - wt_min = Global; - wt_max = Low_swing; - } - - for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) - { - for (int wr = wt_min; wr <= wt_max; wr++) - { - for (uint32_t iter = tid; iter < niter; iter += nthreads) - { - // reconstruct Ndwl, Ndbl, Ndcm - unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); - unsigned int Ndbl = 1 << ((iter / (Ndcm_niter))%Ndbl_niter); - unsigned int Ndcm = 1 << (iter % Ndcm_niter); - for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2) - { - for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) - { - //for debuging - if (g_ip->force_cache_config && is_tag == false) - { - wr = g_ip->wt; - Ndwl = g_ip->ndwl; - Ndbl = g_ip->ndbl; - Ndcm = g_ip->ndcm; - if(g_ip->nspd != 0) { - Nspd = g_ip->nspd; - } - if(g_ip->ndsam1 != 0) { - Ndsam_lev_1 = g_ip->ndsam1; - Ndsam_lev_2 = g_ip->ndsam2; - } - } - - if (is_tag == true) - { - is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - tag_arr.back(), 0, NULL, NULL, - is_main_mem); - } - // If it's a fully-associative cache, the data array partition parameters are identical to that of - // the tag array, so compute data array partition properties also here. - if (is_tag == false || g_ip->fully_assoc) - { - is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - data_arr.back(), 0, NULL, NULL, - is_main_mem); - } - - if (is_valid_partition) - { - if (is_tag == true) - { - tag_arr.back()->wt = (enum Wire_type) wr; - tag_res->update_min_values(tag_arr.back()); - tag_arr.push_back(new mem_array); - } - if (is_tag == false || g_ip->fully_assoc) - { - data_arr.back()->wt = (enum Wire_type) wr; - data_res->update_min_values(data_arr.back()); - data_arr.push_back(new mem_array); - } - } - - if (g_ip->force_cache_config && is_tag == false) - { - wr = wt_max; - iter = niter; - if(g_ip->nspd != 0) { - Nspd = MAXDATASPD; - } - if (g_ip->ndsam1 != 0) { - Ndsam_lev_1 = MAX_COL_MUX+1; - Ndsam_lev_2 = MAX_COL_MUX+1; - } - } - } + if (g_ip->force_wiretype) { + if (g_ip->wt == 0) { + wt_min = Low_swing; + wt_max = Low_swing; + } else { + wt_min = Global; + wt_max = Low_swing - 1; } - } + } else { + wt_min = Global; + wt_max = Low_swing; } - } - delete data_arr.back(); - delete tag_arr.back(); - data_arr.pop_back(); - tag_arr.pop_back(); + for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) { + for (int wr = wt_min; wr <= wt_max; wr++) { + for (uint32_t iter = tid; iter < niter; iter += nthreads) { + // reconstruct Ndwl, Ndbl, Ndcm + unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); + unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter); + unsigned int Ndcm = 1 << (iter % Ndcm_niter); + for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; + Ndsam_lev_1 *= 2) { + for (unsigned int Ndsam_lev_2 = 1; + Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) { + //for debuging + if (g_ip->force_cache_config && is_tag == false) { + wr = g_ip->wt; + Ndwl = g_ip->ndwl; + Ndbl = g_ip->ndbl; + Ndcm = g_ip->ndcm; + if (g_ip->nspd != 0) { + Nspd = g_ip->nspd; + } + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = g_ip->ndsam1; + Ndsam_lev_2 = g_ip->ndsam2; + } + } - pthread_exit(NULL); + if (is_tag == true) { + is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, + Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + tag_arr.back(), 0, NULL, NULL, + is_main_mem); + } + // If it's a fully-associative cache, the data array partition parameters are identical to that of + // the tag array, so compute data array partition properties also here. + if (is_tag == false || g_ip->fully_assoc) { + is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, + Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + data_arr.back(), 0, NULL, NULL, + is_main_mem); + } + + if (is_valid_partition) { + if (is_tag == true) { + tag_arr.back()->wt = (enum Wire_type) wr; + tag_res->update_min_values(tag_arr.back()); + tag_arr.push_back(new mem_array); + } + if (is_tag == false || g_ip->fully_assoc) { + data_arr.back()->wt = (enum Wire_type) wr; + data_res->update_min_values(data_arr.back()); + data_arr.push_back(new mem_array); + } + } + + if (g_ip->force_cache_config && is_tag == false) { + wr = wt_max; + iter = niter; + if (g_ip->nspd != 0) { + Nspd = MAXDATASPD; + } + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = MAX_COL_MUX + 1; + Ndsam_lev_2 = MAX_COL_MUX + 1; + } + } + } + } + } + } + } + + delete data_arr.back(); + delete tag_arr.back(); + data_arr.pop_back(); + tag_arr.pop_back(); + +#ifndef DEBUG + pthread_exit(NULL); +#else + return NULL; +#endif } @@ -242,423 +230,448 @@ bool calculate_time( int flag_results_populate, results_mem_array *ptr_results, uca_org_t *ptr_fin_res, - bool is_main_mem) -{ - DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem); + bool is_main_mem) { + DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem); - if (dyn_p.is_valid == false) - { - return false; - } + if (dyn_p.is_valid == false) { + return false; + } - UCA * uca = new UCA(dyn_p); + UCA * uca = new UCA(dyn_p); - if (flag_results_populate) - { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables - } - else - { - int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; - int num_mats = uca->bank.dp.num_mats; - bool is_fa = uca->bank.dp.fully_assoc; - bool pure_cam = uca->bank.dp.pure_cam; + //For the final solution, populate the ptr_results data structure + //-- TODO: copy only necessary variables + if (flag_results_populate) { + } else { + int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; + int num_mats = uca->bank.dp.num_mats; + bool is_fa = uca->bank.dp.fully_assoc; + bool pure_cam = uca->bank.dp.pure_cam; ptr_array->Ndwl = Ndwl; - ptr_array->Ndbl = Ndbl; - ptr_array->Nspd = Nspd; - ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing; - ptr_array->Ndsam_lev_1 = Ndsam_lev_1; - ptr_array->Ndsam_lev_2 = Ndsam_lev_2; - ptr_array->access_time = uca->access_time; - ptr_array->cycle_time = uca->cycle_time; - ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time; - ptr_array->area_ram_cells = uca->area_all_dataramcells; - ptr_array->area = uca->area.get_area(); - ptr_array->height = uca->area.h; - ptr_array->width = uca->area.w; - ptr_array->mat_height = uca->bank.mat.area.h; - ptr_array->mat_length = uca->bank.mat.area.w; - ptr_array->subarray_height = uca->bank.mat.subarray.area.h; - ptr_array->subarray_length = uca->bank.mat.subarray.area.w; - ptr_array->power = uca->power; - ptr_array->delay_senseamp_mux_decoder = - MAX(uca->delay_array_to_sa_mux_lev_1_decoder, - uca->delay_array_to_sa_mux_lev_2_decoder); - ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver; - ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out; + ptr_array->Ndbl = Ndbl; + ptr_array->Nspd = Nspd; + ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing; + ptr_array->Ndsam_lev_1 = Ndsam_lev_1; + ptr_array->Ndsam_lev_2 = Ndsam_lev_2; + ptr_array->access_time = uca->access_time; + ptr_array->cycle_time = uca->cycle_time; + ptr_array->multisubbank_interleave_cycle_time = + uca->multisubbank_interleave_cycle_time; + ptr_array->area_ram_cells = uca->area_all_dataramcells; + ptr_array->area = uca->area.get_area(); + ptr_array->height = uca->area.h; + ptr_array->width = uca->area.w; + ptr_array->mat_height = uca->bank.mat.area.h; + ptr_array->mat_length = uca->bank.mat.area.w; + ptr_array->subarray_height = uca->bank.mat.subarray.area.h; + ptr_array->subarray_length = uca->bank.mat.subarray.area.w; + ptr_array->power = uca->power; + ptr_array->delay_senseamp_mux_decoder = + MAX(uca->delay_array_to_sa_mux_lev_1_decoder, + uca->delay_array_to_sa_mux_lev_2_decoder); + ptr_array->delay_before_subarray_output_driver = + uca->delay_before_subarray_output_driver; + ptr_array->delay_from_subarray_output_driver_to_output = + uca->delay_from_subarray_out_drv_to_out; - ptr_array->delay_route_to_bank = uca->htree_in_add->delay; - ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; - ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay; - ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; - ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; - ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; - ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; - ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree; - ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; - ptr_array->delay_comparator = uca->bank.mat.delay_comparator; + ptr_array->delay_route_to_bank = uca->htree_in_add->delay; + ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; + ptr_array->delay_row_predecode_driver_and_block = + uca->bank.mat.r_predec->delay; + ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; + ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; + ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; + ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; + ptr_array->delay_subarray_output_driver = + uca->bank.mat.delay_subarray_out_drv_htree; + ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; + ptr_array->delay_comparator = uca->bank.mat.delay_comparator; - ptr_array->all_banks_height = uca->area.h; - ptr_array->all_banks_width = uca->area.w; - ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area()); + ptr_array->all_banks_height = uca->area.h; + ptr_array->all_banks_width = uca->area.w; + ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / + (uca->area.get_area()); - ptr_array->power_routing_to_bank = uca->power_routing_to_bank; - ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; - ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; -// cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power; -// cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power; - ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_routing_to_bank = uca->power_routing_to_bank; + ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; + ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; + ptr_array->power_data_output_htree = uca->bank.htree_out_data->power; - ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power; - ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers = + uca->bank.mat.r_predec->driver_power; + ptr_array->power_row_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; - ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks = + uca->bank.mat.r_predec->block_power; + ptr_array->power_row_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power; - ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; + ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power; - ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers = + uca->bank.mat.b_mux_predec->driver_power; + ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; - ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks = + uca->bank.mat.b_mux_predec->block_power; + ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; + ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = + uca->bank.mat.sa_mux_lev_1_predec->driver_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders; - ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = + uca->bank.mat.sa_mux_lev_1_predec->block_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders = + uca->bank.mat.power_sa_mux_lev_1_decoders; + ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = + uca->bank.mat.sa_mux_lev_2_predec->driver_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders; - ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = + uca->bank.mat.sa_mux_lev_2_predec->block_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_bitlines = uca->bank.mat.power_bitline; - ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders = + uca->bank.mat.power_sa_mux_lev_2_decoders; + ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_sense_amps = uca->bank.mat.power_sa; - ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines = uca->bank.mat.power_bitline; + ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv; - ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps = uca->bank.mat.power_sa; + ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv; - ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers = + uca->bank.mat.power_bl_precharge_eq_drv; + ptr_array->power_prechg_eq_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; - ptr_array->power_comparators = uca->bank.mat.power_comparator; - ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray = + uca->bank.mat.power_subarray_out_drv; + ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= + num_act_mats_hor_dir; -// cout << " num of mats: " << dyn_p.num_mats << endl; - if (is_fa || pure_cam) - { - ptr_array->power_htree_in_search = uca->bank.htree_in_search->power; -// cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power; -// cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline; -// cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats; - ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge; - ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; - ptr_array->power_matchlines = uca->bank.mat.power_matchline; - ptr_array->power_matchlines.searchOp.dynamic *= num_mats; - ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge; - ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; - ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv; -// cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<power_comparators = uca->bank.mat.power_comparator; + ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; - ptr_array->activate_energy = uca->activate_energy; - ptr_array->read_energy = uca->read_energy; - ptr_array->write_energy = uca->write_energy; - ptr_array->precharge_energy = uca->precharge_energy; - ptr_array->refresh_power = uca->refresh_power; - ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page; - ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page; - ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks; - - ptr_array->precharge_delay = uca->precharge_delay; - - -// cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<min_delay)*100/minval->min_delay) > g_ip->delay_dev) { - return false; - } - if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > - g_ip->dynamic_power_dev) { - return false; - } - if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > - g_ip->leakage_power_dev) { - return false; - } - if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > - g_ip->cycle_time_dev) { - return false; - } - if (((u.area - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev) { - return false; - } - return true; -} - -bool check_mem_org(mem_array & u, const min_values_t *minval) -{ - if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { - return false; - } - if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > - g_ip->dynamic_power_dev) { - return false; - } - if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > - g_ip->leakage_power_dev) { - return false; - } - if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > - g_ip->cycle_time_dev) { - return false; - } - if (((u.area - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev) { - return false; - } - return true; -} - - - - -void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist) -{ - double cost = 0; - double min_cost = BIGNUM; - float d, a, dp, lp, c; - - dp = g_ip->dynamic_power_wt; - lp = g_ip->leakage_power_wt; - a = g_ip->area_wt; - d = g_ip->delay_wt; - c = g_ip->cycle_time_wt; - - if (ulist.empty() == true) - { - cout << "ERROR: no valid cache organizations found" << endl; - exit(0); - } - - for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++) - { - if (g_ip->ed == 1) - { - cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) - { - min_cost = cost; - *res = (*(niter)); - } - } - else if (g_ip->ed == 2) - { - cost = ((niter)->access_time/minval->min_delay)* - ((niter)->access_time/minval->min_delay)* - ((niter)->power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) - { - min_cost = cost; - *res = (*(niter)); - } - } - else - { - /* - * check whether the current organization - * meets the input deviation constraints - */ - bool v = check_uca_org(*niter, minval); - //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling - - if (v) - { - cost = (d * ((niter)->access_time/minval->min_delay) + - c * ((niter)->cycle_time/minval->min_cyc) + - dp * ((niter)->power.readOp.dynamic/minval->min_dyn) + - lp * ((niter)->power.readOp.leakage/minval->min_leakage) + - a * ((niter)->area/minval->min_area)); - //fprintf(stderr, "cost = %g\n", cost); - - if (min_cost > cost) { - min_cost = cost; - *res = (*(niter)); - niter = ulist.erase(niter); - if (niter!=ulist.begin()) - niter--; + if (is_fa || pure_cam) { + ptr_array->power_htree_in_search = + uca->bank.htree_in_search->power; + ptr_array->power_htree_out_search = + uca->bank.htree_out_search->power; + ptr_array->power_searchline = uca->bank.mat.power_searchline; + ptr_array->power_searchline.searchOp.dynamic *= num_mats; + ptr_array->power_searchline_precharge = + uca->bank.mat.power_searchline_precharge; + ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; + ptr_array->power_matchlines = uca->bank.mat.power_matchline; + ptr_array->power_matchlines.searchOp.dynamic *= num_mats; + ptr_array->power_matchline_precharge = + uca->bank.mat.power_matchline_precharge; + ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; + ptr_array->power_matchline_to_wordline_drv = + uca->bank.mat.power_ml_to_ram_wl_drv; } - } - else { - niter = ulist.erase(niter); - if (niter!=ulist.begin()) - niter--; - } - } - } - if (min_cost == BIGNUM) - { - cout << "ERROR: no cache organizations met optimization criteria" << endl; - exit(0); - } + ptr_array->activate_energy = uca->activate_energy; + ptr_array->read_energy = uca->read_energy; + ptr_array->write_energy = uca->write_energy; + ptr_array->precharge_energy = uca->precharge_energy; + ptr_array->refresh_power = uca->refresh_power; + ptr_array->leak_power_subbank_closed_page = + uca->leak_power_subbank_closed_page; + ptr_array->leak_power_subbank_open_page = + uca->leak_power_subbank_open_page; + ptr_array->leak_power_request_and_reply_networks = + uca->leak_power_request_and_reply_networks; + + ptr_array->precharge_delay = uca->precharge_delay; + } + + + delete uca; + return true; } -void filter_tag_arr(const min_values_t * min, list & list) -{ - double cost = BIGNUM; - double cur_cost; - double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt; - mem_array * res = NULL; - - if (list.empty() == true) - { - cout << "ERROR: no valid tag organizations found" << endl; - exit(1); - } - - - while (list.empty() != true) - { - bool v = check_mem_org(*list.back(), min); - if (v) - { - cur_cost = wt_delay * (list.back()->access_time/min->min_delay) + - wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) + - wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) + - wt_area * (list.back()->area/min->min_area) + - wt_cyc * (list.back()->cycle_time/min->min_cyc); +bool check_uca_org(uca_org_t & u, min_values_t *minval) { + if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev) { + return false; } - else - { - cur_cost = BIGNUM; + if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 > + g_ip->dynamic_power_dev) { + return false; } - if (cur_cost < cost) - { - if (res != NULL) - { - delete res; - } - cost = cur_cost; - res = list.back(); + if (((u.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage) * 100 > + g_ip->leakage_power_dev) { + return false; } - else - { - delete list.back(); + if (((u.cycle_time - minval->min_cyc) / minval->min_cyc)*100 > + g_ip->cycle_time_dev) { + return false; } - list.pop_back(); - } - if(!res) - { - cout << "ERROR: no valid tag organizations found" << endl; - exit(0); - } + if (((u.area - minval->min_area) / minval->min_area)*100 > + g_ip->area_dev) { + return false; + } + return true; +} - list.push_back(res); +bool check_mem_org(mem_array & u, const min_values_t *minval) { + if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev) { + return false; + } + if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 > + g_ip->dynamic_power_dev) { + return false; + } + if (((u.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage) * 100 > + g_ip->leakage_power_dev) { + return false; + } + if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 > + g_ip->cycle_time_dev) { + return false; + } + if (((u.area - minval->min_area) / minval->min_area) * 100 > + g_ip->area_dev) { + return false; + } + return true; } -void filter_data_arr(list & curr_list) -{ - if (curr_list.empty() == true) - { - cout << "ERROR: no valid data array organizations found" << endl; - exit(1); - } - list::iterator iter; +void find_optimal_uca(uca_org_t *res, min_values_t * minval, + list & ulist) { + double cost = 0; + double min_cost = BIGNUM; + float d, a, dp, lp, c; - for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) - { - mem_array * m = *iter; + dp = g_ip->dynamic_power_wt; + lp = g_ip->leakage_power_wt; + a = g_ip->area_wt; + d = g_ip->delay_wt; + c = g_ip->cycle_time_wt; - if (m == NULL) exit(1); - - if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) && - ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5)) - { - delete m; - iter = curr_list.erase(iter); - iter --; + if (ulist.empty() == true) { + cout << "ERROR: no valid cache organizations found" << endl; + exit(0); + } + + for (list::iterator niter = ulist.begin(); niter != ulist.end(); + niter++) { + if (g_ip->ed == 1) { + cost = ((niter)->access_time / minval->min_delay) * + ((niter)->power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + } + } else if (g_ip->ed == 2) { + cost = ((niter)->access_time / minval->min_delay) * + ((niter)->access_time / minval->min_delay) * + ((niter)->power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + } + } else { + /* + * check whether the current organization + * meets the input deviation constraints + */ + bool v = check_uca_org(*niter, minval); + + if (v) { + cost = (d * ((niter)->access_time / minval->min_delay) + + c * ((niter)->cycle_time / minval->min_cyc) + + dp * ((niter)->power.readOp.dynamic / minval->min_dyn) + + lp * + ((niter)->power.readOp.leakage / minval->min_leakage) + + a * ((niter)->area / minval->min_area)); + + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + niter = ulist.erase(niter); + if (niter != ulist.begin()) + niter--; + } + } else { + niter = ulist.erase(niter); + if (niter != ulist.begin()) + niter--; + } + } + } + + if (min_cost == BIGNUM) { + cout << "ERROR: no cache organizations met optimization criteria" + << endl; + exit(0); + } +} + + + +void filter_tag_arr(const min_values_t * min, list & list) { + double cost = BIGNUM; + double cur_cost; + double wt_delay = g_ip->delay_wt; + double wt_dyn = g_ip->dynamic_power_wt; + double wt_leakage = g_ip->leakage_power_wt; + double wt_cyc = g_ip->cycle_time_wt; + double wt_area = g_ip->area_wt; + mem_array * res = NULL; + + if (list.empty() == true) { + cout << "ERROR: no valid tag organizations found" << endl; + exit(1); + } + + + while (list.empty() != true) { + bool v = check_mem_org(*list.back(), min); + if (v) { + cur_cost = wt_delay * (list.back()->access_time / min->min_delay) + + wt_dyn * (list.back()->power.readOp.dynamic / + min->min_dyn) + + wt_leakage * (list.back()->power.readOp.leakage / + min->min_leakage) + + wt_area * (list.back()->area / min->min_area) + + wt_cyc * (list.back()->cycle_time / min->min_cyc); + } else { + cur_cost = BIGNUM; + } + if (cur_cost < cost) { + if (res != NULL) { + delete res; + } + cost = cur_cost; + res = list.back(); + } else { + delete list.back(); + } + list.pop_back(); + } + if (!res) { + cout << "ERROR: no valid tag organizations found" << endl; + exit(0); + } + + list.push_back(res); +} + + + +void filter_data_arr(list & curr_list) { + if (curr_list.empty() == true) { + cout << "ERROR: no valid data array organizations found" << endl; + exit(1); + } + + list::iterator iter; + + for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) { + mem_array * m = *iter; + + if (m == NULL) exit(1); + + if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay > + 0.5) && + ((m->power.readOp.dynamic - m->arr_min->min_dyn) / + m->arr_min->min_dyn > 0.5)) { + delete m; + iter = curr_list.erase(iter); + iter --; + } } - } } @@ -675,210 +688,199 @@ void filter_data_arr(list & curr_list) * above results * 4. Cache model with least cost is picked from sol_list */ -void solve(uca_org_t *fin_res) -{ - bool is_dram = false; - int pure_ram = g_ip->pure_ram; - bool pure_cam = g_ip->pure_cam; +void solve(uca_org_t *fin_res) { + bool is_dram = false; + int pure_ram = g_ip->pure_ram; + bool pure_cam = g_ip->pure_cam; - init_tech_params(g_ip->F_sz_um, false); + init_tech_params(g_ip->F_sz_um, false); - list tag_arr (0); - list data_arr(0); - list::iterator miter; - list sol_list(1, uca_org_t()); + list tag_arr (0); + list data_arr(0); + list::iterator miter; + list sol_list(1, uca_org_t()); - fin_res->tag_array.access_time = 0; - fin_res->tag_array.Ndwl = 0; - fin_res->tag_array.Ndbl = 0; - fin_res->tag_array.Nspd = 0; - fin_res->tag_array.deg_bl_muxing = 0; - fin_res->tag_array.Ndsam_lev_1 = 0; - fin_res->tag_array.Ndsam_lev_2 = 0; + fin_res->tag_array.access_time = 0; + fin_res->tag_array.Ndwl = 0; + fin_res->tag_array.Ndbl = 0; + fin_res->tag_array.Nspd = 0; + fin_res->tag_array.deg_bl_muxing = 0; + fin_res->tag_array.Ndsam_lev_1 = 0; + fin_res->tag_array.Ndsam_lev_2 = 0; - // distribute calculate_time() execution to multiple threads - calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads]; - pthread_t threads[nthreads]; + // distribute calculate_time() execution to multiple threads + calc_time_mt_wrapper_struct * calc_array = + new calc_time_mt_wrapper_struct[nthreads]; + pthread_t threads[nthreads]; - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].tid = t; - calc_array[t].pure_ram = pure_ram; - calc_array[t].pure_cam = pure_cam; - calc_array[t].data_res = new min_values_t(); - calc_array[t].tag_res = new min_values_t(); - } - - bool is_tag; - uint32_t ram_cell_tech_type; - - // If it's a cache, first calculate the area, delay and power for all tag array partitions. - if (!(pure_ram||pure_cam||g_ip->fully_assoc)) - { //cache - is_tag = true; - ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); - init_tech_params(g_ip->F_sz_um, is_tag); - - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].is_tag = is_tag; - calc_array[t].is_main_mem = false; - calc_array[t].Nspd_min = 0.125; - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].tid = t; + calc_array[t].pure_ram = pure_ram; + calc_array[t].pure_cam = pure_cam; + calc_array[t].data_res = new min_values_t(); + calc_array[t].tag_res = new min_values_t(); } - for (uint32_t t = 0; t < nthreads; t++) - { - pthread_join(threads[t], NULL); + bool is_tag; + uint32_t ram_cell_tech_type; + + // If it's a cache, first calculate the area, delay and power for all tag array partitions. + if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache + is_tag = true; + ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; + is_dram = ((ram_cell_tech_type == lp_dram) || + (ram_cell_tech_type == comm_dram)); + init_tech_params(g_ip->F_sz_um, is_tag); + + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = false; + calc_array[t].Nspd_min = 0.125; +#ifndef DEBUG + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, + (void *)(&(calc_array[t]))); +#else + calc_time_mt_wrapper((void *)(&(calc_array[t]))); +#endif + } + +#ifndef DEBUG + for (uint32_t t = 0; t < nthreads; t++) { + pthread_join(threads[t], NULL); + } +#endif + + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); + calc_array[t].tag_arr.sort(mem_array::lt); + tag_arr.merge(calc_array[t].tag_arr, mem_array::lt); + } } - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].data_arr.sort(mem_array::lt); - data_arr.merge(calc_array[t].data_arr, mem_array::lt); - calc_array[t].tag_arr.sort(mem_array::lt); - tag_arr.merge(calc_array[t].tag_arr, mem_array::lt); - } - } - - // calculate the area, delay and power for all data array partitions (for cache or plain RAM). -// if (!g_ip->fully_assoc) -// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion + // calculate the area, delay and power for all data array partitions (for cache or plain RAM). + // in the new cacti, cam, fully_associative cache are processed as single array in the data portion is_tag = false; ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type; is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].is_tag = is_tag; - calc_array[t].is_main_mem = g_ip->is_main_mem; - if (!(pure_cam||g_ip->fully_assoc)) - { - calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8); - } - else - { - calc_array[t].Nspd_min = 1; - } + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = g_ip->is_main_mem; + if (!(pure_cam || g_ip->fully_assoc)) { + calc_array[t].Nspd_min = (double)(g_ip->out_w) / + (double)(g_ip->block_sz * 8); + } else { + calc_array[t].Nspd_min = 1; + } - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); +#ifndef DEBUG + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, + (void *)(&(calc_array[t]))); +#else + calc_time_mt_wrapper((void *)(&(calc_array[t]))); +#endif } - for (uint32_t t = 0; t < nthreads; t++) - { - pthread_join(threads[t], NULL); +#ifndef DEBUG + for (uint32_t t = 0; t < nthreads; t++) { + pthread_join(threads[t], NULL); } +#endif data_arr.clear(); - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].data_arr.sort(mem_array::lt); - data_arr.merge(calc_array[t].data_arr, mem_array::lt); + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); + + } -// } - min_values_t * d_min = new min_values_t(); - min_values_t * t_min = new min_values_t(); - min_values_t * cache_min = new min_values_t(); - for (uint32_t t = 0; t < nthreads; t++) - { - d_min->update_min_values(calc_array[t].data_res); - t_min->update_min_values(calc_array[t].tag_res); - } + min_values_t * d_min = new min_values_t(); + min_values_t * t_min = new min_values_t(); + min_values_t * cache_min = new min_values_t(); - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - (*miter)->arr_min = d_min; - } - - - //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n"; - filter_data_arr(data_arr); - if(!(pure_ram||pure_cam||g_ip->fully_assoc)) - { - filter_tag_arr(t_min, tag_arr); - } - //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n"; - - - if (pure_ram||pure_cam||g_ip->fully_assoc) - { - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - uca_org_t & curr_org = sol_list.back(); - curr_org.tag_array2 = NULL; - curr_org.data_array2 = (*miter); - - curr_org.find_delay(); - curr_org.find_energy(); - curr_org.find_area(); - curr_org.find_cyc(); - - //update min values for the entire cache - cache_min->update_min_values(curr_org); - - sol_list.push_back(uca_org_t()); + for (uint32_t t = 0; t < nthreads; t++) { + d_min->update_min_values(calc_array[t].data_res); + t_min->update_min_values(calc_array[t].tag_res); } - } - else - { - while (tag_arr.empty() != true) - { - mem_array * arr_temp = (tag_arr.back()); - //delete tag_arr.back(); - tag_arr.pop_back(); - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - uca_org_t & curr_org = sol_list.back(); - curr_org.tag_array2 = arr_temp; - curr_org.data_array2 = (*miter); - - curr_org.find_delay(); - curr_org.find_energy(); - curr_org.find_area(); - curr_org.find_cyc(); - - //update min values for the entire cache - cache_min->update_min_values(curr_org); - - sol_list.push_back(uca_org_t()); - } + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + (*miter)->arr_min = d_min; } - } - sol_list.pop_back(); - - find_optimal_uca(fin_res, cache_min, sol_list); - - sol_list.clear(); - - for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) - { - if (*miter != fin_res->data_array2) - { - delete *miter; + filter_data_arr(data_arr); + if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { + filter_tag_arr(t_min, tag_arr); } - } - data_arr.clear(); - for (uint32_t t = 0; t < nthreads; t++) - { - delete calc_array[t].data_res; - delete calc_array[t].tag_res; - } + if (pure_ram || pure_cam || g_ip->fully_assoc) { + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + uca_org_t & curr_org = sol_list.back(); + curr_org.tag_array2 = NULL; + curr_org.data_array2 = (*miter); - delete [] calc_array; - delete cache_min; - delete d_min; - delete t_min; + curr_org.find_delay(); + curr_org.find_energy(); + curr_org.find_area(); + curr_org.find_cyc(); + + //update min values for the entire cache + cache_min->update_min_values(curr_org); + + sol_list.push_back(uca_org_t()); + } + } else { + while (tag_arr.empty() != true) { + mem_array * arr_temp = (tag_arr.back()); + tag_arr.pop_back(); + + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + uca_org_t & curr_org = sol_list.back(); + curr_org.tag_array2 = arr_temp; + curr_org.data_array2 = (*miter); + + curr_org.find_delay(); + curr_org.find_energy(); + curr_org.find_area(); + curr_org.find_cyc(); + + //update min values for the entire cache + cache_min->update_min_values(curr_org); + + sol_list.push_back(uca_org_t()); + } + } + } + + sol_list.pop_back(); + + find_optimal_uca(fin_res, cache_min, sol_list); + + sol_list.clear(); + + for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) { + if (*miter != fin_res->data_array2) { + delete *miter; + } + } + data_arr.clear(); + + for (uint32_t t = 0; t < nthreads; t++) { + delete calc_array[t].data_res; + delete calc_array[t].tag_res; + } + + delete [] calc_array; + delete cache_min; + delete d_min; + delete t_min; } void update(uca_org_t *fin_res) @@ -886,7 +888,14 @@ void update(uca_org_t *fin_res) if(fin_res->tag_array2) { init_tech_params(g_ip->F_sz_um,true); - DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); + DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, + fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, + fin_res->tag_array2->Ndcm, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, + g_ip->is_main_mem); if(tag_arr_dyn_p.is_valid) { UCA * tag_arr = new UCA(tag_arr_dyn_p); @@ -894,12 +903,20 @@ void update(uca_org_t *fin_res) } else { - cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; + cout << "ERROR: Cannot retrieve array structure for leakage feedback" + << endl; exit(1); } } init_tech_params(g_ip->F_sz_um,false); - DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); + DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, + fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, + fin_res->data_array2->Ndcm, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, + g_ip->is_main_mem); if(data_arr_dyn_p.is_valid) { UCA * data_arr = new UCA(data_arr_dyn_p); @@ -907,7 +924,8 @@ void update(uca_org_t *fin_res) } else { - cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; + cout << "ERROR: Cannot retrieve array structure for leakage feedback" + << endl; exit(1); } diff --git a/ext/mcpat/cacti/Ucache.h b/ext/mcpat/cacti/Ucache.h index 20985fff1..87836adcd 100644 --- a/ext/mcpat/cacti/Ucache.h +++ b/ext/mcpat/cacti/Ucache.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,9 +40,8 @@ #include "nuca.h" #include "router.h" -class min_values_t -{ - public: +class min_values_t { +public: double min_delay; double min_dyn; double min_leakage; @@ -58,17 +58,16 @@ class min_values_t -struct solution -{ - int tag_array_index; - int data_array_index; - list::iterator tag_array_iter; - list::iterator data_array_iter; - double access_time; - double cycle_time; - double area; - double efficiency; - powerDef total_power; +struct solution { + int tag_array_index; + int data_array_index; + list::iterator tag_array_iter; + list::iterator data_array_iter; + double access_time; + double cycle_time; + double area; + double efficiency; + powerDef total_power; }; @@ -94,20 +93,19 @@ void solve(uca_org_t *fin_res); void init_tech_params(double tech, bool is_tag); -struct calc_time_mt_wrapper_struct -{ - uint32_t tid; - bool is_tag; - bool pure_ram; - bool pure_cam; - bool is_main_mem; - double Nspd_min; +struct calc_time_mt_wrapper_struct { + uint32_t tid; + bool is_tag; + bool pure_ram; + bool pure_cam; + bool is_main_mem; + double Nspd_min; - min_values_t * data_res; - min_values_t * tag_res; + min_values_t * data_res; + min_values_t * tag_res; - list data_arr; - list tag_arr; + list data_arr; + list tag_arr; }; void *calc_time_mt_wrapper(void * void_obj); diff --git a/ext/mcpat/cacti/arbiter.cc b/ext/mcpat/cacti/arbiter.cc index 6664abf13..8106d2025 100644 --- a/ext/mcpat/cacti/arbiter.cc +++ b/ext/mcpat/cacti/arbiter.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -36,95 +37,107 @@ Arbiter::Arbiter( double flit_size_, double output_len, TechnologyParameter::DeviceType *dt - ):R(n_req), flit_size(flit_size_), - o_len (output_len), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - Vdd = dt->Vdd; - double technology = g_ip->F_sz_um; - NTn1 = 13.5*technology/2; - PTn1 = 76*technology/2; - NTn2 = 13.5*technology/2; - PTn2 = 76*technology/2; - NTi = 12.5*technology/2; - PTi = 25*technology/2; - NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/ - PTtr = 20*technology/2; /* pmos tr. length*/ + ): R(n_req), flit_size(flit_size_), + o_len (output_len), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + Vdd = dt->Vdd; + double technology = g_ip->F_sz_um; + NTn1 = 13.5 * technology / 2; + PTn1 = 76 * technology / 2; + NTn2 = 13.5 * technology / 2; + PTn2 = 76 * technology / 2; + NTi = 12.5 * technology / 2; + PTi = 25 * technology / 2; + NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology / 2; /* pmos tr. length*/ } -Arbiter::~Arbiter(){} +Arbiter::~Arbiter() {} double Arbiter::arb_req() { - double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) + - gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + - drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); - return temp; + double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 * + gate_C(NTn2, 0) + + gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); + return temp; } double Arbiter::arb_pri() { - double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance - of flip-flop is ignored */ - return temp; + /* switching capacitance of flip-flop is ignored */ + double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)); + return temp; } double Arbiter::arb_grant() { - double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); - return temp; + double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); + return temp; } double Arbiter::arb_int() { - double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + - 2*gate_C(NTn2, 0) + gate_C(PTn2, 0)); - return temp; + double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(NTn2, 0) + gate_C(PTn2, 0)); + return temp; } void Arbiter::compute_power() { - power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 + - arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd); - double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); - double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); - double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); - double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); - double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); - double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); - power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage - power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd; + power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() * + Vdd * Vdd / 2 + + arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd * + Vdd); + double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2, + min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R, + min_w_pmos * PTn2 * R, 2, nor); + double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi, + min_w_pmos * PTi, 1, inv); + double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2, + min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R, + min_w_pmos * PTn2 * R, 2, nor); + double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi, + min_w_pmos * PTi, 1, inv); + //FIXME include priority table leakage + power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd; + power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd + + not_leak_gate * Vdd; } double //wire cap with triple spacing Arbiter::Cw3(double length) { - Wire wc(g_ip->wt, length, 1, 3, 3); - double temp = (wc.wire_cap(length,true)); - return temp; + Wire wc(g_ip->wt, length, 1, 3, 3); + double temp = (wc.wire_cap(length, true)); + return temp; } double Arbiter::crossbar_ctrline() { - double temp = (Cw3(o_len * 1e-6 /* m */) + - drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + - gate_C(NTi, 0) + gate_C(PTi, 0)); - return temp; + double temp = (Cw3(o_len * 1e-6 /* m */) + + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + + gate_C(NTi, 0) + gate_C(PTi, 0)); + return temp; } double Arbiter::transmission_buf_ctrcap() { - double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0); - return temp; + double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0); + return temp; } -void Arbiter::print_arbiter() -{ - cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; - cout << "Flit size : " << flit_size << " bits" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; +void Arbiter::print_arbiter() { + cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; } diff --git a/ext/mcpat/cacti/bank.cc b/ext/mcpat/cacti/bank.cc old mode 100755 new mode 100644 index a18c7f1ed..b4fd95090 --- a/ext/mcpat/cacti/bank.cc +++ b/ext/mcpat/cacti/bank.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -36,163 +37,174 @@ #include "bank.h" Bank::Bank(const DynamicParameter & dyn_p): - dp(dyn_p), mat(dp), - num_addr_b_mat(dyn_p.number_addr_bits_mat), - num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir) -{ - int RWP; - int ERP; - int EWP; - int SCHP; + dp(dyn_p), mat(dp), + num_addr_b_mat(dyn_p.number_addr_bits_mat), + num_mats_hor_dir(dyn_p.num_mats_h_dir), + num_mats_ver_dir(dyn_p.num_mats_v_dir) { + int RWP; + int ERP; + int EWP; + int SCHP; - if (dp.use_inp_params) - { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else - { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - } - - int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); - int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); - int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); - int searchinbits; - int searchoutbits; - - if (dp.fully_assoc || dp.pure_cam) - { - datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); - dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); - searchinbits = dp.num_si_b_bank_per_port * SCHP; - searchoutbits = dp.num_so_b_bank_per_port * SCHP; - } - - if (!(dp.fully_assoc || dp.pure_cam)) - { - if (g_ip->fast_access && dp.is_tag == false) - { - dataoutbits *= g_ip->data_assoc; + if (dp.use_inp_params) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; } - htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); - htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); - htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + int total_addrbits = (dp.number_addr_bits_mat + + dp.number_subbanks_decode) * (RWP + ERP + EWP); + int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + int searchinbits; + int searchoutbits; + + if (dp.fully_assoc || dp.pure_cam) { + datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + searchinbits = dp.num_si_b_bank_per_port * SCHP; + searchoutbits = dp.num_so_b_bank_per_port * SCHP; + } + + if (!(dp.fully_assoc || dp.pure_cam)) { + if (g_ip->fast_access && dp.is_tag == false) { + dataoutbits *= g_ip->data_assoc; + } + + htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w, + (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, + 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2, + Add_htree); + htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w, + (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, + 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2, + Data_in_htree); + htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w, + (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, + 0, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_out_htree); // htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100, -// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); +// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; - } - else - { - htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); - htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); - htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true); - htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true); + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } else { + htree_in_add = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Add_htree); + htree_in_data = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_in_htree); + htree_out_data = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_out_htree); + htree_in_search = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_in_htree, true, true); + htree_out_search = + new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_out_htree, true); - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; - } + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } - num_addr_b_row_dec = _log2(mat.subarray.num_rows); - num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec; - num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec; + num_addr_b_row_dec = _log2(mat.subarray.num_rows); + num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec; + num_addr_b_routed_to_mat_for_rd_or_wr = + num_addr_b_mat - num_addr_b_row_dec; } -Bank::~Bank() -{ - delete htree_in_add; - delete htree_out_data; - delete htree_in_data; - if (dp.fully_assoc || dp.pure_cam) - { - delete htree_in_search; - delete htree_out_search; - } +Bank::~Bank() { + delete htree_in_add; + delete htree_out_data; + delete htree_in_data; + if (dp.fully_assoc || dp.pure_cam) { + delete htree_in_search; + delete htree_out_search; + } } -double Bank::compute_delays(double inrisetime) -{ - return mat.compute_delays(inrisetime); +double Bank::compute_delays(double inrisetime) { + return mat.compute_delays(inrisetime); } -void Bank::compute_power_energy() -{ - mat.compute_power_energy(); +void Bank::compute_power_energy() { + mat.compute_power_energy(); - if (!(dp.fully_assoc || dp.pure_cam)) - { - power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; - power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; - power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + if (!(dp.fully_assoc || dp.pure_cam)) { + power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; - power.readOp.dynamic += htree_in_add->power.readOp.dynamic; - power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; - power.readOp.leakage += htree_in_add->power.readOp.leakage; - power.readOp.leakage += htree_in_data->power.readOp.leakage; - power.readOp.leakage += htree_out_data->power.readOp.leakage; - power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; - } - else - { + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + } else { - power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w - power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; - power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; - power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; - power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + - mat.power_sa.searchOp.dynamic + - mat.power_bitline.searchOp.dynamic + - mat.power_subarray_out_drv.searchOp.dynamic+ - mat.ml_to_ram_wl_drv->power.readOp.dynamic; + power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; + power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + + mat.power_sa.searchOp.dynamic + + mat.power_bitline.searchOp.dynamic + + mat.power_subarray_out_drv.searchOp.dynamic + + mat.ml_to_ram_wl_drv->power.readOp.dynamic; - power.readOp.dynamic += htree_in_add->power.readOp.dynamic; - power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; - power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; - power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; + power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; + power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; - power.readOp.leakage += htree_in_add->power.readOp.leakage; - power.readOp.leakage += htree_in_data->power.readOp.leakage; - power.readOp.leakage += htree_out_data->power.readOp.leakage; - power.readOp.leakage += htree_in_search->power.readOp.leakage; - power.readOp.leakage += htree_out_search->power.readOp.leakage; + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + power.readOp.leakage += htree_in_search->power.readOp.leakage; + power.readOp.leakage += htree_out_search->power.readOp.leakage; - power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage; - } + } } diff --git a/ext/mcpat/cacti/bank.h b/ext/mcpat/cacti/bank.h index 153609ab0..49151f050 100755 --- a/ext/mcpat/cacti/bank.h +++ b/ext/mcpat/cacti/bank.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -39,9 +40,8 @@ #include "htree2.h" #include "mat.h" -class Bank : public Component -{ - public: +class Bank : public Component { +public: Bank(const DynamicParameter & dyn_p); ~Bank(); double compute_delays(double inrisetime); // return outrisetime diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc index 6efd5dd27..00ea3ce9d 100644 --- a/ext/mcpat/cacti/basic_circuit.cc +++ b/ext/mcpat/cacti/basic_circuit.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,59 +40,48 @@ #include "basic_circuit.h" #include "parameter.h" -uint32_t _log2(uint64_t num) -{ - uint32_t log2 = 0; +uint32_t _log2(uint64_t num) { + uint32_t log2 = 0; - if (num == 0) - { - std::cerr << "log0?" << std::endl; - exit(1); - } + if (num == 0) { + std::cerr << "log0?" << std::endl; + exit(1); + } - while (num > 1) - { - num = (num >> 1); - log2++; - } + while (num > 1) { + num = (num >> 1); + log2++; + } - return log2; + return log2; } -bool is_pow2(int64_t val) -{ - if (val <= 0) - { - return false; - } - else if (val == 1) - { - return true; - } - else - { - return (_log2(val) != _log2(val-1)); - } +bool is_pow2(int64_t val) { + if (val <= 0) { + return false; + } else if (val == 1) { + return true; + } else { + return (_log2(val) != _log2(val - 1)); + } } -int powers (int base, int n) -{ - int i, p; +int powers (int base, int n) { + int i, p; - p = 1; - for (i = 1; i <= n; ++i) - p *= base; - return p; + p = 1; + for (i = 1; i <= n; ++i) + p *= base; + return p; } /*----------------------------------------------------------------------*/ -double logtwo (double x) -{ - assert(x > 0); - return ((double) (log (x) / log (2.0))); +double logtwo (double x) { + assert(x > 0); + return ((double) (log (x) / log (2.0))); } /*----------------------------------------------------------------------*/ @@ -102,28 +92,20 @@ double gate_C( double wirelength, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - const TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + const TechnologyParameter::DeviceType * dt; - if (_is_dram && _is_cell) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if (_is_dram && _is_wl_tr) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if (!_is_dram && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } + if (_is_dram && _is_cell) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if (_is_dram && _is_wl_tr) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if (!_is_dram && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } - return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; + return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; } @@ -134,29 +116,21 @@ double gate_C_pass( double wirelength, // poly wire length going to gate in lambda bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - // v5.0 - const TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + // v5.0 + const TechnologyParameter::DeviceType * dt; - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } - return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; + return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; } @@ -169,83 +143,67 @@ double drain_C_( double fold_dimension, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - double w_folded_tr; - const TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + double w_folded_tr; + const TechnologyParameter::DeviceType * dt; - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; // DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; // DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } - - double c_junc_area = dt->C_junc; - double c_junc_sidewall = dt->C_junc_sidewall; - double c_fringe = 2*dt->C_fringe; - double c_overlap = 2*dt->C_overlap; - double drain_C_metal_connecting_folded_tr = 0; - - // determine the width of the transistor after folding (if it is getting folded) - if (next_arg_thresh_folding_width_or_height_cell == 0) - { // interpret fold_dimension as the the folding width threshold - // i.e. the value of transistor width above which the transistor gets folded - w_folded_tr = fold_dimension; - } - else - { // interpret fold_dimension as the height of the cell that this transistor is part of. - double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; - // TODO : w_folded_tr must come from Component::compute_gate_area() - double ratio_p_to_n = 2.0 / (2.0 + 1.0); - if (nchannel) - { - w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; } - else - { - w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + + double c_junc_area = dt->C_junc; + double c_junc_sidewall = dt->C_junc_sidewall; + double c_fringe = 2 * dt->C_fringe; + double c_overlap = 2 * dt->C_overlap; + double drain_C_metal_connecting_folded_tr = 0; + + // determine the width of the transistor after folding (if it is getting folded) + if (next_arg_thresh_folding_width_or_height_cell == 0) { + // interpret fold_dimension as the the folding width threshold + // i.e. the value of transistor width above which the transistor gets folded + w_folded_tr = fold_dimension; + } else { // interpret fold_dimension as the height of the cell that this transistor is part of. + double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; + // TODO : w_folded_tr must come from Component::compute_gate_area() + double ratio_p_to_n = 2.0 / (2.0 + 1.0); + if (nchannel) { + w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } else { + w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } } - } - int num_folded_tr = (int) (ceil(width / w_folded_tr)); + int num_folded_tr = (int) (ceil(width / w_folded_tr)); - if (num_folded_tr < 2) - { - w_folded_tr = width; - } - - double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain - (stack - 1) * g_tp.spacing_poly_to_poly; - double drain_h_for_sidewall = w_folded_tr; - double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1); - if (num_folded_tr > 1) - { - total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + - (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); - - if (num_folded_tr%2 == 0) - { - drain_h_for_sidewall = 0; + if (num_folded_tr < 2) { + w_folded_tr = width; } - total_drain_height_for_cap_wrt_gate *= num_folded_tr; - drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w; - } - double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; - double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); - double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; + double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain + (stack - 1) * g_tp.spacing_poly_to_poly; + double drain_h_for_sidewall = w_folded_tr; + double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1); + if (num_folded_tr > 1) { + total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); - return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr); + if (num_folded_tr % 2 == 0) { + drain_h_for_sidewall = 0; + } + total_drain_height_for_cap_wrt_gate *= num_folded_tr; + drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w; + } + + double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; + double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); + double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; + + return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr); } @@ -255,29 +213,21 @@ double tr_R_on( int stack, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - const TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + const TechnologyParameter::DeviceType * dt; - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } - double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; - return (stack * restrans / width); + double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; + return (stack * restrans / width); } @@ -291,46 +241,34 @@ double R_to_w( int nchannel, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - const TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + const TechnologyParameter::DeviceType * dt; - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && (_is_cell)) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if ((!_is_dram) && (_is_cell)) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } - double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; - return (restrans / res); + double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; + return (restrans / res); } double pmos_to_nmos_sz_ratio( bool _is_dram, - bool _is_wl_tr) -{ - double p_to_n_sizing_ratio; - if ((_is_dram) && (_is_wl_tr)) - { //DRAM wordline transistor - p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio; - } - else - { //DRAM or SRAM all other transistors - p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio; - } - return p_to_n_sizing_ratio; + bool _is_wl_tr) { + double p_to_n_sizing_ratio; + if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio; + } else { //DRAM or SRAM all other transistors + p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio; + } + return p_to_n_sizing_ratio; } @@ -340,26 +278,23 @@ double horowitz( double tf, // time constant of gate double vs1, // threshold voltage double vs2, // threshold voltage - int rise) // whether input rises or fall -{ - if (inputramptime == 0 && vs1 == vs2) - { - return tf * (vs1 < 1 ? -log(vs1) : log(vs1)); - } - double a, b, td; + int rise) { // whether input rises or fall + if (inputramptime == 0 && vs1 == vs2) { + return tf * (vs1 < 1 ? -log(vs1) : log(vs1)); + } + double a, b, td; - a = inputramptime / tf; - if (rise == RISE) - { - b = 0.5; - td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2)); - } - else - { - b = 0.4; - td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2)); - } - return (td); + a = inputramptime / tf; + if (rise == RISE) { + b = 0.5; + td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) + + tf * (log(vs1) - log(vs2)); + } else { + b = 0.4; + td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) + + tf * (log(1.0 - vs1) - log(1.0 - vs2)); + } + return (td); } double cmos_Ileak( @@ -367,23 +302,17 @@ double cmos_Ileak( double pWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return nWidth*dt->I_off_n + pWidth*dt->I_off_p; + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nWidth*dt->I_off_n + pWidth*dt->I_off_p; } @@ -391,107 +320,81 @@ double simplified_nmos_leakage( double nwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return nwidth * dt->I_off_n; + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nwidth * dt->I_off_n; } -int factorial(int n, int m) -{ - int fa = m, i; - for (i=m+1; i<=n; i++) - fa *=i; - return fa; +int factorial(int n, int m) { + int fa = m, i; + for (i = m + 1; i <= n; i++) + fa *= i; + return fa; } -int combination(int n, int m) -{ - int ret; - ret = factorial(n, m+1) / factorial(n - m); - return ret; +int combination(int n, int m) { + int ret; + ret = factorial(n, m + 1) / factorial(n - m); + return ret; } double simplified_pmos_leakage( double pwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return pwidth * dt->I_off_p; + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pwidth * dt->I_off_p; } double cmos_Ig_n( double nWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return nWidth*dt->I_g_on_n; + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nWidth*dt->I_g_on_n; } double cmos_Ig_p( double pWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return pWidth*dt->I_g_on_p; + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pWidth*dt->I_g_on_p; } double cmos_Isub_leakage( @@ -502,98 +405,93 @@ double cmos_Isub_leakage( bool _is_dram, bool _is_cell, bool _is_wl_tr, - enum Half_net_topology topo) -{ - assert (fanin>=1); - double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr); - double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr); - double Isub=0; + enum Half_net_topology topo) { + assert (fanin >= 1); + double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr); + double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr); + double Isub = 0; int num_states; int num_off_tx; num_states = int(pow(2.0, fanin)); - switch (g_type) - { + switch (g_type) { case nmos: - if (fanin==1) - { - Isub = nmos_leak/num_states; - } - else - { - if (topo==parallel) - { - Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states - } - else - { - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power - { - //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; + if (fanin == 1) { + Isub = nmos_leak / num_states; + } else { + if (topo == parallel) { + //only when all tx are off, leakage power is non-zero. + //The possibility of this state is 1/num_states + Isub = nmos_leak * fanin / num_states; + } else { + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + //when num_off_tx ==0 there is no leakage power + Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } + Isub /= num_states; + } } break; case pmos: - if (fanin==1) - { - Isub = pmos_leak/num_states; - } - else - { - if (topo==parallel) - { - Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states - } - else - { - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power - { - //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; + if (fanin == 1) { + Isub = pmos_leak / num_states; + } else { + if (topo == parallel) { + //only when all tx are off, leakage power is non-zero. + //The possibility of this state is 1/num_states + Isub = pmos_leak * fanin / num_states; + } else { + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + //when num_off_tx ==0 there is no leakage power + Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } + Isub /= num_states; + } } break; case inv: - Isub = (nmos_leak + pmos_leak)/2; + Isub = (nmos_leak + pmos_leak) / 2; break; case nand: - Isub += fanin*pmos_leak;//the pullup network - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network - { - //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + Isub += fanin * pmos_leak;//the pullup network + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + // the pulldown network + Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } - Isub /=num_states; + Isub /= num_states; break; case nor: - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network - { - //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + // the pullup network + Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } - Isub += fanin*nmos_leak;//the pulldown network - Isub /=num_states; + Isub += fanin * nmos_leak;//the pulldown network + Isub /= num_states; break; case tri: - Isub += (nmos_leak + pmos_leak)/2;//enabled - Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power - Isub /=2; + Isub += (nmos_leak + pmos_leak) / 2;//enabled + //disabled upper bound of leakage power + Isub += nmos_leak * UNI_LEAK_STACK_FACTOR; + Isub /= 2; break; case tg: - Isub = (nmos_leak + pmos_leak)/2; + Isub = (nmos_leak + pmos_leak) / 2; break; default: assert(0); break; - } + } return Isub; } @@ -607,120 +505,116 @@ double cmos_Ig_leakage( bool _is_dram, bool _is_cell, bool _is_wl_tr, - enum Half_net_topology topo) -{ - assert (fanin>=1); - double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr); - double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr); - double Ig_on=0; - int num_states; - int num_on_tx; + enum Half_net_topology topo) { + assert (fanin >= 1); + double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr); + double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr); + double Ig_on = 0; + int num_states; + int num_on_tx; - num_states = int(pow(2.0, fanin)); + num_states = int(pow(2.0, fanin)); - switch (g_type) - { - case nmos: - if (fanin==1) - { - Ig_on = nmos_leak/num_states; + switch (g_type) { + case nmos: + if (fanin == 1) { + Ig_on = nmos_leak / num_states; + } else { + if (topo == parallel) { + for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) { + Ig_on += nmos_leak * combination(fanin, num_on_tx) * + num_on_tx; } - else - { - if (topo==parallel) - { - for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++) - { - Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx; - } - } - else - { - Ig_on += nmos_leak * fanin;//pull down network when all TXs are on. - //num_on_tx is the number of on tx - for (num_on_tx=1; num_on_txNspd < m2->Nspd) return true; - else if (m1->Nspd > m2->Nspd) return false; - else if (m1->Ndwl < m2->Ndwl) return true; - else if (m1->Ndwl > m2->Ndwl) return false; - else if (m1->Ndbl < m2->Ndbl) return true; - else if (m1->Ndbl > m2->Ndbl) return false; - else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true; - else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false; - else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true; - else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false; - else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true; - else return false; +bool mem_array::lt(const mem_array * m1, const mem_array * m2) { + if (m1->Nspd < m2->Nspd) return true; + else if (m1->Nspd > m2->Nspd) return false; + else if (m1->Ndwl < m2->Ndwl) return true; + else if (m1->Ndwl > m2->Ndwl) return false; + else if (m1->Ndbl < m2->Ndbl) return true; + else if (m1->Ndbl > m2->Ndbl) return false; + else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true; + else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false; + else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true; + else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false; + else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true; + else return false; } -void uca_org_t::find_delay() -{ - mem_array * data_arr = data_array2; - mem_array * tag_arr = tag_array2; +void uca_org_t::find_delay() { + mem_array * data_arr = data_array2; + mem_array * tag_arr = tag_array2; - // check whether it is a regular cache or scratch ram - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) - { - access_time = data_arr->access_time; - } - // Both tag and data lookup happen in parallel - // and the entire set is sent over the data array h-tree without - // waiting for the way-select signal --TODO add the corresponding - // power overhead Nav - else if (g_ip->fast_access == true) - { - access_time = MAX(tag_arr->access_time, data_arr->access_time); - } - // Tag is accessed first. On a hit, way-select signal along with the - // address is sent to read/write the appropriate block in the data - // array - else if (g_ip->is_seq_acc == true) - { - access_time = tag_arr->access_time + data_arr->access_time; - } - // Normal access: tag array access and data array access happen in parallel. - // But, the data array will wait for the way-select and transfer only the - // appropriate block over the h-tree. - else - { - access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, - data_arr->delay_before_subarray_output_driver) + - data_arr->delay_from_subarray_output_driver_to_output; - } -} - - - -void uca_org_t::find_energy() -{ - if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache) - power = data_array2->power + tag_array2->power; - else - power = data_array2->power; -} - - - -void uca_org_t::find_area() -{ - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false) - { - cache_ht = data_array2->height; - cache_len = data_array2->width; - } - else - { - cache_ht = MAX(tag_array2->height, data_array2->height); - cache_len = tag_array2->width + data_array2->width; - } - area = cache_ht * cache_len; -} - -void uca_org_t::adjust_area() -{ - double area_adjust; - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) - { - if (data_array2->area_efficiency/100.0<0.2) - { - //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); - area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0)); - cache_ht = cache_ht/area_adjust; - cache_len = cache_len/area_adjust; + // check whether it is a regular cache or scratch ram + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + access_time = data_arr->access_time; + } + // Both tag and data lookup happen in parallel + // and the entire set is sent over the data array h-tree without + // waiting for the way-select signal --TODO add the corresponding + // power overhead Nav + else if (g_ip->fast_access == true) { + access_time = MAX(tag_arr->access_time, data_arr->access_time); + } + // Tag is accessed first. On a hit, way-select signal along with the + // address is sent to read/write the appropriate block in the data + // array + else if (g_ip->is_seq_acc == true) { + access_time = tag_arr->access_time + data_arr->access_time; + } + // Normal access: tag array access and data array access happen in parallel. + // But, the data array will wait for the way-select and transfer only the + // appropriate block over the h-tree. + else { + access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, + data_arr->delay_before_subarray_output_driver) + + data_arr->delay_from_subarray_output_driver_to_output; } - } - area = cache_ht * cache_len; } -void uca_org_t::find_cyc() -{ - if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false) - { - cycle_time = data_array2->cycle_time; - } - else - { - cycle_time = MAX(tag_array2->cycle_time, - data_array2->cycle_time); - } + + +void uca_org_t::find_energy() { + if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) + power = data_array2->power + tag_array2->power; + else + power = data_array2->power; +} + + + +void uca_org_t::find_area() { + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + cache_ht = data_array2->height; + cache_len = data_array2->width; + } else { + cache_ht = MAX(tag_array2->height, data_array2->height); + cache_len = tag_array2->width + data_array2->width; + } + area = cache_ht * cache_len; +} + +void uca_org_t::adjust_area() { + double area_adjust; + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + if (data_array2->area_efficiency / 100.0 < 0.2) { + //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); + area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0)); + cache_ht = cache_ht / area_adjust; + cache_len = cache_len / area_adjust; + } + } + area = cache_ht * cache_len; +} + +void uca_org_t::find_cyc() { + if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) { + cycle_time = data_array2->cycle_time; + } else { + cycle_time = MAX(tag_array2->cycle_time, + data_array2->cycle_time); + } } uca_org_t :: uca_org_t() -:tag_array2(0), - data_array2(0) -{ + : tag_array2(0), + data_array2(0) { } -void uca_org_t :: cleanup() -{ - if (data_array2!=0) - delete data_array2; - if (tag_array2!=0) - delete tag_array2; +void uca_org_t :: cleanup() { + if (data_array2 != 0) + delete data_array2; + if (tag_array2 != 0) + delete tag_array2; } diff --git a/ext/mcpat/cacti/cacti_interface.h b/ext/mcpat/cacti/cacti_interface.h index f37596554..a2bddd819 100644 --- a/ext/mcpat/cacti/cacti_interface.h +++ b/ext/mcpat/cacti/cacti_interface.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -50,9 +51,8 @@ class mem_array; class uca_org_t; -class powerComponents -{ - public: +class powerComponents { +public: double dynamic; double leakage; double gate_leakage; @@ -60,17 +60,24 @@ class powerComponents double longer_channel_leakage; powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { } - powerComponents(const powerComponents & obj) { *this = obj; } - powerComponents & operator=(const powerComponents & rhs) - { - dynamic = rhs.dynamic; - leakage = rhs.leakage; - gate_leakage = rhs.gate_leakage; - short_circuit = rhs.short_circuit; - longer_channel_leakage = rhs.longer_channel_leakage; - return *this; + powerComponents(const powerComponents & obj) { + *this = obj; + } + powerComponents & operator=(const powerComponents & rhs) { + dynamic = rhs.dynamic; + leakage = rhs.leakage; + gate_leakage = rhs.gate_leakage; + short_circuit = rhs.short_circuit; + longer_channel_leakage = rhs.longer_channel_leakage; + return *this; + } + void reset() { + dynamic = 0; + leakage = 0; + gate_leakage = 0; + short_circuit = 0; + longer_channel_leakage = 0; } - void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;} friend powerComponents operator+(const powerComponents & x, const powerComponents & y); friend powerComponents operator*(const powerComponents & x, double const * const y); @@ -78,22 +85,24 @@ class powerComponents -class powerDef -{ - public: +class powerDef { +public: powerComponents readOp; powerComponents writeOp; powerComponents searchOp;//Sheng: for CAM and FA powerDef() : readOp(), writeOp(), searchOp() { } - void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();} + void reset() { + readOp.reset(); + writeOp.reset(); + searchOp.reset(); + } friend powerDef operator+(const powerDef & x, const powerDef & y); friend powerDef operator*(const powerDef & x, double const * const y); }; -enum Wire_type -{ +enum Wire_type { Global /* gloabl wires with repeaters */, Global_5 /* 5% delay penalty */, Global_10 /* 10% delay penalty */, @@ -108,12 +117,12 @@ enum Wire_type -class InputParameter -{ - public: +class InputParameter { +public: void parse_cfg(const string & infile); - bool error_checking(); // return false if the input parameters are problematic + // return false if the input parameters are problematic + bool error_checking(string name = "CACTI"); void display_ip(); unsigned int cache_sz; // in bytes @@ -172,14 +181,14 @@ class InputParameter int force_nuca_bank; int delay_wt, dynamic_power_wt, leakage_power_wt, - cycle_time_wt, area_wt; + cycle_time_wt, area_wt; int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, - cycle_time_wt_nuca, area_wt_nuca; + cycle_time_wt_nuca, area_wt_nuca; int delay_dev, dynamic_power_dev, leakage_power_dev, - cycle_time_dev, area_dev; + cycle_time_dev, area_dev; int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, - cycle_time_dev_nuca, area_dev_nuca; + cycle_time_dev_nuca, area_dev_nuca; int ed; //ED or ED2 optimization int nuca; @@ -194,167 +203,113 @@ class InputParameter bool add_ecc_b_; - //parameters for design constraint - double throughput; - double latency; - bool pipelinable; - int pipeline_stages; - int per_stage_vector; - bool with_clock_grid; + //parameters for design constraint + double throughput; + double latency; + bool pipelinable; + int pipeline_stages; + int per_stage_vector; + bool with_clock_grid; }; -typedef struct{ - int Ndwl; - int Ndbl; - double Nspd; - int deg_bl_muxing; - int Ndsam_lev_1; - int Ndsam_lev_2; - int number_activated_mats_horizontal_direction; - int number_subbanks; - int page_size_in_bits; - double delay_route_to_bank; - double delay_crossbar; - double delay_addr_din_horizontal_htree; - double delay_addr_din_vertical_htree; - double delay_row_predecode_driver_and_block; - double delay_row_decoder; - double delay_bitlines; - double delay_sense_amp; - double delay_subarray_output_driver; - double delay_bit_mux_predecode_driver_and_block; - double delay_bit_mux_decoder; - double delay_senseamp_mux_lev_1_predecode_driver_and_block; - double delay_senseamp_mux_lev_1_decoder; - double delay_senseamp_mux_lev_2_predecode_driver_and_block; - double delay_senseamp_mux_lev_2_decoder; - double delay_input_htree; - double delay_output_htree; - double delay_dout_vertical_htree; - double delay_dout_horizontal_htree; - double delay_comparator; - double access_time; - double cycle_time; - double multisubbank_interleave_cycle_time; - double delay_request_network; - double delay_inside_mat; - double delay_reply_network; - double trcd; - double cas_latency; - double precharge_delay; - powerDef power_routing_to_bank; - powerDef power_addr_input_htree; - powerDef power_data_input_htree; - powerDef power_data_output_htree; - powerDef power_addr_horizontal_htree; - powerDef power_datain_horizontal_htree; - powerDef power_dataout_horizontal_htree; - powerDef power_addr_vertical_htree; - powerDef power_datain_vertical_htree; - powerDef power_row_predecoder_drivers; - powerDef power_row_predecoder_blocks; - powerDef power_row_decoders; - powerDef power_bit_mux_predecoder_drivers; - powerDef power_bit_mux_predecoder_blocks; - powerDef power_bit_mux_decoders; - powerDef power_senseamp_mux_lev_1_predecoder_drivers; - powerDef power_senseamp_mux_lev_1_predecoder_blocks; - powerDef power_senseamp_mux_lev_1_decoders; - powerDef power_senseamp_mux_lev_2_predecoder_drivers; - powerDef power_senseamp_mux_lev_2_predecoder_blocks; - powerDef power_senseamp_mux_lev_2_decoders; - powerDef power_bitlines; - powerDef power_sense_amps; - powerDef power_prechg_eq_drivers; - powerDef power_output_drivers_at_subarray; - powerDef power_dataout_vertical_htree; - powerDef power_comparators; - powerDef power_crossbar; - powerDef total_power; - double area; - double all_banks_height; - double all_banks_width; - double bank_height; - double bank_width; - double subarray_memory_cell_area_height; - double subarray_memory_cell_area_width; - double mat_height; - double mat_width; - double routing_area_height_within_bank; - double routing_area_width_within_bank; - double area_efficiency; -// double perc_power_dyn_routing_to_bank; -// double perc_power_dyn_addr_horizontal_htree; -// double perc_power_dyn_datain_horizontal_htree; -// double perc_power_dyn_dataout_horizontal_htree; -// double perc_power_dyn_addr_vertical_htree; -// double perc_power_dyn_datain_vertical_htree; -// double perc_power_dyn_row_predecoder_drivers; -// double perc_power_dyn_row_predecoder_blocks; -// double perc_power_dyn_row_decoders; -// double perc_power_dyn_bit_mux_predecoder_drivers; -// double perc_power_dyn_bit_mux_predecoder_blocks; -// double perc_power_dyn_bit_mux_decoders; -// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers; -// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks; -// double perc_power_dyn_senseamp_mux_lev_1_decoders; -// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers; -// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks; -// double perc_power_dyn_senseamp_mux_lev_2_decoders; -// double perc_power_dyn_bitlines; -// double perc_power_dyn_sense_amps; -// double perc_power_dyn_prechg_eq_drivers; -// double perc_power_dyn_subarray_output_drivers; -// double perc_power_dyn_dataout_vertical_htree; -// double perc_power_dyn_comparators; -// double perc_power_dyn_crossbar; -// double perc_power_dyn_spent_outside_mats; -// double perc_power_leak_routing_to_bank; -// double perc_power_leak_addr_horizontal_htree; -// double perc_power_leak_datain_horizontal_htree; -// double perc_power_leak_dataout_horizontal_htree; -// double perc_power_leak_addr_vertical_htree; -// double perc_power_leak_datain_vertical_htree; -// double perc_power_leak_row_predecoder_drivers; -// double perc_power_leak_row_predecoder_blocks; -// double perc_power_leak_row_decoders; -// double perc_power_leak_bit_mux_predecoder_drivers; -// double perc_power_leak_bit_mux_predecoder_blocks; -// double perc_power_leak_bit_mux_decoders; -// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers; -// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks; -// double perc_power_leak_senseamp_mux_lev_1_decoders; -// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers; -// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks; -// double perc_power_leak_senseamp_mux_lev_2_decoders; -// double perc_power_leak_bitlines; -// double perc_power_leak_sense_amps; -// double perc_power_leak_prechg_eq_drivers; -// double perc_power_leak_subarray_output_drivers; -// double perc_power_leak_dataout_vertical_htree; -// double perc_power_leak_comparators; -// double perc_power_leak_crossbar; -// double perc_leak_mats; -// double perc_active_mats; - double refresh_power; - double dram_refresh_period; - double dram_array_availability; - double dyn_read_energy_from_closed_page; - double dyn_read_energy_from_open_page; - double leak_power_subbank_closed_page; - double leak_power_subbank_open_page; - double leak_power_request_and_reply_networks; - double activate_energy; - double read_energy; - double write_energy; - double precharge_energy; +typedef struct { + int Ndwl; + int Ndbl; + double Nspd; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; + int number_activated_mats_horizontal_direction; + int number_subbanks; + int page_size_in_bits; + double delay_route_to_bank; + double delay_crossbar; + double delay_addr_din_horizontal_htree; + double delay_addr_din_vertical_htree; + double delay_row_predecode_driver_and_block; + double delay_row_decoder; + double delay_bitlines; + double delay_sense_amp; + double delay_subarray_output_driver; + double delay_bit_mux_predecode_driver_and_block; + double delay_bit_mux_decoder; + double delay_senseamp_mux_lev_1_predecode_driver_and_block; + double delay_senseamp_mux_lev_1_decoder; + double delay_senseamp_mux_lev_2_predecode_driver_and_block; + double delay_senseamp_mux_lev_2_decoder; + double delay_input_htree; + double delay_output_htree; + double delay_dout_vertical_htree; + double delay_dout_horizontal_htree; + double delay_comparator; + double access_time; + double cycle_time; + double multisubbank_interleave_cycle_time; + double delay_request_network; + double delay_inside_mat; + double delay_reply_network; + double trcd; + double cas_latency; + double precharge_delay; + powerDef power_routing_to_bank; + powerDef power_addr_input_htree; + powerDef power_data_input_htree; + powerDef power_data_output_htree; + powerDef power_addr_horizontal_htree; + powerDef power_datain_horizontal_htree; + powerDef power_dataout_horizontal_htree; + powerDef power_addr_vertical_htree; + powerDef power_datain_vertical_htree; + powerDef power_row_predecoder_drivers; + powerDef power_row_predecoder_blocks; + powerDef power_row_decoders; + powerDef power_bit_mux_predecoder_drivers; + powerDef power_bit_mux_predecoder_blocks; + powerDef power_bit_mux_decoders; + powerDef power_senseamp_mux_lev_1_predecoder_drivers; + powerDef power_senseamp_mux_lev_1_predecoder_blocks; + powerDef power_senseamp_mux_lev_1_decoders; + powerDef power_senseamp_mux_lev_2_predecoder_drivers; + powerDef power_senseamp_mux_lev_2_predecoder_blocks; + powerDef power_senseamp_mux_lev_2_decoders; + powerDef power_bitlines; + powerDef power_sense_amps; + powerDef power_prechg_eq_drivers; + powerDef power_output_drivers_at_subarray; + powerDef power_dataout_vertical_htree; + powerDef power_comparators; + powerDef power_crossbar; + powerDef total_power; + double area; + double all_banks_height; + double all_banks_width; + double bank_height; + double bank_width; + double subarray_memory_cell_area_height; + double subarray_memory_cell_area_width; + double mat_height; + double mat_width; + double routing_area_height_within_bank; + double routing_area_width_within_bank; + double area_efficiency; + double refresh_power; + double dram_refresh_period; + double dram_array_availability; + double dyn_read_energy_from_closed_page; + double dyn_read_energy_from_open_page; + double leak_power_subbank_closed_page; + double leak_power_subbank_open_page; + double leak_power_request_and_reply_networks; + double activate_energy; + double read_energy; + double write_energy; + double precharge_energy; } results_mem_array; -class uca_org_t -{ - public: +class uca_org_t { +public: mem_array * tag_array2; mem_array * data_array2; double access_time; @@ -378,7 +333,7 @@ class uca_org_t void find_cyc(); void adjust_area();//for McPAT only to adjust routing overhead void cleanup(); - ~uca_org_t(){}; + ~uca_org_t() {}; }; void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); @@ -387,103 +342,62 @@ uca_org_t cacti_interface(const string & infile_name); //McPAT's plain interface, please keep !!! uca_org_t cacti_interface(InputParameter * const local_interface); //McPAT's plain interface, please keep !!! -uca_org_t init_interface(InputParameter * const local_interface); +uca_org_t init_interface(InputParameter * const local_interface, + const string &name); //McPAT's plain interface, please keep !!! uca_org_t cacti_interface( - int cache_size, - int line_size, - int associativity, - int rw_ports, - int excl_read_ports, - int excl_write_ports, - int single_ended_read_ports, - int search_ports, - int banks, - double tech_node, - int output_width, - int specific_tag, - int tag_width, - int access_mode, - int cache, - int main_mem, - int obj_func_delay, - int obj_func_dynamic_power, - int obj_func_leakage_power, - int obj_func_cycle_time, - int obj_func_area, - int dev_func_delay, - int dev_func_dynamic_power, - int dev_func_leakage_power, - int dev_func_area, - int dev_func_cycle_time, - int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate - int temp, - int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing - int data_arr_ram_cell_tech_flavor_in, - int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, - int tag_arr_peri_global_tech_flavor_in, - int interconnect_projection_type_in, - int wire_inside_mat_type_in, - int wire_outside_mat_type_in, - int REPEATERS_IN_HTREE_SEGMENTS_in, - int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, - int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, - int PAGE_SIZE_BITS_in, - int BURST_LENGTH_in, - int INTERNAL_PREFETCH_WIDTH_in, - int force_wiretype, - int wiretype, - int force_config, - int ndwl, - int ndbl, - int nspd, - int ndcm, - int ndsam1, - int ndsam2, - int ecc); -// int cache_size, -// int line_size, -// int associativity, -// int rw_ports, -// int excl_read_ports, -// int excl_write_ports, -// int single_ended_read_ports, -// int banks, -// double tech_node, -// int output_width, -// int specific_tag, -// int tag_width, -// int access_mode, -// int cache, -// int main_mem, -// int obj_func_delay, -// int obj_func_dynamic_power, -// int obj_func_leakage_power, -// int obj_func_area, -// int obj_func_cycle_time, -// int dev_func_delay, -// int dev_func_dynamic_power, -// int dev_func_leakage_power, -// int dev_func_area, -// int dev_func_cycle_time, -// int temp, -// int data_arr_ram_cell_tech_flavor_in, -// int data_arr_peri_global_tech_flavor_in, -// int tag_arr_ram_cell_tech_flavor_in, -// int tag_arr_peri_global_tech_flavor_in, -// int interconnect_projection_type_in, -// int wire_inside_mat_type_in, -// int wire_outside_mat_type_in, -// int REPEATERS_IN_HTREE_SEGMENTS_in, -// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, -// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, -//// double MAXAREACONSTRAINT_PERC_in, -//// double MAXACCTIMECONSTRAINT_PERC_in, -//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in, -// int PAGE_SIZE_BITS_in, -// int BURST_LENGTH_in, -// int INTERNAL_PREFETCH_WIDTH_in); + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node, + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in, + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int force_wiretype, + int wiretype, + int force_config, + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1, + int ndsam2, + int ecc); //Naveen's interface uca_org_t cacti_interface( @@ -542,91 +456,90 @@ uca_org_t cacti_interface( int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported int p_input); -class mem_array -{ - public: - int Ndcm; - int Ndwl; - int Ndbl; - double Nspd; - int deg_bl_muxing; - int Ndsam_lev_1; - int Ndsam_lev_2; - double access_time; - double cycle_time; - double multisubbank_interleave_cycle_time; - double area_ram_cells; - double area; - powerDef power; - double delay_senseamp_mux_decoder; - double delay_before_subarray_output_driver; - double delay_from_subarray_output_driver_to_output; - double height; - double width; +class mem_array { +public: + int Ndcm; + int Ndwl; + int Ndbl; + double Nspd; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; + double access_time; + double cycle_time; + double multisubbank_interleave_cycle_time; + double area_ram_cells; + double area; + powerDef power; + double delay_senseamp_mux_decoder; + double delay_before_subarray_output_driver; + double delay_from_subarray_output_driver_to_output; + double height; + double width; - double mat_height; - double mat_length; - double subarray_length; - double subarray_height; + double mat_height; + double mat_length; + double subarray_length; + double subarray_height; - double delay_route_to_bank, - delay_input_htree, - delay_row_predecode_driver_and_block, - delay_row_decoder, - delay_bitlines, - delay_sense_amp, - delay_subarray_output_driver, - delay_dout_htree, - delay_comparator, - delay_matchlines; + double delay_route_to_bank, + delay_input_htree, + delay_row_predecode_driver_and_block, + delay_row_decoder, + delay_bitlines, + delay_sense_amp, + delay_subarray_output_driver, + delay_dout_htree, + delay_comparator, + delay_matchlines; - double all_banks_height, - all_banks_width, - area_efficiency; + double all_banks_height, + all_banks_width, + area_efficiency; - powerDef power_routing_to_bank; - powerDef power_addr_input_htree; - powerDef power_data_input_htree; - powerDef power_data_output_htree; - powerDef power_htree_in_search; - powerDef power_htree_out_search; - powerDef power_row_predecoder_drivers; - powerDef power_row_predecoder_blocks; - powerDef power_row_decoders; - powerDef power_bit_mux_predecoder_drivers; - powerDef power_bit_mux_predecoder_blocks; - powerDef power_bit_mux_decoders; - powerDef power_senseamp_mux_lev_1_predecoder_drivers; - powerDef power_senseamp_mux_lev_1_predecoder_blocks; - powerDef power_senseamp_mux_lev_1_decoders; - powerDef power_senseamp_mux_lev_2_predecoder_drivers; - powerDef power_senseamp_mux_lev_2_predecoder_blocks; - powerDef power_senseamp_mux_lev_2_decoders; - powerDef power_bitlines; - powerDef power_sense_amps; - powerDef power_prechg_eq_drivers; - powerDef power_output_drivers_at_subarray; - powerDef power_dataout_vertical_htree; - powerDef power_comparators; + powerDef power_routing_to_bank; + powerDef power_addr_input_htree; + powerDef power_data_input_htree; + powerDef power_data_output_htree; + powerDef power_htree_in_search; + powerDef power_htree_out_search; + powerDef power_row_predecoder_drivers; + powerDef power_row_predecoder_blocks; + powerDef power_row_decoders; + powerDef power_bit_mux_predecoder_drivers; + powerDef power_bit_mux_predecoder_blocks; + powerDef power_bit_mux_decoders; + powerDef power_senseamp_mux_lev_1_predecoder_drivers; + powerDef power_senseamp_mux_lev_1_predecoder_blocks; + powerDef power_senseamp_mux_lev_1_decoders; + powerDef power_senseamp_mux_lev_2_predecoder_drivers; + powerDef power_senseamp_mux_lev_2_predecoder_blocks; + powerDef power_senseamp_mux_lev_2_decoders; + powerDef power_bitlines; + powerDef power_sense_amps; + powerDef power_prechg_eq_drivers; + powerDef power_output_drivers_at_subarray; + powerDef power_dataout_vertical_htree; + powerDef power_comparators; - powerDef power_cam_bitline_precharge_eq_drv; - powerDef power_searchline; - powerDef power_searchline_precharge; - powerDef power_matchlines; - powerDef power_matchline_precharge; - powerDef power_matchline_to_wordline_drv; + powerDef power_cam_bitline_precharge_eq_drv; + powerDef power_searchline; + powerDef power_searchline_precharge; + powerDef power_matchlines; + powerDef power_matchline_precharge; + powerDef power_matchline_to_wordline_drv; - min_values_t *arr_min; - enum Wire_type wt; + min_values_t *arr_min; + enum Wire_type wt; - // dram stats - double activate_energy, read_energy, write_energy, precharge_energy, - refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, - leak_power_request_and_reply_networks; + // dram stats + double activate_energy, read_energy, write_energy, precharge_energy, + refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, + leak_power_request_and_reply_networks; - double precharge_delay; + double precharge_delay; - static bool lt(const mem_array * m1, const mem_array * m2); + static bool lt(const mem_array * m1, const mem_array * m2); }; diff --git a/ext/mcpat/cacti/component.cc b/ext/mcpat/cacti/component.cc index 733108407..90e9baedf 100644 --- a/ext/mcpat/cacti/component.cc +++ b/ext/mcpat/cacti/component.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -45,34 +46,30 @@ using namespace std; Component::Component() - :area(), power(), rt_power(),delay(0) -{ + : area(), power(), rt_power(), delay(0) { } -Component::~Component() -{ +Component::~Component() { } -double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) -{ - double w_poly = g_ip->F_sz_um; - double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; - double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain - num_stacked_in * w_poly + - (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; +double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) { + double w_poly = g_ip->F_sz_um; + double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain + num_stacked_in * w_poly + + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; - if (num_folded_tr > 1) - { - total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly + - (num_folded_tr - 1) * num_stacked_in * w_poly + - (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; - } + if (num_folded_tr > 1) { + total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly + + (num_folded_tr - 1) * num_stacked_in * w_poly + + (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; + } - return total_diff_w; + return total_diff_w; } @@ -82,105 +79,96 @@ double Component::compute_gate_area( int num_inputs, double w_pmos, double w_nmos, - double h_gate) -{ - if (w_pmos <= 0.0 || w_nmos <= 0.0) - { - return 0.0; - } + double h_gate) { + if (w_pmos <= 0.0 || w_nmos <= 0.0) { + return 0.0; + } - double w_folded_pmos, w_folded_nmos; - int num_folded_pmos, num_folded_nmos; - double total_ndiff_w, total_pdiff_w; - Area gate; + double w_folded_pmos, w_folded_nmos; + int num_folded_pmos, num_folded_nmos; + double total_ndiff_w, total_pdiff_w; + Area gate; - double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; - double ratio_p_to_n = w_pmos / (w_pmos + w_nmos); + double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; + double ratio_p_to_n = w_pmos / (w_pmos + w_nmos); - if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) - { - return 0.0; - } + if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) { + return 0.0; + } - w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; - w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); - assert(w_folded_pmos > 0); + w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; + w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); + assert(w_folded_pmos > 0); - num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos)); - num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos)); + num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos)); + num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos)); - switch (gate_type) - { + switch (gate_type) { case INV: - total_ndiff_w = compute_diffusion_width(1, num_folded_nmos); - total_pdiff_w = compute_diffusion_width(1, num_folded_pmos); - break; + total_ndiff_w = compute_diffusion_width(1, num_folded_nmos); + total_pdiff_w = compute_diffusion_width(1, num_folded_pmos); + break; case NOR: - total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos); - total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos); - break; + total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos); + total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos); + break; case NAND: - total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos); - total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos); - break; + total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos); + total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos); + break; default: - cout << "Unknown gate type: " << gate_type << endl; - exit(1); - } + cout << "Unknown gate type: " << gate_type << endl; + exit(1); + } - gate.w = MAX(total_ndiff_w, total_pdiff_w); + gate.w = MAX(total_ndiff_w, total_pdiff_w); - if (w_folded_nmos > w_nmos) - { - //means that the height of the gate can - //be made smaller than the input height specified, so calculate the height of the gate. - gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; - } - else - { - gate.h = h_gate; - } - return gate.get_area(); + if (w_folded_nmos > w_nmos) { + //means that the height of the gate can + //be made smaller than the input height specified, so calculate the height of the gate. + gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; + } else { + gate.h = h_gate; + } + return gate.get_area(); } double Component::compute_tr_width_after_folding( double input_width, - double threshold_folding_width) -{//This is actually the width of the cell not the width of a device. -//The width of a cell and the width of a device is orthogonal. - if (input_width <= 0) - { - return 0; - } + double threshold_folding_width) { + //This is actually the width of the cell not the width of a device. + //The width of a cell and the width of a device is orthogonal. + if (input_width <= 0) { + return 0; + } - int num_folded_tr = (int) (ceil(input_width / threshold_folding_width)); - double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; - double width_poly = g_ip->F_sz_um; - double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; + int num_folded_tr = (int) (ceil(input_width / threshold_folding_width)); + double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double width_poly = g_ip->F_sz_um; + double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; - return total_diff_width; + return total_diff_width; } -double Component::height_sense_amplifier(double pitch_sense_amp) -{ - // compute the height occupied by all PMOS transistors - double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + - compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + - 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; +double Component::height_sense_amplifier(double pitch_sense_amp) { + // compute the height occupied by all PMOS transistors + double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; - // compute the height occupied by all NMOS transistors - double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + - compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + - 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; + // compute the height occupied by all NMOS transistors + double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; - // compute total height by considering gap between the p and n diffusion areas - return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; + // compute total height by considering gap between the p and n diffusion areas + return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; } @@ -195,42 +183,39 @@ int Component::logical_effort( double p_to_n_sz_ratio, bool is_dram_, bool is_wl_tr_, - double max_w_nmos) -{ - int num_gates = (int) (log(F) / log(fopt)); + double max_w_nmos) { + int num_gates = (int) (log(F) / log(fopt)); - // check if num_gates is odd. if so, add 1 to make it even - num_gates+= (num_gates % 2) ? 1 : 0; - num_gates = MAX(num_gates, num_gates_min); - - // recalculate the effective fanout of each stage - double f = pow(F, 1.0 / num_gates); - int i = num_gates - 1; - double C_in = C_load / f; - w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_); - w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); - w_p[i] = p_to_n_sz_ratio * w_n[i]; - - if (w_n[i] > max_w_nmos) - { - double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); - F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); - num_gates = (int) (log(F) / log(fopt)) + 1; - num_gates+= (num_gates % 2) ? 1 : 0; + // check if num_gates is odd. if so, add 1 to make it even + num_gates += (num_gates % 2) ? 1 : 0; num_gates = MAX(num_gates, num_gates_min); - f = pow(F, 1.0 / (num_gates - 1)); - i = num_gates - 1; - w_n[i] = max_w_nmos; + + // recalculate the effective fanout of each stage + double f = pow(F, 1.0 / num_gates); + int i = num_gates - 1; + double C_in = C_load / f; + w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_); + w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); w_p[i] = p_to_n_sz_ratio * w_n[i]; - } - for (i = num_gates - 2; i >= 1; i--) - { - w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_); - w_p[i] = p_to_n_sz_ratio * w_n[i]; - } + if (w_n[i] > max_w_nmos) { + double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); + F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); + num_gates = (int) (log(F) / log(fopt)) + 1; + num_gates += (num_gates % 2) ? 1 : 0; + num_gates = MAX(num_gates, num_gates_min); + f = pow(F, 1.0 / (num_gates - 1)); + i = num_gates - 1; + w_n[i] = max_w_nmos; + w_p[i] = p_to_n_sz_ratio * w_n[i]; + } - assert(num_gates <= MAX_NUMBER_GATES_STAGE); - return num_gates; + for (i = num_gates - 2; i >= 1; i--) { + w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_); + w_p[i] = p_to_n_sz_ratio * w_n[i]; + } + + assert(num_gates <= MAX_NUMBER_GATES_STAGE); + return num_gates; } diff --git a/ext/mcpat/cacti/component.h b/ext/mcpat/cacti/component.h index 75e2cb075..416e4e8e5 100644 --- a/ext/mcpat/cacti/component.h +++ b/ext/mcpat/cacti/component.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -42,41 +43,32 @@ using namespace std; class Crossbar; class Bank; -class Component -{ - public: +class Component { +public: Component(); ~Component(); Area area; - powerDef power,rt_power; + // TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE + // VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS + // MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER + // CALCULATIONS. THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS + powerDef power, rt_power; double delay; double cycle_time; - double compute_gate_area( - int gate_type, - int num_inputs, - double w_pmos, - double w_nmos, - double h_gate); - - double compute_tr_width_after_folding(double input_width, double threshold_folding_width); + double compute_gate_area(int gate_type, int num_inputs, double w_pmos, + double w_nmos, double h_gate); + double compute_tr_width_after_folding(double input_width, + double threshold_folding_width); double height_sense_amplifier(double pitch_sense_amp); - protected: - int logical_effort( - int num_gates_min, - double g, - double F, - double * w_n, - double * w_p, - double C_load, - double p_to_n_sz_ratio, - bool is_dram_, - bool is_wl_tr_, - double max_w_nmos); +protected: + int logical_effort(int num_gates_min, double g, double F, double * w_n, + double * w_p, double C_load, double p_to_n_sz_ratio, + bool is_dram_, bool is_wl_tr_, double max_w_nmos); - private: +private: double compute_diffusion_width(int num_stacked_in, int num_folded_tr); }; diff --git a/ext/mcpat/cacti/const.h b/ext/mcpat/cacti/const.h index aef7d019b..c9b3905bf 100644 --- a/ext/mcpat/cacti/const.h +++ b/ext/mcpat/cacti/const.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -249,21 +250,20 @@ const double bit_to_byte = 8.0; // v : vertical or velocity -enum ram_cell_tech_type_num -{ - itrs_hp = 0, - itrs_lstp = 1, - itrs_lop = 2, - lp_dram = 3, - comm_dram = 4 +enum ram_cell_tech_type_num { + itrs_hp = 0, + itrs_lstp = 1, + itrs_lop = 2, + lp_dram = 3, + comm_dram = 4 }; -const double pppm[4] = {1,1,1,1}; -const double pppm_lkg[4] = {0,1,1,0}; -const double pppm_dyn[4] = {1,0,0,0}; -const double pppm_Isub[4] = {0,1,0,0}; -const double pppm_Ig[4] = {0,0,1,0}; -const double pppm_sc[4] = {0,0,0,1}; +const double pppm[4] = {1, 1, 1, 1}; +const double pppm_lkg[4] = {0, 1, 1, 0}; +const double pppm_dyn[4] = {1, 0, 0, 0}; +const double pppm_Isub[4] = {0, 1, 0, 0}; +const double pppm_Ig[4] = {0, 0, 1, 0}; +const double pppm_sc[4] = {0, 0, 0, 1}; diff --git a/ext/mcpat/cacti/crossbar.cc b/ext/mcpat/cacti/crossbar.cc index a3d8532d5..ef2a373d6 100644 --- a/ext/mcpat/cacti/crossbar.cc +++ b/ext/mcpat/cacti/crossbar.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,123 +40,140 @@ Crossbar::Crossbar( double n_out_, double flit_size_, TechnologyParameter::DeviceType *dt - ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - Vdd = dt->Vdd; - CB_ADJ = 1; +): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + Vdd = dt->Vdd; + CB_ADJ = 1; } -Crossbar::~Crossbar(){} +Crossbar::~Crossbar() {} -double Crossbar::output_buffer() -{ +double Crossbar::output_buffer() { - //Wire winit(4, 4); - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; - Wire w1(g_ip->wt, l_eff); - //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; - double s1 = w1.repeater_size * (l_eff n_to_p_eff_curr_drv_ratio; - // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor - TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - TriS2 = s1; //driver transistor + //Wire winit(4, 4); + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; + Wire w1(g_ip->wt, l_eff); + //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; + double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ? + l_eff * ADJ / w1.repeater_spacing : ADJ); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor + TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + TriS2 = s1; //driver transistor - if (TriS1 < 1) - TriS1 = 1; + if (TriS1 < 1) + TriS1 = 1; - double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) + - gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0); + double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) + + gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0); // input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + // gate_C(TriS2*g_tp.min_w_nmos_, 0)+ // drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + // gate_C(TriS2*min_w_pmos, 0); - tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(TriS2*g_tp.min_w_nmos_, 0)+ - drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(TriS2*min_w_pmos, 0); - double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); - double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0); + tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(TriS2 * g_tp.min_w_nmos_, 0) + + drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(TriS2 * min_w_pmos, 0); + double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1, + g_tp.cell_h_def) + + drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); + double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0); - tri_inp_cap = input_cap; - tri_out_cap = output_cap; - tri_ctr_cap = ctr_cap; - return input_cap + output_cap + ctr_cap; + tri_inp_cap = input_cap; + tri_out_cap = output_cap; + tri_ctr_cap = ctr_cap; + return input_cap + output_cap + ctr_cap; } -void Crossbar::compute_power() -{ +void Crossbar::compute_power() { - Wire winit(4, 4); - double tri_cap = output_buffer(); - assert(tri_cap > 0); - //area of a tristate logic - double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def); - g_area *= 2; // to model area of output transistors - g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def); - g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def); - double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def); - // effective no. of tristate buffers that need to be laid side by side - int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch)); - double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out); - Wire w1(g_ip->wt, wire_len); + Wire winit(4, 4); + double tri_cap = output_buffer(); + assert(tri_cap > 0); + //area of a tristate logic + double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_, + TriS2 * min_w_pmos, g_tp.cell_h_def); + g_area *= 2; // to model area of output transistors + g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_, + TriS1 * min_w_pmos, g_tp.cell_h_def); + g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_, + TriS1 * 2 * min_w_pmos, g_tp.cell_h_def); + double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def); + // effective no. of tristate buffers that need to be laid side by side + int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch)); + double wire_len = MAX(width * ntri * n_out, + flit_size * g_tp.wire_outside_mat.pitch * n_out); + Wire w1(g_ip->wt, wire_len); - area.w = wire_len; - area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ; - Wire w2(g_ip->wt, area.h); + area.w = wire_len; + area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ; + Wire w2(g_ip->wt, area.h); - double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp); - if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb; + double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp); + if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb; - if (aspect_ratio_cb < ASPECT_THRESHOLD) { - if (n_out > 2 && n_inp > 2) { - CB_ADJ+=0.2; - //cout << "CB ADJ " << CB_ADJ << endl; - if (CB_ADJ < 4) { - this->compute_power(); - } + if (aspect_ratio_cb < ASPECT_THRESHOLD) { + if (n_out > 2 && n_inp > 2) { + CB_ADJ += 0.2; + //cout << "CB ADJ " << CB_ADJ << endl; + if (CB_ADJ < 4) { + this->compute_power(); + } + } } - } - power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size; - power.readOp.leakage = n_inp * n_out * flit_size * ( - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.leakage + w2.power.readOp.leakage); - power.readOp.gate_leakage = n_inp * n_out * flit_size * ( - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); + power.readOp.dynamic = + (w1.power.readOp.dynamic + w2.power.readOp.dynamic + + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + + tri_int_cap) * Vdd * Vdd) * flit_size; + power.readOp.leakage = n_inp * n_out * flit_size * ( + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, + 1, inv) * Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nand) * Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nor) * Vdd + + w1.power.readOp.leakage + w2.power.readOp.leakage); + power.readOp.gate_leakage = n_inp * n_out * flit_size * ( + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, + 1, inv) * Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nand) * Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nor) * Vdd + + w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); - // delay calculation - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; - Wire wdriver(g_ip->wt, l_eff); - double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1); - double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap; - delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); + // delay calculation + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; + Wire wdriver(g_ip->wt, l_eff); + double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) + + tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1); + double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out * + tri_inp_cap + n_inp * tri_out_cap; + delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth / + deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE); - Wire wreset(); + Wire wreset(); } -void Crossbar::print_crossbar() -{ - cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; - cout << "Flit size : " << flit_size << " bits" << endl; - cout << "Width : " << area.w << " u" << endl; - cout << "Height : " << area.h << " u" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; - cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl; - cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; +void Crossbar::print_crossbar() { + cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Width : " << area.w << " u" << endl; + cout << "Height : " << area.h << " u" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * + MIN(n_inp, n_out) << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" + << endl; + cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 + << " (mW)" << endl; + cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; } diff --git a/ext/mcpat/cacti/crossbar.h b/ext/mcpat/cacti/crossbar.h index 3b926517c..b8de7547b 100644 --- a/ext/mcpat/cacti/crossbar.h +++ b/ext/mcpat/cacti/crossbar.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -44,14 +45,13 @@ #include "parameter.h" #include "wire.h" -class Crossbar : public Component -{ - public: +class Crossbar : public Component { +public: Crossbar( - double in, - double out, - double flit_sz, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); + double in, + double out, + double flit_sz, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~Crossbar(); void print_crossbar(); @@ -62,18 +62,18 @@ class Crossbar : public Component double flit_size; double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap; - private: - double CB_ADJ; - /* - * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar - * buffer is adjusted to get an aspect ratio of whole cross bar close to one; - * when adjust the ratio, the number of wires route over the tri-state buffers does not change, - * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase - * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch - * will increase. As a result, the height of the crossbar (area.h) will increase. - */ +private: + double CB_ADJ; + /* + * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar + * buffer is adjusted to get an aspect ratio of whole cross bar close to one; + * when adjust the ratio, the number of wires route over the tri-state buffers does not change, + * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase + * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch + * will increase. As a result, the height of the crossbar (area.h) will increase. + */ - TechnologyParameter::DeviceType *deviceType; + TechnologyParameter::DeviceType *deviceType; double TriS1, TriS2; double min_w_pmos, Vdd; diff --git a/ext/mcpat/cacti/decoder.cc b/ext/mcpat/cacti/decoder.cc index 0de6f6157..7fa66b4ff 100644 --- a/ext/mcpat/cacti/decoder.cc +++ b/ext/mcpat/cacti/decoder.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -51,207 +52,184 @@ Decoder::Decoder( bool is_dram_, bool is_wl_tr_, const Area & cell_) -:exist(false), - C_ld_dec_out(_C_ld_dec_out), - R_wire_dec_out(_R_wire_dec_out), - num_gates(0), num_gates_min(2), - delay(0), - //power(), - fully_assoc(fully_assoc_), is_dram(is_dram_), - is_wl_tr(is_wl_tr_), cell(cell_) -{ + : exist(false), + C_ld_dec_out(_C_ld_dec_out), + R_wire_dec_out(_R_wire_dec_out), + num_gates(0), num_gates_min(2), + delay(0), + //power(), + fully_assoc(fully_assoc_), is_dram(is_dram_), + is_wl_tr(is_wl_tr_), cell(cell_) { - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { - w_dec_n[i] = 0; - w_dec_p[i] = 0; - } - - /* - * _num_dec_signals is the number of decoded signal as output - * num_addr_bits_dec is the number of signal to be decoded - * as the decoders input. - */ - int num_addr_bits_dec = _log2(_num_dec_signals); - - if (num_addr_bits_dec < 4) - { - if (flag_way_select) - { - exist = true; - num_in_signals = 2; + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + w_dec_n[i] = 0; + w_dec_p[i] = 0; } - else - { - num_in_signals = 0; - } - } - else - { - exist = true; - if (flag_way_select) - { - num_in_signals = 3; - } - else - { - num_in_signals = 2; - } - } + /* + * _num_dec_signals is the number of decoded signal as output + * num_addr_bits_dec is the number of signal to be decoded + * as the decoders input. + */ + int num_addr_bits_dec = _log2(_num_dec_signals); - assert(cell.h>0); - assert(cell.w>0); - // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; - //area.h = 4 * cell.h; - area.h = g_tp.h_dec * cell.h; + if (num_addr_bits_dec < 4) { + if (flag_way_select) { + exist = true; + num_in_signals = 2; + } else { + num_in_signals = 0; + } + } else { + exist = true; - compute_widths(); - compute_area(); + if (flag_way_select) { + num_in_signals = 3; + } else { + num_in_signals = 2; + } + } + + assert(cell.h > 0); + assert(cell.w > 0); + // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; + //area.h = 4 * cell.h; + area.h = g_tp.h_dec * cell.h; + + compute_widths(); + compute_area(); } -void Decoder::compute_widths() -{ - double F; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); - double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); +void Decoder::compute_widths() { + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - if (exist) - { - if (num_in_signals == 2 || fully_assoc) - { - w_dec_n[0] = 2 * g_tp.min_w_nmos_; - w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2; - } - else - { - w_dec_n[0] = 3 * g_tp.min_w_nmos_; - w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3; - } + if (exist) { + if (num_in_signals == 2 || fully_assoc) { + w_dec_n[0] = 2 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2; + } else { + w_dec_n[0] = 3 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3; + } - F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + - gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); - num_gates = logical_effort( - num_gates_min, - num_in_signals == 2 ? gnand2 : gnand3, - F, - w_dec_n, - w_dec_p, - C_ld_dec_out, - p_to_n_sz_ratio, - is_dram, - is_wl_tr, - g_tp.max_w_nmos_dec); - } + F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + + gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); + num_gates = logical_effort( + num_gates_min, + num_in_signals == 2 ? gnand2 : gnand3, + F, + w_dec_n, + w_dec_p, + C_ld_dec_out, + p_to_n_sz_ratio, + is_dram, + is_wl_tr, + g_tp.max_w_nmos_dec); + } } -void Decoder::compute_area() -{ - double cumulative_area = 0; - double cumulative_curr = 0; // cumulative leakage current - double cumulative_curr_Ig = 0; // cumulative leakage current +void Decoder::compute_area() { + double cumulative_area = 0; + double cumulative_curr = 0; // cumulative leakage current + double cumulative_curr_Ig = 0; // cumulative leakage current - if (exist) - { // First check if this decoder exists - if (num_in_signals == 2) - { - cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); - } - else if (num_in_signals == 3) - { - cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); - } + if (exist) { // First check if this decoder exists + if (num_in_signals == 2) { + cumulative_area = + compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + } else if (num_in_signals == 3) { + cumulative_area = + compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); + } - for (int i = 1; i < num_gates; i++) - { - cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); - cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); - } - power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; - power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; + for (int i = 1; i < num_gates; i++) { + cumulative_area += + compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); + cumulative_curr += + cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + } + power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; + power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; - area.w = (cumulative_area / area.h); - } + area.w = (cumulative_area / area.h); + } } -double Decoder::compute_delays(double inrisetime) -{ - if (exist) - { - double ret_val = 0; // outrisetime - int i; - double rd, tf, this_delay, c_load, c_intrinsic, Vpp; - double Vdd = g_tp.peri_global.Vdd; +double Decoder::compute_delays(double inrisetime) { + if (exist) { + double ret_val = 0; // outrisetime + int i; + double rd, tf, this_delay, c_load, c_intrinsic, Vpp; + double Vdd = g_tp.peri_global.Vdd; - if ((is_wl_tr) && (is_dram)) - { - Vpp = g_tp.vpp; + if ((is_wl_tr) && (is_dram)) { + Vpp = g_tp.vpp; + } else if (is_wl_tr) { + Vpp = g_tp.sram_cell.Vdd; + } else { + Vpp = g_tp.peri_global.Vdd; + } + + // first check whether a decoder is required at all + rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + + drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + for (i = 1; i < num_gates - 1; ++i) { + rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + } + + // add delay of final inverter that drives the wordline + i = num_gates - 1; + c_load = C_ld_dec_out; + rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + ret_val = this_delay / (1.0 - 0.5); + power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; + + return ret_val; + } else { + return 0.0; } - else if (is_wl_tr) - { - Vpp = g_tp.sram_cell.Vdd; - } - else - { - Vpp = g_tp.peri_global.Vdd; - } - - // first check whether a decoder is required at all - rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); - c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + - drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; - - for (i = 1; i < num_gates - 1; ++i) - { - rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); - c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + - drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; - } - - // add delay of final inverter that drives the wordline - i = num_gates - 1; - c_load = C_ld_dec_out; - rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + - drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); - tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - ret_val = this_delay / (1.0 - 0.5); - power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; - - return ret_val; - } - else - { - return 0.0; - } } void Decoder::leakage_feedback(double temperature) @@ -291,610 +269,568 @@ PredecBlk::PredecBlk( int num_dec_per_predec, bool is_dram, bool is_blk1) - :dec(dec_), - exist(false), - number_input_addr_bits(0), - C_ld_predec_blk_out(0), - R_wire_predec_blk_out(0), - branch_effort_nand2_gate_output(1), - branch_effort_nand3_gate_output(1), - flag_two_unique_paths(false), - flag_L2_gate(0), - number_inputs_L1_gate(0), - number_gates_L1_nand2_path(0), - number_gates_L1_nand3_path(0), - number_gates_L2(0), - min_number_gates_L1(2), - min_number_gates_L2(2), - num_L1_active_nand2_path(0), - num_L1_active_nand3_path(0), - delay_nand2_path(0), - delay_nand3_path(0), - power_nand2_path(), - power_nand3_path(), - power_L2(), - is_dram_(is_dram) -{ - int branch_effort_predec_out; - double C_ld_dec_gate; - int num_addr_bits_dec = _log2(num_dec_signals); - int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; - int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; + : dec(dec_), + exist(false), + number_input_addr_bits(0), + C_ld_predec_blk_out(0), + R_wire_predec_blk_out(0), + branch_effort_nand2_gate_output(1), + branch_effort_nand3_gate_output(1), + flag_two_unique_paths(false), + flag_L2_gate(0), + number_inputs_L1_gate(0), + number_gates_L1_nand2_path(0), + number_gates_L1_nand3_path(0), + number_gates_L2(0), + min_number_gates_L1(2), + min_number_gates_L2(2), + num_L1_active_nand2_path(0), + num_L1_active_nand3_path(0), + delay_nand2_path(0), + delay_nand3_path(0), + power_nand2_path(), + power_nand3_path(), + power_L2(), + is_dram_(is_dram) { + int branch_effort_predec_out; + double C_ld_dec_gate; + int num_addr_bits_dec = _log2(num_dec_signals); + int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; + int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; - w_L1_nand2_n[0] = 0; - w_L1_nand2_p[0] = 0; - w_L1_nand3_n[0] = 0; - w_L1_nand3_p[0] = 0; + w_L1_nand2_n[0] = 0; + w_L1_nand2_p[0] = 0; + w_L1_nand3_n[0] = 0; + w_L1_nand3_p[0] = 0; - if (is_blk1 == true) - { - if (num_addr_bits_dec <= 0) - { - return; + if (is_blk1 == true) { + if (num_addr_bits_dec <= 0) { + return; + } else if (num_addr_bits_dec < 4) { + // Just one predecoder block is required with NAND2 gates. No decoder required. + // The first level of predecoding directly drives the decoder output load + exist = true; + number_input_addr_bits = num_addr_bits_dec; + R_wire_predec_blk_out = dec->R_wire_dec_out; + C_ld_predec_blk_out = dec->C_ld_dec_out; + } else { + exist = true; + number_input_addr_bits = blk1_num_input_addr_bits; + branch_effort_predec_out = (1 << blk2_num_input_addr_bits); + C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } + } else { + if (num_addr_bits_dec >= 4) { + exist = true; + number_input_addr_bits = blk2_num_input_addr_bits; + branch_effort_predec_out = (1 << blk1_num_input_addr_bits); + C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } } - else if (num_addr_bits_dec < 4) - { - // Just one predecoder block is required with NAND2 gates. No decoder required. - // The first level of predecoding directly drives the decoder output load - exist = true; - number_input_addr_bits = num_addr_bits_dec; - R_wire_predec_blk_out = dec->R_wire_dec_out; - C_ld_predec_blk_out = dec->C_ld_dec_out; - } - else - { - exist = true; - number_input_addr_bits = blk1_num_input_addr_bits; - branch_effort_predec_out = (1 << blk2_num_input_addr_bits); - C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); - R_wire_predec_blk_out = R_wire_predec_blk_out_; - C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; - } - } - else - { - if (num_addr_bits_dec >= 4) - { - exist = true; - number_input_addr_bits = blk2_num_input_addr_bits; - branch_effort_predec_out = (1 << blk1_num_input_addr_bits); - C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); - R_wire_predec_blk_out = R_wire_predec_blk_out_; - C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; - } - } - compute_widths(); - compute_area(); + compute_widths(); + compute_area(); } -void PredecBlk::compute_widths() -{ - double F, c_load_nand3_path, c_load_nand2_path; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); - double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); +void PredecBlk::compute_widths() { + double F, c_load_nand3_path, c_load_nand2_path; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - if (exist == false) return; + if (exist == false) return; - switch (number_input_addr_bits) - { + switch (number_input_addr_bits) { case 1: - flag_two_unique_paths = false; - number_inputs_L1_gate = 2; - flag_L2_gate = 0; - break; + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 0; + break; case 2: - flag_two_unique_paths = false; - number_inputs_L1_gate = 2; - flag_L2_gate = 0; - break; + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 0; + break; case 3: - flag_two_unique_paths = false; - number_inputs_L1_gate = 3; - flag_L2_gate = 0; - break; + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 0; + break; case 4: - flag_two_unique_paths = false; - number_inputs_L1_gate = 2; - flag_L2_gate = 2; - branch_effort_nand2_gate_output = 4; - break; + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 4; + break; case 5: - flag_two_unique_paths = true; - flag_L2_gate = 2; - branch_effort_nand2_gate_output = 8; - branch_effort_nand3_gate_output = 4; - break; + flag_two_unique_paths = true; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 8; + branch_effort_nand3_gate_output = 4; + break; case 6: - flag_two_unique_paths = false; - number_inputs_L1_gate = 3; - flag_L2_gate = 2; - branch_effort_nand3_gate_output = 8; - break; + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 2; + branch_effort_nand3_gate_output = 8; + break; case 7: - flag_two_unique_paths = true; - flag_L2_gate = 3; - branch_effort_nand2_gate_output = 32; - branch_effort_nand3_gate_output = 16; - break; + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 32; + branch_effort_nand3_gate_output = 16; + break; case 8: - flag_two_unique_paths = true; - flag_L2_gate = 3; - branch_effort_nand2_gate_output = 64; - branch_effort_nand3_gate_output = 32; - break; + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 64; + branch_effort_nand3_gate_output = 32; + break; case 9: - flag_two_unique_paths = false; - number_inputs_L1_gate = 3; - flag_L2_gate = 3; - branch_effort_nand3_gate_output = 64; - break; + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 3; + branch_effort_nand3_gate_output = 64; + break; default: - assert(0); - break; - } - - // find the number of gates and sizing in second level of predecoder (if there is a second level) - if (flag_L2_gate) - { - if (flag_L2_gate == 2) - { // 2nd level is a NAND2 gate - w_L2_n[0] = 2 * g_tp.min_w_nmos_; - F = gnand2; - } - else - { // 2nd level is a NAND3 gate - w_L2_n[0] = 3 * g_tp.min_w_nmos_; - F = gnand3; - } - w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - number_gates_L2 = logical_effort( - min_number_gates_L2, - flag_L2_gate == 2 ? gnand2 : gnand3, - F, - w_L2_n, - w_L2_p, - C_ld_predec_blk_out, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - - // Now find the number of gates and widths in first level of predecoder - if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2)) - { // Whenever flag_two_unique_paths is true, it means first level of decoder employs - // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means - // a NAND2 gate is used in the first level of the predecoder - c_load_nand2_path = branch_effort_nand2_gate_output * - (gate_C(w_L2_n[0], 0, is_dram_) + - gate_C(w_L2_p[0], 0, is_dram_)); - w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; - w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2 * c_load_nand2_path / - (gate_C(w_L1_nand2_n[0], 0, is_dram_) + - gate_C(w_L1_nand2_p[0], 0, is_dram_)); - number_gates_L1_nand2_path = logical_effort( - min_number_gates_L1, - gnand2, - F, - w_L1_nand2_n, - w_L1_nand2_p, - c_load_nand2_path, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); + assert(0); + break; } - //Now find widths of gates along path in which first gate is a NAND3 - if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3)) - { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs - // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means - // a NAND3 gate is used in the first level of the predecoder - c_load_nand3_path = branch_effort_nand3_gate_output * - (gate_C(w_L2_n[0], 0, is_dram_) + - gate_C(w_L2_p[0], 0, is_dram_)); - w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; - w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3 * c_load_nand3_path / - (gate_C(w_L1_nand3_n[0], 0, is_dram_) + - gate_C(w_L1_nand3_p[0], 0, is_dram_)); - number_gates_L1_nand3_path = logical_effort( - min_number_gates_L1, - gnand3, - F, - w_L1_nand3_n, - w_L1_nand3_p, - c_load_nand3_path, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); + // find the number of gates and sizing in second level of predecoder (if there is a second level) + if (flag_L2_gate) { + if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate + w_L2_n[0] = 2 * g_tp.min_w_nmos_; + F = gnand2; + } else { // 2nd level is a NAND3 gate + w_L2_n[0] = 3 * g_tp.min_w_nmos_; + F = gnand3; + } + w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + number_gates_L2 = logical_effort( + min_number_gates_L2, + flag_L2_gate == 2 ? gnand2 : gnand3, + F, + w_L2_n, + w_L2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + + // Now find the number of gates and widths in first level of predecoder + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { + // Whenever flag_two_unique_paths is true, it means first level of + // decoder employs + // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, + // it means + // a NAND2 gate is used in the first level of the predecoder + c_load_nand2_path = branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2 * c_load_nand2_path / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort( + min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + c_load_nand2_path, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + + //Now find widths of gates along path in which first gate is a NAND3 + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs + // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means + // a NAND3 gate is used in the first level of the predecoder + c_load_nand3_path = branch_effort_nand3_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; + w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3 * c_load_nand3_path / + (gate_C(w_L1_nand3_n[0], 0, is_dram_) + + gate_C(w_L1_nand3_p[0], 0, is_dram_)); + number_gates_L1_nand3_path = logical_effort( + min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + c_load_nand3_path, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + } else { // find number of gates and widths in first level of predecoder block when there is no second level + if (number_inputs_L1_gate == 2) { + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2 * C_ld_predec_blk_out / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort( + min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } else if (number_inputs_L1_gate == 3) { + w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; + w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3 * C_ld_predec_blk_out / + (gate_C(w_L1_nand3_n[0], 0, is_dram_) + + gate_C(w_L1_nand3_p[0], 0, is_dram_)); + number_gates_L1_nand3_path = logical_effort( + min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } } - } - else - { // find number of gates and widths in first level of predecoder block when there is no second level - if (number_inputs_L1_gate == 2) - { - w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; - w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2*C_ld_predec_blk_out / - (gate_C(w_L1_nand2_n[0], 0, is_dram_) + - gate_C(w_L1_nand2_p[0], 0, is_dram_)); - number_gates_L1_nand2_path = logical_effort( - min_number_gates_L1, - gnand2, - F, - w_L1_nand2_n, - w_L1_nand2_p, - C_ld_predec_blk_out, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - } - else if (number_inputs_L1_gate == 3) - { - w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; - w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3*C_ld_predec_blk_out / - (gate_C(w_L1_nand3_n[0], 0, is_dram_) + - gate_C(w_L1_nand3_p[0], 0, is_dram_)); - number_gates_L1_nand3_path = logical_effort( - min_number_gates_L1, - gnand3, - F, - w_L1_nand3_n, - w_L1_nand3_p, - C_ld_predec_blk_out, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - } - } } -void PredecBlk::compute_area() -{ - if (exist) - { // First check whether a predecoder block is needed - int num_L1_nand2 = 0; - int num_L1_nand3 = 0; - int num_L2 = 0; - double tot_area_L1_nand3 =0; - double leak_L1_nand3 =0; - double gate_leak_L1_nand3 =0; +void PredecBlk::compute_area() { + if (exist) { // First check whether a predecoder block is needed + int num_L1_nand2 = 0; + int num_L1_nand3 = 0; + int num_L2 = 0; + double tot_area_L1_nand3 = 0; + double leak_L1_nand3 = 0; + double gate_leak_L1_nand3 = 0; - double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); - double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); - double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); - if (number_inputs_L1_gate != 3) { - tot_area_L1_nand3 = 0; - leak_L1_nand3 = 0; - gate_leak_L1_nand3 =0; - } - else { - tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); - leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); - gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); - } + double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); + double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + if (number_inputs_L1_gate != 3) { + tot_area_L1_nand3 = 0; + leak_L1_nand3 = 0; + gate_leak_L1_nand3 = 0; + } else { + tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); + leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); + gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); + } - switch (number_input_addr_bits) - { - case 1: //2 NAND2 gates - num_L1_nand2 = 2; - num_L2 = 0; - num_L1_active_nand2_path =1; - num_L1_active_nand3_path =0; - break; - case 2: //4 NAND2 gates - num_L1_nand2 = 4; - num_L2 = 0; - num_L1_active_nand2_path =1; - num_L1_active_nand3_path =0; - break; - case 3: //8 NAND3 gates - num_L1_nand3 = 8; - num_L2 = 0; - num_L1_active_nand2_path =0; - num_L1_active_nand3_path =1; - break; - case 4: //4 + 4 NAND2 gates - num_L1_nand2 = 8; - num_L2 = 16; - num_L1_active_nand2_path =2; - num_L1_active_nand3_path =0; - break; - case 5: //4 NAND2 gates, 8 NAND3 gates - num_L1_nand2 = 4; - num_L1_nand3 = 8; - num_L2 = 32; - num_L1_active_nand2_path =1; - num_L1_active_nand3_path =1; - break; - case 6: //8 + 8 NAND3 gates - num_L1_nand3 = 16; - num_L2 = 64; - num_L1_active_nand2_path =0; - num_L1_active_nand3_path =2; - break; - case 7: //4 + 4 NAND2 gates, 8 NAND3 gates - num_L1_nand2 = 8; - num_L1_nand3 = 8; - num_L2 = 128; - num_L1_active_nand2_path =2; - num_L1_active_nand3_path =1; - break; - case 8: //4 NAND2 gates, 8 + 8 NAND3 gates - num_L1_nand2 = 4; - num_L1_nand3 = 16; - num_L2 = 256; - num_L1_active_nand2_path =2; - num_L1_active_nand3_path =2; - break; - case 9: //8 + 8 + 8 NAND3 gates - num_L1_nand3 = 24; - num_L2 = 512; - num_L1_active_nand2_path =0; - num_L1_active_nand3_path =3; - break; - default: - break; - } + switch (number_input_addr_bits) { + case 1: //2 NAND2 gates + num_L1_nand2 = 2; + num_L2 = 0; + num_L1_active_nand2_path = 1; + num_L1_active_nand3_path = 0; + break; + case 2: //4 NAND2 gates + num_L1_nand2 = 4; + num_L2 = 0; + num_L1_active_nand2_path = 1; + num_L1_active_nand3_path = 0; + break; + case 3: //8 NAND3 gates + num_L1_nand3 = 8; + num_L2 = 0; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 1; + break; + case 4: //4 + 4 NAND2 gates + num_L1_nand2 = 8; + num_L2 = 16; + num_L1_active_nand2_path = 2; + num_L1_active_nand3_path = 0; + break; + case 5: //4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 8; + num_L2 = 32; + num_L1_active_nand2_path = 1; + num_L1_active_nand3_path = 1; + break; + case 6: //8 + 8 NAND3 gates + num_L1_nand3 = 16; + num_L2 = 64; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 2; + break; + case 7: //4 + 4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 8; + num_L1_nand3 = 8; + num_L2 = 128; + num_L1_active_nand2_path = 2; + num_L1_active_nand3_path = 1; + break; + case 8: //4 NAND2 gates, 8 + 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 16; + num_L2 = 256; + num_L1_active_nand2_path = 2; + num_L1_active_nand3_path = 2; + break; + case 9: //8 + 8 + 8 NAND3 gates + num_L1_nand3 = 24; + num_L2 = 512; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 3; + break; + default: + break; + } - for (int i = 1; i < number_gates_L1_nand2_path; ++i) - { - tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); - leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); - gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); - } - tot_area_L1_nand2 *= num_L1_nand2; - leak_L1_nand2 *= num_L1_nand2; - gate_leak_L1_nand2 *= num_L1_nand2; + for (int i = 1; i < number_gates_L1_nand2_path; ++i) { + tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); + leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + } + tot_area_L1_nand2 *= num_L1_nand2; + leak_L1_nand2 *= num_L1_nand2; + gate_leak_L1_nand2 *= num_L1_nand2; - for (int i = 1; i < number_gates_L1_nand3_path; ++i) - { - tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); - leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); - gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); - } - tot_area_L1_nand3 *= num_L1_nand3; - leak_L1_nand3 *= num_L1_nand3; - gate_leak_L1_nand3 *= num_L1_nand3; + for (int i = 1; i < number_gates_L1_nand3_path; ++i) { + tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); + leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + } + tot_area_L1_nand3 *= num_L1_nand3; + leak_L1_nand3 *= num_L1_nand3; + gate_leak_L1_nand3 *= num_L1_nand3; - double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; - double cumulative_area_L2 = 0.0; - double leakage_L2 = 0.0; - double gate_leakage_L2 = 0.0; + double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; + double cumulative_area_L2 = 0.0; + double leakage_L2 = 0.0; + double gate_leakage_L2 = 0.0; - if (flag_L2_gate == 2) - { - cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); - leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); - gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); - } - else if (flag_L2_gate == 3) - { - cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); - leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); - gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); - } + if (flag_L2_gate == 2) { + cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + } else if (flag_L2_gate == 3) { + cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + } - for (int i = 1; i < number_gates_L2; ++i) - { - cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); - leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); - gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); - } - cumulative_area_L2 *= num_L2; - leakage_L2 *= num_L2; - gate_leakage_L2 *= num_L2; + for (int i = 1; i < number_gates_L2; ++i) { + cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); + leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + } + cumulative_area_L2 *= num_L2; + leakage_L2 *= num_L2; + gate_leakage_L2 *= num_L2; - power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; - power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; - power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; - area.set_area(cumulative_area_L1 + cumulative_area_L2); - power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; - power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; - power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; - } + power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; + area.set_area(cumulative_area_L1 + cumulative_area_L2); + power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; + } } pair PredecBlk::compute_delays( - pair inrisetime) // -{ - pair ret_val; - ret_val.first = 0; // outrisetime_nand2_path - ret_val.second = 0; // outrisetime_nand3_path + pair inrisetime) { // + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path - double inrisetime_nand2_path = inrisetime.first; - double inrisetime_nand3_path = inrisetime.second; - int i; - double rd, c_load, c_intrinsic, tf, this_delay; - double Vdd = g_tp.peri_global.Vdd; + double inrisetime_nand2_path = inrisetime.first; + double inrisetime_nand3_path = inrisetime.second; + int i; + double rd, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; - // TODO: following delay calculation part can be greatly simplified. - // first check whether a predecoder block is required - if (exist) - { - //Find delay in first level of predecoder block - //First find delay in path - if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) - { - //First gate is a NAND2 gate - rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); - c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); - c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + // TODO: following delay calculation part can be greatly simplified. + // first check whether a predecoder block is required + if (exist) { + //Find delay in first level of predecoder block + //First find delay in path + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { + //First gate is a NAND2 gate + rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); + c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; - //Add delays of all but the last inverter in the chain - for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) - { - rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); - c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } + //Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) { + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } - //Add delay of the last inverter - i = number_gates_L1_nand2_path - 1; - rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); - if (flag_L2_gate) - { - c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - else - { //First level directly drives decoder output load - c_load = C_ld_predec_blk_out; - c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - ret_val.first = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } + //Add delay of the last inverter + i = number_gates_L1_nand2_path - 1; + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) { + c_load = branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { //First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { + //Check if the number of gates in the first level is more than 1. + //First gate is a NAND3 gate + rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); + c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + + //Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) { + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of the last inverter + i = number_gates_L1_nand3_path - 1; + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) { + c_load = branch_effort_nand3_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, + is_dram_)); + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { //First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + // Find delay through second level + if (flag_L2_gate) { + if (flag_L2_gate == 2) { + rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { // flag_L2_gate = 3 + rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + for (i = 1; i < number_gates_L2 - 1; ++i) { + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of final inverter that drives the wordline decoders + i = number_gates_L2 - 1; + c_load = C_ld_predec_blk_out; + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } } - if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) - { //Check if the number of gates in the first level is more than 1. - //First gate is a NAND3 gate - rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); - c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); - c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - - //Add delays of all but the last inverter in the chain - for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) - { - rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); - c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - //Add delay of the last inverter - i = number_gates_L1_nand3_path - 1; - rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); - if (flag_L2_gate) - { - c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - else - { //First level directly drives decoder output load - c_load = C_ld_predec_blk_out; - c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - ret_val.second = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - } - - // Find delay through second level - if (flag_L2_gate) - { - if (flag_L2_gate == 2) - { - rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); - c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); - c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - else - { // flag_L2_gate = 3 - rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); - c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); - c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - for (i = 1; i < number_gates_L2 - 1; ++i) - { - rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); - c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - //Add delay of final inverter that drives the wordline decoders - i = number_gates_L2 - 1; - c_load = C_ld_predec_blk_out; - rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - ret_val.first = this_delay / (1.0 - 0.5); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - ret_val.second = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - } - - delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second; - return ret_val; + delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second; + return ret_val; } void PredecBlk::leakage_feedback(double temperature) @@ -1033,302 +969,287 @@ PredecBlkDrv::PredecBlkDrv( int way_select_, PredecBlk * blk_, bool is_dram) - :flag_driver_exists(0), - number_gates_nand2_path(0), - number_gates_nand3_path(0), - min_number_gates(2), - num_buffers_driving_1_nand2_load(0), - num_buffers_driving_2_nand2_load(0), - num_buffers_driving_4_nand2_load(0), - num_buffers_driving_2_nand3_load(0), - num_buffers_driving_8_nand3_load(0), - num_buffers_nand3_path(0), - c_load_nand2_path_out(0), - c_load_nand3_path_out(0), - r_load_nand2_path_out(0), - r_load_nand3_path_out(0), - delay_nand2_path(0), - delay_nand3_path(0), - power_nand2_path(), - power_nand3_path(), - blk(blk_), dec(blk->dec), - is_dram_(is_dram), - way_select(way_select_) -{ - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { - width_nand2_path_n[i] = 0; - width_nand2_path_p[i] = 0; - width_nand3_path_n[i] = 0; - width_nand3_path_p[i] = 0; - } - - number_input_addr_bits = blk->number_input_addr_bits; - - if (way_select > 1) - { - flag_driver_exists = 1; - number_input_addr_bits = way_select; - if (dec->num_in_signals == 2) - { - c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); - num_buffers_driving_2_nand2_load = number_input_addr_bits; + : flag_driver_exists(0), + number_gates_nand2_path(0), + number_gates_nand3_path(0), + min_number_gates(2), + num_buffers_driving_1_nand2_load(0), + num_buffers_driving_2_nand2_load(0), + num_buffers_driving_4_nand2_load(0), + num_buffers_driving_2_nand3_load(0), + num_buffers_driving_8_nand3_load(0), + num_buffers_nand3_path(0), + c_load_nand2_path_out(0), + c_load_nand3_path_out(0), + r_load_nand2_path_out(0), + r_load_nand3_path_out(0), + delay_nand2_path(0), + delay_nand3_path(0), + power_nand2_path(), + power_nand3_path(), + blk(blk_), dec(blk->dec), + is_dram_(is_dram), + way_select(way_select_) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_nand2_path_n[i] = 0; + width_nand2_path_p[i] = 0; + width_nand3_path_n[i] = 0; + width_nand3_path_p[i] = 0; } - else if (dec->num_in_signals == 3) - { - c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); - num_buffers_driving_2_nand3_load = number_input_addr_bits; - } - } - else if (way_select == 0) - { - if (blk->exist) - { - flag_driver_exists = 1; - } - } - compute_widths(); - compute_area(); + number_input_addr_bits = blk->number_input_addr_bits; + + if (way_select > 1) { + flag_driver_exists = 1; + number_input_addr_bits = way_select; + if (dec->num_in_signals == 2) { + c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand2_load = number_input_addr_bits; + } else if (dec->num_in_signals == 3) { + c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand3_load = number_input_addr_bits; + } + } else if (way_select == 0) { + if (blk->exist) { + flag_driver_exists = 1; + } + } + + compute_widths(); + compute_area(); } -void PredecBlkDrv::compute_widths() -{ - // The predecode block driver accepts as input the address bits from the h-tree network. For - // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of - // inversion to generate addrbar and simply treat addrbar as addr. +void PredecBlkDrv::compute_widths() { + // The predecode block driver accepts as input the address bits from the h-tree network. For + // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of + // inversion to generate addrbar and simply treat addrbar as addr. - double F; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); - if (flag_driver_exists) - { - double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); - double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); + if (flag_driver_exists) { + double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); + double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); - if (way_select == 0) - { - if (blk->number_input_addr_bits == 1) - { //2 NAND2 gates - num_buffers_driving_2_nand2_load = 1; - c_load_nand2_path_out = 2 * C_nand2_gate_blk; - } - else if (blk->number_input_addr_bits == 2) - { //4 NAND2 gates one 2-4 decoder - num_buffers_driving_4_nand2_load = 2; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - } - else if (blk->number_input_addr_bits == 3) - { //8 NAND3 gates one 3-8 decoder - num_buffers_driving_8_nand3_load = 3; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 4) - { //4 + 4 NAND2 gates two 2-4 decoder - num_buffers_driving_4_nand2_load = 4; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - } - else if (blk->number_input_addr_bits == 5) - { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder - num_buffers_driving_4_nand2_load = 2; - num_buffers_driving_8_nand3_load = 3; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 6) - { //8 + 8 NAND3 gates two 3-8 decoder - num_buffers_driving_8_nand3_load = 6; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 7) - { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder - num_buffers_driving_4_nand2_load = 4; - num_buffers_driving_8_nand3_load = 3; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 8) - { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder - num_buffers_driving_4_nand2_load = 2; - num_buffers_driving_8_nand3_load = 6; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 9) - { //8 + 8 + 8 NAND3 gates three 3-8 decoder - num_buffers_driving_8_nand3_load = 9; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } + if (way_select == 0) { + if (blk->number_input_addr_bits == 1) { + //2 NAND2 gates + num_buffers_driving_2_nand2_load = 1; + c_load_nand2_path_out = 2 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == 2) { + //4 NAND2 gates one 2-4 decoder + num_buffers_driving_4_nand2_load = 2; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == 3) { + //8 NAND3 gates one 3-8 decoder + num_buffers_driving_8_nand3_load = 3; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 4) { + //4 + 4 NAND2 gates two 2-4 decoder + num_buffers_driving_4_nand2_load = 4; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == 5) { + //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 + //decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 6) { + //8 + 8 NAND3 gates two 3-8 decoder + num_buffers_driving_8_nand3_load = 6; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 7) { + //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 + //decoder + num_buffers_driving_4_nand2_load = 4; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 8) { + //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 + //decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 6; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 9) { + //8 + 8 + 8 NAND3 gates three 3-8 decoder + num_buffers_driving_8_nand3_load = 9; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + } + + if ((blk->flag_two_unique_paths) || + (blk->number_inputs_L1_gate == 2) || + (number_input_addr_bits == 0) || + ((way_select) && (dec->num_in_signals == 2))) { + //this means that way_select is driving NAND2 in decoder. + width_nand2_path_n[0] = g_tp.min_w_nmos_; + width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; + F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); + number_gates_nand2_path = logical_effort( + min_number_gates, + 1, + F, + width_nand2_path_n, + width_nand2_path_p, + c_load_nand2_path_out, + p_to_n_sz_ratio, + is_dram_, false, g_tp.max_w_nmos_); + } + + if ((blk->flag_two_unique_paths) || + (blk->number_inputs_L1_gate == 3) || + ((way_select) && (dec->num_in_signals == 3))) { + //this means that way_select is driving NAND3 in decoder. + width_nand3_path_n[0] = g_tp.min_w_nmos_; + width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; + F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); + number_gates_nand3_path = logical_effort( + min_number_gates, + 1, + F, + width_nand3_path_n, + width_nand3_path_p, + c_load_nand3_path_out, + p_to_n_sz_ratio, + is_dram_, false, g_tp.max_w_nmos_); + } } - - if ((blk->flag_two_unique_paths) || - (blk->number_inputs_L1_gate == 2) || - (number_input_addr_bits == 0) || - ((way_select)&&(dec->num_in_signals == 2))) - { //this means that way_select is driving NAND2 in decoder. - width_nand2_path_n[0] = g_tp.min_w_nmos_; - width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; - F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); - number_gates_nand2_path = logical_effort( - min_number_gates, - 1, - F, - width_nand2_path_n, - width_nand2_path_p, - c_load_nand2_path_out, - p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); - } - - if ((blk->flag_two_unique_paths) || - (blk->number_inputs_L1_gate == 3) || - ((way_select)&&(dec->num_in_signals == 3))) - { //this means that way_select is driving NAND3 in decoder. - width_nand3_path_n[0] = g_tp.min_w_nmos_; - width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; - F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); - number_gates_nand3_path = logical_effort( - min_number_gates, - 1, - F, - width_nand3_path_n, - width_nand3_path_p, - c_load_nand3_path_out, - p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); - } - } } -void PredecBlkDrv::compute_area() -{ - double area_nand2_path = 0; - double area_nand3_path = 0; - double leak_nand2_path = 0; - double leak_nand3_path = 0; - double gate_leak_nand2_path = 0; - double gate_leak_nand3_path = 0; +void PredecBlkDrv::compute_area() { + double area_nand2_path = 0; + double area_nand3_path = 0; + double leak_nand2_path = 0; + double leak_nand3_path = 0; + double gate_leak_nand2_path = 0; + double gate_leak_nand3_path = 0; - if (flag_driver_exists) - { // first check whether a predecoder block driver is needed - for (int i = 0; i < number_gates_nand2_path; ++i) - { - area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def); - leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); - gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); - } - area_nand2_path *= (num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - leak_nand2_path *= (num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + + if (flag_driver_exists) { + // first check whether a predecoder block driver is needed + for (int i = 0; i < number_gates_nand2_path; ++i) { + area_nand2_path += + compute_gate_area(INV, 1, width_nand2_path_p[i], + width_nand2_path_n[i], g_tp.cell_h_def); + leak_nand2_path += + cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], + 1, inv, is_dram_); + gate_leak_nand2_path += + cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], + 1, inv, is_dram_); + } + area_nand2_path *= (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load); + leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); - for (int i = 0; i < number_gates_nand3_path; ++i) - { - area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def); - leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); - gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + for (int i = 0; i < number_gates_nand3_path; ++i) { + area_nand3_path += + compute_gate_area(INV, 1, width_nand3_path_p[i], + width_nand3_path_n[i], g_tp.cell_h_def); + leak_nand3_path += + cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], + 1, inv, is_dram_); + gate_leak_nand3_path += + cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], + 1, inv, is_dram_); + } + area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + + power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; + power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; + area.set_area(area_nand2_path + area_nand3_path); } - area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - - power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; - power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; - area.set_area(area_nand2_path + area_nand3_path); - } } pair PredecBlkDrv::compute_delays( double inrisetime_nand2_path, - double inrisetime_nand3_path) -{ - pair ret_val; - ret_val.first = 0; // outrisetime_nand2_path - ret_val.second = 0; // outrisetime_nand3_path - int i; - double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; - double Vdd = g_tp.peri_global.Vdd; + double inrisetime_nand3_path) { + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path + int i; + double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; - if (flag_driver_exists) - { - for (i = 0; i < number_gates_nand2_path - 1; ++i) - { - rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); - c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; - } + if (flag_driver_exists) { + for (i = 0; i < number_gates_nand2_path - 1; ++i) { + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; + } - // Final inverter drives the predecoder block or the decoder output load - if (number_gates_nand2_path != 0) - { - i = number_gates_nand2_path - 1; - rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - c_load = c_load_nand2_path_out; - tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2; - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - ret_val.first = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; + // Final inverter drives the predecoder block or the decoder output load + if (number_gates_nand2_path != 0) { + i = number_gates_nand2_path - 1; + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + c_load = c_load_nand2_path_out; + tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; // cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) -{ - driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + - drv1->power_nand3_path.readOp.leakage + - drv2->power_nand2_path.readOp.leakage + - drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + - blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + - blk2->power_L2.readOp.leakage; - power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; + : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + + blk2->power_L2.readOp.leakage; + power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; - driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; - block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; + driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + + drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } void PredecBlkDrv::leakage_feedback(double temperature) @@ -1399,37 +1319,35 @@ void PredecBlkDrv::leakage_feedback(double temperature) } } -double Predec::compute_delays(double inrisetime) -{ - // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. - pair tmp_pair1, tmp_pair2; - tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); - tmp_pair1 = blk1->compute_delays(tmp_pair1); - tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); - tmp_pair2 = blk2->compute_delays(tmp_pair2); - tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); +double Predec::compute_delays(double inrisetime) { + // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. + pair tmp_pair1, tmp_pair2; + tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); + tmp_pair1 = blk1->compute_delays(tmp_pair1); + tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); + tmp_pair2 = blk2->compute_delays(tmp_pair2); + tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); - driver_power.readOp.dynamic = - drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + - drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + - drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + - drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; + driver_power.readOp.dynamic = + drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; - block_power.readOp.dynamic = - blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + - blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + - blk1->power_L2.readOp.dynamic + - blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + - blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + - blk2->power_L2.readOp.dynamic; + block_power.readOp.dynamic = + blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk1->power_L2.readOp.dynamic + + blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk2->power_L2.readOp.dynamic; - power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; + power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; - delay = tmp_pair1.first; - return tmp_pair1.second; + delay = tmp_pair1.first; + return tmp_pair1.second; } - void Predec::leakage_feedback(double temperature) { drv1->leakage_feedback(temperature); @@ -1465,113 +1383,116 @@ void Predec::leakage_feedback(double temperature) // returns pair Predec::get_max_delay_before_decoder( pair input_pair1, - pair input_pair2) -{ - pair ret_val; - double delay; + pair input_pair2) { + pair ret_val; + double delay; - delay = drv1->delay_nand2_path + blk1->delay_nand2_path; - ret_val.first = delay; - ret_val.second = input_pair1.first; - delay = drv1->delay_nand3_path + blk1->delay_nand3_path; - if (ret_val.first < delay) - { + delay = drv1->delay_nand2_path + blk1->delay_nand2_path; ret_val.first = delay; - ret_val.second = input_pair1.second; - } - delay = drv2->delay_nand2_path + blk2->delay_nand2_path; - if (ret_val.first < delay) - { - ret_val.first = delay; - ret_val.second = input_pair2.first; - } - delay = drv2->delay_nand3_path + blk2->delay_nand3_path; - if (ret_val.first < delay) - { - ret_val.first = delay; - ret_val.second = input_pair2.second; - } + ret_val.second = input_pair1.first; + delay = drv1->delay_nand3_path + blk1->delay_nand3_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair1.second; + } + delay = drv2->delay_nand2_path + blk2->delay_nand2_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair2.first; + } + delay = drv2->delay_nand3_path + blk2->delay_nand3_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair2.second; + } - return ret_val; + return ret_val; } -Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram) -:number_gates(0), - min_number_gates(2), - c_gate_load(c_gate_load_), - c_wire_load(c_wire_load_), - r_wire_load(r_wire_load_), - delay(0), - power(), - is_dram_(is_dram) -{ - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { - width_n[i] = 0; - width_p[i] = 0; - } +Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, + bool is_dram) + : number_gates(0), + min_number_gates(2), + c_gate_load(c_gate_load_), + c_wire_load(c_wire_load_), + r_wire_load(r_wire_load_), + delay(0), + power(), + is_dram_(is_dram) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_n[i] = 0; + width_p[i] = 0; + } - compute_widths(); + compute_widths(); } -void Driver::compute_widths() -{ - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); - double c_load = c_gate_load + c_wire_load; - width_n[0] = g_tp.min_w_nmos_; - width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; +void Driver::compute_widths() { + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double c_load = c_gate_load + c_wire_load; + width_n[0] = g_tp.min_w_nmos_; + width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); - number_gates = logical_effort( - min_number_gates, - 1, - F, - width_n, - width_p, - c_load, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); + double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); + number_gates = logical_effort( + min_number_gates, + 1, + F, + width_n, + width_p, + c_load, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); } -double Driver::compute_delay(double inrisetime) -{ - int i; - double rd, c_load, c_intrinsic, tf; - double this_delay = 0; +double Driver::compute_delay(double inrisetime) { + int i; + double rd, c_load, c_intrinsic, tf; + double this_delay = 0; - for (i = 0; i < number_gates - 1; ++i) - { + for (i = 0; i < number_gates - 1; ++i) { + rd = tr_R_on(width_n[i], NCH, 1, is_dram_); + c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + } + + i = number_gates - 1; + c_load = c_gate_load + c_wire_load; rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + r_wire_load * + (c_wire_load / 2 + c_gate_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; - power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; - } + power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; - i = number_gates - 1; - c_load = c_gate_load + c_wire_load; - rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; - power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; - - return this_delay / (1.0 - 0.5); + return this_delay / (1.0 - 0.5); } diff --git a/ext/mcpat/cacti/decoder.h b/ext/mcpat/cacti/decoder.h index 35631e84b..a2ddf722c 100644 --- a/ext/mcpat/cacti/decoder.h +++ b/ext/mcpat/cacti/decoder.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -42,9 +43,8 @@ using namespace std; -class Decoder : public Component -{ - public: +class Decoder : public Component { +public: Decoder( int _num_dec_signals, bool flag_way_select, @@ -80,125 +80,120 @@ class Decoder : public Component -class PredecBlk : public Component -{ - public: - PredecBlk( - int num_dec_signals, - Decoder * dec, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out, - int num_dec_per_predec, - bool is_dram_, - bool is_blk1); +class PredecBlk : public Component { +public: + PredecBlk( + int num_dec_signals, + Decoder * dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); - Decoder * dec; - bool exist; - int number_input_addr_bits; - double C_ld_predec_blk_out; - double R_wire_predec_blk_out; - int branch_effort_nand2_gate_output; - int branch_effort_nand3_gate_output; - bool flag_two_unique_paths; - int flag_L2_gate; - int number_inputs_L1_gate; - int number_gates_L1_nand2_path; - int number_gates_L1_nand3_path; - int number_gates_L2; - int min_number_gates_L1; - int min_number_gates_L2; - int num_L1_active_nand2_path; - int num_L1_active_nand3_path; - double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; - double w_L2_n[MAX_NUMBER_GATES_STAGE]; - double w_L2_p[MAX_NUMBER_GATES_STAGE]; - double delay_nand2_path; - double delay_nand3_path; - powerDef power_nand2_path; - powerDef power_nand3_path; - powerDef power_L2; + Decoder * dec; + bool exist; + int number_input_addr_bits; + double C_ld_predec_blk_out; + double R_wire_predec_blk_out; + int branch_effort_nand2_gate_output; + int branch_effort_nand3_gate_output; + bool flag_two_unique_paths; + int flag_L2_gate; + int number_inputs_L1_gate; + int number_gates_L1_nand2_path; + int number_gates_L1_nand3_path; + int number_gates_L2; + int min_number_gates_L1; + int min_number_gates_L2; + int num_L1_active_nand2_path; + int num_L1_active_nand3_path; + double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; + double w_L2_n[MAX_NUMBER_GATES_STAGE]; + double w_L2_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; + powerDef power_L2; - bool is_dram_; + bool is_dram_; - void compute_widths(); - void compute_area(); + void compute_widths(); + void compute_area(); - void leakage_feedback(double temperature); + void leakage_feedback(double temperature); - pair compute_delays(pair inrisetime); // - // return + pair compute_delays(pair inrisetime); // + // return }; -class PredecBlkDrv : public Component -{ - public: - PredecBlkDrv( - int way_select, - PredecBlk * blk_, - bool is_dram); +class PredecBlkDrv : public Component { +public: + PredecBlkDrv( + int way_select, + PredecBlk * blk_, + bool is_dram); - int flag_driver_exists; - int number_input_addr_bits; - int number_gates_nand2_path; - int number_gates_nand3_path; - int min_number_gates; - int num_buffers_driving_1_nand2_load; - int num_buffers_driving_2_nand2_load; - int num_buffers_driving_4_nand2_load; - int num_buffers_driving_2_nand3_load; - int num_buffers_driving_8_nand3_load; - int num_buffers_nand3_path; - double c_load_nand2_path_out; - double c_load_nand3_path_out; - double r_load_nand2_path_out; - double r_load_nand3_path_out; - double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; - double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; - double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; - double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; - double delay_nand2_path; - double delay_nand3_path; - powerDef power_nand2_path; - powerDef power_nand3_path; + int flag_driver_exists; + int number_input_addr_bits; + int number_gates_nand2_path; + int number_gates_nand3_path; + int min_number_gates; + int num_buffers_driving_1_nand2_load; + int num_buffers_driving_2_nand2_load; + int num_buffers_driving_4_nand2_load; + int num_buffers_driving_2_nand3_load; + int num_buffers_driving_8_nand3_load; + int num_buffers_nand3_path; + double c_load_nand2_path_out; + double c_load_nand3_path_out; + double r_load_nand2_path_out; + double r_load_nand3_path_out; + double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; - PredecBlk * blk; - Decoder * dec; - bool is_dram_; - int way_select; + PredecBlk * blk; + Decoder * dec; + bool is_dram_; + int way_select; - void compute_widths(); - void compute_area(); + void compute_widths(); + void compute_area(); - void leakage_feedback(double temperature); + void leakage_feedback(double temperature); - pair compute_delays( - double inrisetime_nand2_path, - double inrisetime_nand3_path); // return + pair compute_delays( + double inrisetime_nand2_path, + double inrisetime_nand3_path); // return - inline int num_addr_bits_nand2_path() - { - return num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load; - } - inline int num_addr_bits_nand3_path() - { - return num_buffers_driving_2_nand3_load + - num_buffers_driving_8_nand3_load; - } - double get_rdOp_dynamic_E(int num_act_mats_hor_dir); + inline int num_addr_bits_nand2_path() { + return num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load; + } + inline int num_addr_bits_nand3_path() { + return num_buffers_driving_2_nand3_load + + num_buffers_driving_8_nand3_load; + } + double get_rdOp_dynamic_E(int num_act_mats_hor_dir); }; -class Predec : public Component -{ - public: +class Predec : public Component { +public: Predec( PredecBlkDrv * drv1, PredecBlkDrv * drv2); @@ -214,7 +209,7 @@ class Predec : public Component powerDef block_power; powerDef driver_power; - private: +private: // returns pair get_max_delay_before_decoder( pair input_pair1, @@ -223,24 +218,23 @@ class Predec : public Component -class Driver : public Component -{ - public: - Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram); +class Driver : public Component { +public: + Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram); - int number_gates; - int min_number_gates; - double width_n[MAX_NUMBER_GATES_STAGE]; - double width_p[MAX_NUMBER_GATES_STAGE]; - double c_gate_load; - double c_wire_load; - double r_wire_load; - double delay; - powerDef power; - bool is_dram_; + int number_gates; + int min_number_gates; + double width_n[MAX_NUMBER_GATES_STAGE]; + double width_p[MAX_NUMBER_GATES_STAGE]; + double c_gate_load; + double c_wire_load; + double r_wire_load; + double delay; + powerDef power; + bool is_dram_; - void compute_widths(); - double compute_delay(double inrisetime); + void compute_widths(); + double compute_delay(double inrisetime); }; diff --git a/ext/mcpat/cacti/htree2.cc b/ext/mcpat/cacti/htree2.cc index 817ea6a7c..55724c397 100644 --- a/ext/mcpat/cacti/htree2.cc +++ b/ext/mcpat/cacti/htree2.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,15 +40,17 @@ Htree2::Htree2( enum Wire_type wire_model, double mat_w, double mat_h, - int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type, + int a_bits, int d_inbits, int search_data_in, int d_outbits, + int search_data_out, int bl, int wl, enum Htree_type htree_type, bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt) - :in_rise_time(0), out_rise_time(0), - tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), - add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits), - search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), - uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt) -{ - assert(ndbl >= 2 && ndwl >= 2); + : in_rise_time(0), out_rise_time(0), + tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), + add_bits(a_bits), data_in_bits(d_inbits), + search_data_in_bits(search_data_in), data_out_bits(d_outbits), + search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), + uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), + deviceType(dt) { + assert(ndbl >= 2 && ndwl >= 2); // if (ndbl == 1 && ndwl == 1) // { @@ -61,177 +64,211 @@ Htree2::Htree2( // if (ndwl == 1) ndwl++; // if (ndbl == 1) ndbl++; - max_unpipelined_link_delay = 0; //TODO - min_w_nmos = g_tp.min_w_nmos_; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; + max_unpipelined_link_delay = 0; //TODO + min_w_nmos = g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; - switch (htree_type) - { + switch (htree_type) { case Add_htree: - wire_bw = init_wire_bw = add_bits; - in_htree(); - break; + wire_bw = init_wire_bw = add_bits; + in_htree(); + break; case Data_in_htree: - wire_bw = init_wire_bw = data_in_bits; - in_htree(); - break; + wire_bw = init_wire_bw = data_in_bits; + in_htree(); + break; case Data_out_htree: - wire_bw = init_wire_bw = data_out_bits; - out_htree(); - break; + wire_bw = init_wire_bw = data_out_bits; + out_htree(); + break; case Search_in_htree: - wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not. - in_htree(); - break; + wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not. + in_htree(); + break; case Search_out_htree: - wire_bw = init_wire_bw = search_data_out_bits; - out_htree(); - break; + wire_bw = init_wire_bw = search_data_out_bits; + out_htree(); + break; default: - assert(0); - break; - } + assert(0); + break; + } - power_bit = power; - power.readOp.dynamic *= init_wire_bw; + power_bit = power; + power.readOp.dynamic *= init_wire_bw; - assert(power.readOp.dynamic >= 0); - assert(power.readOp.leakage >= 0); + assert(power.readOp.dynamic >= 0); + assert(power.readOp.leakage >= 0); } // nand gate sizing calculation -void Htree2::input_nand(double s1, double s2, double l_eff) -{ - Wire w1(wt, l_eff); - double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; - // input capacitance of a repeater = input capacitance of nand. - double nsize = s1*(1 + pton_size)/(2 + pton_size); - nsize = (nsize < 1) ? 1 : nsize; +void Htree2::input_nand(double s1, double s2, double l_eff) { + Wire w1(wt, l_eff); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // input capacitance of a repeater = input capacitance of nand. + double nsize = s1 * (1 + pton_size) / (2 + pton_size); + nsize = (nsize < 1) ? 1 : nsize; - double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) * - (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0)); - delay+= horowitz (w1.out_rise_time, tc, - deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - power.readOp.dynamic += 0.5 * - (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) * + (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)); + delay += horowitz(w1.out_rise_time, tc, + deviceType->Vth / deviceType->Vdd, deviceType->Vth / + deviceType->Vdd, RISE); + power.readOp.dynamic += 0.5 * + (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd * wire_bw ; - power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; - power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; + (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * wire_bw ; + power.readOp.leakage += (wire_bw * + cmos_Isub_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, + nand)) * deviceType->Vdd; + power.readOp.gate_leakage += (wire_bw * + cmos_Ig_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, + nand)) * deviceType->Vdd; } // tristate buffer model consisting of not, nand, nor, and driver transistors -void Htree2::output_buffer(double s1, double s2, double l_eff) -{ - Wire w1(wt, l_eff); - double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; - // input capacitance of repeater = input capacitance of nand + nor. - double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - double s_eff = //stage eff of a repeater in a wire - (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/ - gate_C(s2*(min_w_nmos + min_w_pmos), 0); - double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0)); - size = (size < 1) ? 1 : size; +void Htree2::output_buffer(double s1, double s2, double l_eff) { + Wire w1(wt, l_eff); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // input capacitance of repeater = input capacitance of nand + nor. + double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + double s_eff = //stage eff of a repeater in a wire + (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6, + true)) / + gate_C(s2 * (min_w_nmos + min_w_pmos), 0); + double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 / + (s_eff * gate_C(min_w_pmos, 0)); + size = (size < 1) ? 1 : size; - double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1); - double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1); - double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(tr_size*min_w_pmos, 0); - double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + - gate_C(s1*(min_w_nmos + min_w_pmos), 0); + double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1); + double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1); + double cap_nand_out = + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(tr_size * min_w_pmos, 0); + double cap_ptrans_out = 2 * + (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0); - double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; + double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; - delay += horowitz (w1.out_rise_time, tc, - deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); + delay += horowitz(w1.out_rise_time, tc, + deviceType->Vth / deviceType->Vdd, deviceType->Vth / + deviceType->Vdd, RISE); - //nand - power.readOp.dynamic += 0.5 * - (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(tr_size*(min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + //nand + power.readOp.dynamic += 0.5 * + (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(tr_size*(min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; + (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; - //not - power.readOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + //not + power.readOp.dynamic += 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; - //nor - power.readOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + //nor + power.readOp.dynamic += 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; - //output transistor - power.readOp.dynamic += 0.5 * - ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 - + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + //output transistor + power.readOp.dynamic += 0.5 * + ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2 + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 - + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; + ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2 + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; - if(uca_tree) { - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor + if (uca_tree) { + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * + 2, 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - //power.readOp.gate_leakage *=; - } - else { - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, + 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor + } else { + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * + 2, 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw; - } + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, + 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor + } } @@ -250,192 +287,200 @@ void Htree2::output_buffer(double s1, double s2, double l_eff) * hor. links left. After this it goes through the remaining vertical * links. */ - void -Htree2::in_htree() -{ - //temp var - double s1 = 0, s2 = 0, s3 = 0; - double l_eff = 0; - Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; - double len = 0, ht = 0; - int option = 0; +void +Htree2::in_htree() { + //temp var + double s1 = 0, s2 = 0, s3 = 0; + double l_eff = 0; + Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; + double len = 0, ht = 0; + int option = 0; - int h = (int) _log2(ndwl/2); // horizontal nodes - int v = (int) _log2(ndbl/2); // vertical nodes - double len_temp; - double ht_temp; - if (uca_tree) - {//Sheng: this computation do not consider the wires that route from edge to middle. - ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,h))))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,v))))/2; - } - else - { - if (ndwl == ndbl) { - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; + int h = (int) _log2(ndwl / 2); // horizontal nodes + int v = (int) _log2(ndbl / 2); // vertical nodes + double len_temp; + double ht_temp; + if (uca_tree) { + //Sheng: this computation do not consider the wires that route from + //edge to middle. + ht_temp = (mat_height * ndbl / 2 + + /* since uca_tree models interbank tree, + mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, h)))) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, v)))) / 2; + } else { + if (ndwl == ndbl) { + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * (ndbl / 2 - 1) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * (ndwl / 2 - 1) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + } else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + + search_data_out_bits)) * + ((ndbl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + } else { + double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / 2; + } } - else if (ndwl > ndbl) { - double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * - (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else { - double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; - } - } - area.h = ht_temp * 2; - area.w = len_temp * 2; - delay = 0; - power.readOp.dynamic = 0; - power.readOp.leakage = 0; - power.searchOp.dynamic =0; - len = len_temp; - ht = ht_temp/2; + area.h = ht_temp * 2; + area.w = len_temp * 2; + delay = 0; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.searchOp.dynamic = 0; + len = len_temp; + ht = ht_temp / 2; + + while (v > 0 || h > 0) { + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; + + if (h > v) { + //the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len / 2); // ver + len_temp = len; + len /= 2; + wtemp3 = 0; + h--; + option = 0; + } else if (v > 0 && h > 0) { + //considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len / 2); // next hor + len_temp = len; + ht_temp = ht; + len /= 2; + ht /= 2; + v--; + h--; + option = 1; + } else { + // considers only one vertical link + assert(h == 0); + wtemp1 = new Wire(wt, ht); // ver + wtemp2 = new Wire(wt, ht / 2); // hor + ht_temp = ht; + ht /= 2; + wtemp3 = 0; + v--; + option = 2; + } + + delay += wtemp1->delay; + power.readOp.dynamic += wtemp1->power.readOp.dynamic; + power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; + if ((uca_tree == false && option == 2) || search_tree == true) { + wire_bw *= 2; // wire bandwidth doubles only for vertical branches + } + + if (uca_tree == false) { + if (len_temp > wtemp1->repeater_spacing) { + s1 = wtemp1->repeater_size; + l_eff = wtemp1->repeater_spacing; + } else { + s1 = (len_temp / wtemp1->repeater_spacing) * + wtemp1->repeater_size; + l_eff = len_temp; + } + + if (ht_temp > wtemp2->repeater_spacing) { + s2 = wtemp2->repeater_size; + } else { + s2 = (len_temp / wtemp2->repeater_spacing) * + wtemp2->repeater_size; + } + // first level + input_nand(s1, s2, l_eff); + } + + + if (option != 1) { + continue; + } + + // second level + delay += wtemp2->delay; + power.readOp.dynamic += wtemp2->power.readOp.dynamic; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + + if (uca_tree) { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + } else { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + wire_bw *= 2; + + if (ht_temp > wtemp3->repeater_spacing) { + s3 = wtemp3->repeater_size; + l_eff = wtemp3->repeater_spacing; + } else { + s3 = (len_temp / wtemp3->repeater_spacing) * + wtemp3->repeater_size; + l_eff = ht_temp; + } + + input_nand(s2, s3, l_eff); + } + } - while (v > 0 || h > 0) - { if (wtemp1) delete wtemp1; if (wtemp2) delete wtemp2; if (wtemp3) delete wtemp3; - - if (h > v) - { - //the iteration considers only one horizontal link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, len/2); // ver - len_temp = len; - len /= 2; - wtemp3 = 0; - h--; - option = 0; - } - else if (v>0 && h>0) - { - //considers one horizontal link and one vertical link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, ht); // ver - wtemp3 = new Wire(wt, len/2); // next hor - len_temp = len; - ht_temp = ht; - len /= 2; - ht /= 2; - v--; - h--; - option = 1; - } - else - { - // considers only one vertical link - assert(h == 0); - wtemp1 = new Wire(wt, ht); // ver - wtemp2 = new Wire(wt, ht/2); // hor - ht_temp = ht; - ht /= 2; - wtemp3 = 0; - v--; - option = 2; - } - - delay += wtemp1->delay; - power.readOp.dynamic += wtemp1->power.readOp.dynamic; - power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw; - power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; - if ((uca_tree == false && option == 2) || search_tree==true) - { - wire_bw*=2; // wire bandwidth doubles only for vertical branches - } - - if (uca_tree == false) - { - if (len_temp > wtemp1->repeater_spacing) - { - s1 = wtemp1->repeater_size; - l_eff = wtemp1->repeater_spacing; - } - else - { - s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; - l_eff = len_temp; - } - - if (ht_temp > wtemp2->repeater_spacing) - { - s2 = wtemp2->repeater_size; - } - else - { - s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; - } - // first level - input_nand(s1, s2, l_eff); - } - - - if (option != 1) - { - continue; - } - - // second level - delay += wtemp2->delay; - power.readOp.dynamic += wtemp2->power.readOp.dynamic; - power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw; - power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - - if (uca_tree) - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - } - else - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - wire_bw*=2; - - if (ht_temp > wtemp3->repeater_spacing) - { - s3 = wtemp3->repeater_size; - l_eff = wtemp3->repeater_spacing; - } - else - { - s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; - l_eff = ht_temp; - } - - input_nand(s2, s3, l_eff); - } - } - - if (wtemp1) delete wtemp1; - if (wtemp2) delete wtemp2; - if (wtemp3) delete wtemp3; } @@ -452,190 +497,198 @@ Htree2::in_htree() * hor. links left. After this it goes through the remaining vertical * links. */ -void Htree2::out_htree() -{ - //temp var - double s1 = 0, s2 = 0, s3 = 0; - double l_eff = 0; - Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; - double len = 0, ht = 0; - int option = 0; +void Htree2::out_htree() { + //temp var + double s1 = 0, s2 = 0, s3 = 0; + double l_eff = 0; + Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; + double len = 0, ht = 0; + int option = 0; - int h = (int) _log2(ndwl/2); - int v = (int) _log2(ndbl/2); - double len_temp; - double ht_temp; - if (uca_tree) - { - ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,h))))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,v))))/2; - } - else - { - if (ndwl == ndbl) { - ht_temp = ((mat_height*ndbl/2) + - ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; + int h = (int) _log2(ndwl / 2); + int v = (int) _log2(ndbl / 2); + double len_temp; + double ht_temp; + if (uca_tree) { + ht_temp = (mat_height * ndbl / 2 + + /* since uca_tree models interbank tree, + mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, h)))) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, v)))) / 2; + } else { + if (ndwl == ndbl) { + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * + (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * (ndwl / 2 - 1) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + } else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndbl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + } else { + double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / 2; + } + } + area.h = ht_temp * 2; + area.w = len_temp * 2; + delay = 0; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.readOp.gate_leakage = 0; + //cout<<"power.readOp.gate_leakage"< 0 || h > 0) { //finds delay/power of each link in the tree + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; + + if (h > v) { + //the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len / 2); // ver + len_temp = len; + len /= 2; + wtemp3 = 0; + h--; + option = 0; + } else if (v > 0 && h > 0) { + //considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len / 2); // next hor + len_temp = len; + ht_temp = ht; + len /= 2; + ht /= 2; + v--; + h--; + option = 1; + } else { + // considers only one vertical link + assert(h == 0); + wtemp1 = new Wire(wt, ht); // hor + wtemp2 = new Wire(wt, ht / 2); // ver + ht_temp = ht; + ht /= 2; + wtemp3 = 0; + v--; + option = 2; + } + delay += wtemp1->delay; + power.readOp.dynamic += wtemp1->power.readOp.dynamic; + power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; + if ((uca_tree == false && option == 2) || search_tree == true) { + wire_bw *= 2; + } + + if (uca_tree == false) { + if (len_temp > wtemp1->repeater_spacing) { + s1 = wtemp1->repeater_size; + l_eff = wtemp1->repeater_spacing; + } else { + s1 = (len_temp / wtemp1->repeater_spacing) * + wtemp1->repeater_size; + l_eff = len_temp; + } + if (ht_temp > wtemp2->repeater_spacing) { + s2 = wtemp2->repeater_size; + } else { + s2 = (len_temp / wtemp2->repeater_spacing) * + wtemp2->repeater_size; + } + // first level + output_buffer(s1, s2, l_eff); + } + + + if (option != 1) { + continue; + } + + // second level + delay += wtemp2->delay; + power.readOp.dynamic += wtemp2->power.readOp.dynamic; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + //cout<<"power.readOp.gate_leakage"<power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + } else { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + wire_bw *= 2; + + if (ht_temp > wtemp3->repeater_spacing) { + s3 = wtemp3->repeater_size; + l_eff = wtemp3->repeater_spacing; + } else { + s3 = (len_temp / wtemp3->repeater_spacing) * + wtemp3->repeater_size; + l_eff = ht_temp; + } + + output_buffer(s2, s3, l_eff); + } + //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage< ndbl) { - double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * - (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else { - double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; - } - } - area.h = ht_temp * 2; - area.w = len_temp * 2; - delay = 0; - power.readOp.dynamic = 0; - power.readOp.leakage = 0; - power.readOp.gate_leakage = 0; - //cout<<"power.readOp.gate_leakage"< 0 || h > 0) - { //finds delay/power of each link in the tree if (wtemp1) delete wtemp1; if (wtemp2) delete wtemp2; if (wtemp3) delete wtemp3; - - if(h > v) { - //the iteration considers only one horizontal link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, len/2); // ver - len_temp = len; - len /= 2; - wtemp3 = 0; - h--; - option = 0; - } - else if (v>0 && h>0) { - //considers one horizontal link and one vertical link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, ht); // ver - wtemp3 = new Wire(wt, len/2); // next hor - len_temp = len; - ht_temp = ht; - len /= 2; - ht /= 2; - v--; - h--; - option = 1; - } - else { - // considers only one vertical link - assert(h == 0); - wtemp1 = new Wire(wt, ht); // hor - wtemp2 = new Wire(wt, ht/2); // ver - ht_temp = ht; - ht /= 2; - wtemp3 = 0; - v--; - option = 2; - } - delay += wtemp1->delay; - power.readOp.dynamic += wtemp1->power.readOp.dynamic; - power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw; - power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; - //cout<<"power.readOp.gate_leakage"< wtemp1->repeater_spacing) - { - s1 = wtemp1->repeater_size; - l_eff = wtemp1->repeater_spacing; - } - else - { - s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; - l_eff = len_temp; - } - if (ht_temp > wtemp2->repeater_spacing) - { - s2 = wtemp2->repeater_size; - } - else - { - s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; - } - // first level - output_buffer(s1, s2, l_eff); - } - - - if (option != 1) - { - continue; - } - - // second level - delay += wtemp2->delay; - power.readOp.dynamic += wtemp2->power.readOp.dynamic; - power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw; - power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - //cout<<"power.readOp.gate_leakage"<power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - } - else - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - wire_bw*=2; - - if (ht_temp > wtemp3->repeater_spacing) - { - s3 = wtemp3->repeater_size; - l_eff = wtemp3->repeater_spacing; - } - else - { - s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; - l_eff = ht_temp; - } - - output_buffer(s2, s3, l_eff); - } - //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage< 16) { + printf("No. of cores should be less than 16!\n"); + } + continue; + } + + if (!strncmp("-Cache level", line, strlen("-Cache level"))) { + sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("L2", temp_var, strlen("L2"))) { + cache_level = 0; + } else { + cache_level = 1; + } + } + + if (!strncmp("-Print level", line, strlen("-Print level"))) { + sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { + print_detail = 1; + } else { + print_detail = 0; + } + + } + if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) { + sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + add_ecc_b_ = true; + } else { + add_ecc_b_ = false; + } + } + + if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { + sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + print_input_args = true; + } else { + print_input_args = false; + } + } + + if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) { + sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + force_cache_config = true; + } else { + force_cache_config = false; + } + } + + if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) { + sscanf(line, "-Ndbl %d\n", &(ndbl)); + continue; + } + if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) { + sscanf(line, "-Ndwl %d\n", &(ndwl)); + continue; + } + if (!strncmp("-Nspd", line, strlen("-Nspd"))) { + sscanf(line, "-Nspd %d\n", &(nspd)); + continue; + } + if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { + sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); + continue; + } + if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { + sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); + continue; + } + if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) { + sscanf(line, "-Ndcm %d\n", &(ndcm)); + continue; + } + + } + rpters_in_htree = true; + fclose(fp); +} + +void +InputParameter::display_ip() { + cout << "Cache size : " << cache_sz << endl; + cout << "Block size : " << line_sz << endl; + cout << "Associativity : " << assoc << endl; + cout << "Read only ports : " << num_rd_ports << endl; + cout << "Write only ports : " << num_wr_ports << endl; + cout << "Read write ports : " << num_rw_ports << endl; + cout << "Single ended read ports : " << num_se_rd_ports << endl; + if (fully_assoc || pure_cam) { + cout << "Search ports : " << num_search_ports << endl; + } + cout << "Cache banks (UCA) : " << nbanks << endl; + cout << "Technology : " << F_sz_um << endl; + cout << "Temperature : " << temp << endl; + cout << "Tag size : " << tag_w << endl; + if (is_cache) { + cout << "array type : " << "Cache" << endl; + } + if (pure_ram) { + cout << "array type : " << "Scratch RAM" << endl; + } + if (pure_cam) { + cout << "array type : " << "CAM" << endl; + } + cout << "Model as memory : " << is_main_mem << endl; + cout << "Access mode : " << access_mode << endl; + cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; + cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; + cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; + cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl; + cout << "Optimization target : " << ed << endl; + cout << "Design objective (UCA wt) : " << delay_wt << " " + << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt + << " " << area_wt << endl; + cout << "Design objective (UCA dev) : " << delay_dev << " " + << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev + << " " << area_dev << endl; + if (nuca) { + cout << "Cores : " << cores << endl; + + + cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " + << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca + << " " << area_wt_nuca << endl; + cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " + << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca + << " " << area_dev_nuca << endl; + } + cout << "Cache model : " << nuca << endl; + cout << "Nuca bank : " << nuca_bank_count << endl; + cout << "Wire inside mat : " << wire_is_mat_type << endl; + cout << "Wire outside mat : " << wire_os_mat_type << endl; + cout << "Interconnect projection : " << ic_proj_type << endl; + cout << "Wire signalling : " << force_wiretype << endl; + cout << "Print level : " << print_detail << endl; + cout << "ECC overhead : " << add_ecc_b_ << endl; + cout << "Page size : " << page_sz_bits << endl; + cout << "Burst length : " << burst_len << endl; + cout << "Internal prefetch width : " << int_prefetch_w << endl; + cout << "Force cache config : " << g_ip->force_cache_config << endl; + if (g_ip->force_cache_config) { + cout << "Ndwl : " << g_ip->ndwl << endl; + cout << "Ndbl : " << g_ip->ndbl << endl; + cout << "Nspd : " << g_ip->nspd << endl; + cout << "Ndcm : " << g_ip->ndcm << endl; + cout << "Ndsam1 : " << g_ip->ndsam1 << endl; + cout << "Ndsam2 : " << g_ip->ndsam2 << endl; + } +} + + + +powerComponents operator+(const powerComponents & x, const powerComponents & y) { + powerComponents z; + + z.dynamic = x.dynamic + y.dynamic; + z.leakage = x.leakage + y.leakage; + z.gate_leakage = x.gate_leakage + y.gate_leakage; + z.short_circuit = x.short_circuit + y.short_circuit; + z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; + + return z; +} + +powerComponents operator*(const powerComponents & x, double const * const y) { + powerComponents z; + + z.dynamic = x.dynamic * y[0]; + z.leakage = x.leakage * y[1]; + z.gate_leakage = x.gate_leakage * y[2]; + z.short_circuit = x.short_circuit * y[3]; + //longer channel leakage has the same behavior as normal leakage + z.longer_channel_leakage = x.longer_channel_leakage * y[1]; + + return z; +} + + +powerDef operator+(const powerDef & x, const powerDef & y) { + powerDef z; + + z.readOp = x.readOp + y.readOp; + z.writeOp = x.writeOp + y.writeOp; + z.searchOp = x.searchOp + y.searchOp; + return z; +} + +powerDef operator*(const powerDef & x, double const * const y) { + powerDef z; + + z.readOp = x.readOp * y; + z.writeOp = x.writeOp * y; + z.searchOp = x.searchOp * y; + return z; +} + +uca_org_t cacti_interface(const string & infile_name) { + + uca_org_t fin_res; + //uca_org_t result; + fin_res.valid = false; + + g_ip = new InputParameter(); + g_ip->parse_cfg(infile_name); + if (!g_ip->error_checking(infile_name)) exit(0); - } - continue; - } + if (g_ip->print_input_args) + g_ip->display_ip(); - if (!strncmp("-Data array cell type", line, strlen("-Data array cell type"))) { - sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var); - - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { - data_arr_ram_cell_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { - data_arr_ram_cell_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { - data_arr_ram_cell_tech_type = 2; - } - else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { - data_arr_ram_cell_tech_type = 3; - } - else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { - data_arr_ram_cell_tech_type = 4; - } - else { - cout << "ERROR: Invalid type!\n"; - exit(0); - } - continue; - } - - if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) { - sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); - - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { - data_arr_peri_global_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { - data_arr_peri_global_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { - data_arr_peri_global_tech_type = 2; - } - else { - cout << "ERROR: Invalid type!\n"; - exit(0); - } - continue; - } - - if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) { - sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var); - - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { - tag_arr_ram_cell_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { - tag_arr_ram_cell_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { - tag_arr_ram_cell_tech_type = 2; - } - else if(!strncmp("lp-dram", temp_var, strlen("lp-dram"))) { - tag_arr_ram_cell_tech_type = 3; - } - else if(!strncmp("comm-dram", temp_var, strlen("comm-dram"))) { - tag_arr_ram_cell_tech_type = 4; - } - else { - cout << "ERROR: Invalid type!\n"; - exit(0); - } - continue; - } - - if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) { - sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var); - - if(!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) { - tag_arr_peri_global_tech_type = 0; - } - else if(!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) { - tag_arr_peri_global_tech_type = 1; - } - else if(!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) { - tag_arr_peri_global_tech_type = 2; - } - else { - cout << "ERROR: Invalid type!\n"; - exit(0); - } - continue; - } - if(!strncmp("-design", line, strlen("-design"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_wt), &(dynamic_power_wt), - &(leakage_power_wt), - &(cycle_time_wt), &(area_wt)); - continue; - } - - if(!strncmp("-deviate", line, strlen("-deviate"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_dev), &(dynamic_power_dev), - &(leakage_power_dev), - &(cycle_time_dev), &(area_dev)); - continue; - } - - if(!strncmp("-Optimize", line, strlen("-Optimize"))) { - sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var); - - if(!strncmp("ED^2", temp_var, strlen("ED^2"))) { - ed = 2; - } - else if(!strncmp("ED", temp_var, strlen("ED"))) { - ed = 1; - } - else { - ed = 0; - } - } - - if(!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_wt_nuca), &(dynamic_power_wt_nuca), - &(leakage_power_wt_nuca), - &(cycle_time_wt_nuca), &(area_wt_nuca)); - continue; - } - - if(!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) { - sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk, - &(delay_dev_nuca), &(dynamic_power_dev_nuca), - &(leakage_power_dev_nuca), - &(cycle_time_dev_nuca), &(area_dev_nuca)); - continue; - } - - if(!strncmp("-Cache model", line, strlen("-cache model"))) { - sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var); - - if (!strncmp("UCA", temp_var, strlen("UCA"))) { - nuca = 0; - } - else { - nuca = 1; - } - continue; - } - - if(!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) { - sscanf(line, "-NUCA bank count %d", &(nuca_bank_count)); - - if (nuca_bank_count != 0) { - force_nuca_bank = 1; - } - continue; - } - - if(!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) { - sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); - - if (!strncmp("global", temp_var, strlen("global"))) { - wire_is_mat_type = 2; - continue; - } - else if (!strncmp("local", temp_var, strlen("local"))) { - wire_is_mat_type = 0; - continue; - } - else { - wire_is_mat_type = 1; - continue; - } - } - - if(!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) { - sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); - - if (!strncmp("global", temp_var, strlen("global"))) { - wire_os_mat_type = 2; - } - else { - wire_os_mat_type = 1; - } - continue; - } - - if(!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) { - sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var); - - if (!strncmp("aggressive", temp_var, strlen("aggressive"))) { - ic_proj_type = 0; - } - else { - ic_proj_type = 1; - } - continue; - } - - if(!strncmp("-Wire signalling", line, strlen("-wire signalling"))) { - sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var); - - if (!strncmp("default", temp_var, strlen("default"))) { - force_wiretype = 0; - wt = Global; - } - else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) { - force_wiretype = 1; - wt = Global_10; - } - else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) { - force_wiretype = 1; - wt = Global_20; - } - else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) { - force_wiretype = 1; - wt = Global_30; - } - else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) { - force_wiretype = 1; - wt = Global_5; - } - else if (!(strncmp("Global", temp_var, strlen("Global")))) { - force_wiretype = 1; - wt = Global; - } - else { - wt = Low_swing; - force_wiretype = 1; - } - continue; - } - - - - if(!strncmp("-Core", line, strlen("-Core"))) { - sscanf(line, "-Core count %d\n", &(cores)); - if (cores > 16) { - printf("No. of cores should be less than 16!\n"); - } - continue; - } - - if(!strncmp("-Cache level", line, strlen("-Cache level"))) { - sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("L2", temp_var, strlen("L2"))) { - cache_level = 0; - } - else { - cache_level = 1; - } - } - - if(!strncmp("-Print level", line, strlen("-Print level"))) { - sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { - print_detail = 1; - } - else { - print_detail = 0; - } - - } - if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) { - sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - add_ecc_b_ = true; - } - else { - add_ecc_b_ = false; - } - } - - if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { - sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - print_input_args = true; - } - else { - print_input_args = false; - } - } - - if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) { - sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - force_cache_config = true; - } - else { - force_cache_config = false; - } - } - - if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) { - sscanf(line, "-Ndbl %d\n", &(ndbl)); - continue; - } - if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) { - sscanf(line, "-Ndwl %d\n", &(ndwl)); - continue; - } - if(!strncmp("-Nspd", line, strlen("-Nspd"))) { - sscanf(line, "-Nspd %d\n", &(nspd)); - continue; - } - if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { - sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); - continue; - } - if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { - sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); - continue; - } - if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) { - sscanf(line, "-Ndcm %d\n", &(ndcm)); - continue; - } - - } - rpters_in_htree = true; - fclose(fp); -} - - void -InputParameter::display_ip() -{ - cout << "Cache size : " << cache_sz << endl; - cout << "Block size : " << line_sz << endl; - cout << "Associativity : " << assoc << endl; - cout << "Read only ports : " << num_rd_ports << endl; - cout << "Write only ports : " << num_wr_ports << endl; - cout << "Read write ports : " << num_rw_ports << endl; - cout << "Single ended read ports : " << num_se_rd_ports << endl; - if (fully_assoc||pure_cam) - { - cout << "Search ports : " << num_search_ports << endl; - } - cout << "Cache banks (UCA) : " << nbanks << endl; - cout << "Technology : " << F_sz_um << endl; - cout << "Temperature : " << temp << endl; - cout << "Tag size : " << tag_w << endl; - if (is_cache) { - cout << "array type : " << "Cache" << endl; - } - if (pure_ram) { - cout << "array type : " << "Scratch RAM" << endl; - } - if (pure_cam) - { - cout << "array type : " << "CAM" << endl; - } - cout << "Model as memory : " << is_main_mem << endl; - cout << "Access mode : " << access_mode << endl; - cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; - cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; - cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; - cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl; - cout << "Optimization target : " << ed << endl; - cout << "Design objective (UCA wt) : " << delay_wt << " " - << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt - << " " << area_wt << endl; - cout << "Design objective (UCA dev) : " << delay_dev << " " - << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev - << " " << area_dev << endl; - if (nuca) - { - cout << "Cores : " << cores << endl; - - - cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " - << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca - << " " << area_wt_nuca << endl; - cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " - << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca - << " " << area_dev_nuca << endl; - } - cout << "Cache model : " << nuca << endl; - cout << "Nuca bank : " << nuca_bank_count << endl; - cout << "Wire inside mat : " << wire_is_mat_type << endl; - cout << "Wire outside mat : " << wire_os_mat_type << endl; - cout << "Interconnect projection : " << ic_proj_type << endl; - cout << "Wire signalling : " << force_wiretype << endl; - cout << "Print level : " << print_detail << endl; - cout << "ECC overhead : " << add_ecc_b_ << endl; - cout << "Page size : " << page_sz_bits << endl; - cout << "Burst length : " << burst_len << endl; - cout << "Internal prefetch width : " << int_prefetch_w << endl; - cout << "Force cache config : " << g_ip->force_cache_config << endl; - if (g_ip->force_cache_config) { - cout << "Ndwl : " << g_ip->ndwl << endl; - cout << "Ndbl : " << g_ip->ndbl << endl; - cout << "Nspd : " << g_ip->nspd << endl; - cout << "Ndcm : " << g_ip->ndcm << endl; - cout << "Ndsam1 : " << g_ip->ndsam1 << endl; - cout << "Ndsam2 : " << g_ip->ndsam2 << endl; - } -} - - - -powerComponents operator+(const powerComponents & x, const powerComponents & y) -{ - powerComponents z; - - z.dynamic = x.dynamic + y.dynamic; - z.leakage = x.leakage + y.leakage; - z.gate_leakage = x.gate_leakage + y.gate_leakage; - z.short_circuit = x.short_circuit + y.short_circuit; - z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; - - return z; -} - -powerComponents operator*(const powerComponents & x, double const * const y) -{ - powerComponents z; - - z.dynamic = x.dynamic*y[0]; - z.leakage = x.leakage*y[1]; - z.gate_leakage = x.gate_leakage*y[2]; - z.short_circuit = x.short_circuit*y[3]; - z.longer_channel_leakage = x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage - - return z; -} - - -powerDef operator+(const powerDef & x, const powerDef & y) -{ - powerDef z; - - z.readOp = x.readOp + y.readOp; - z.writeOp = x.writeOp + y.writeOp; - z.searchOp = x.searchOp + y.searchOp; - return z; -} - -powerDef operator*(const powerDef & x, double const * const y) -{ - powerDef z; - - z.readOp = x.readOp*y; - z.writeOp = x.writeOp*y; - z.searchOp = x.searchOp*y; - return z; -} - -uca_org_t cacti_interface(const string & infile_name) -{ - - uca_org_t fin_res; - //uca_org_t result; - fin_res.valid = false; - - g_ip = new InputParameter(); - g_ip->parse_cfg(infile_name); - if(!g_ip->error_checking()) - exit(0); - if (g_ip->print_input_args) - g_ip->display_ip(); - - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. // For HighRadix Only @@ -703,19 +653,18 @@ uca_org_t cacti_interface(const string & infile_name) // exit(0); // For HighRadix Only End - if (g_ip->nuca == 1) - { - Nuca n(&g_tp.peri_global); - n.sim_nuca(); - } - g_ip->display_ip(); - solve(&fin_res); + if (g_ip->nuca == 1) { + Nuca n(&g_tp.peri_global); + n.sim_nuca(); + } + g_ip->display_ip(); + solve(&fin_res); - output_UCA(&fin_res); - output_data_csv(fin_res); + output_UCA(&fin_res); + output_data_csv(fin_res); - delete (g_ip); - return fin_res; + delete (g_ip); + return fin_res; } //cacti6.5's plain interface, please keep !!! @@ -773,142 +722,139 @@ uca_org_t cacti_interface( int nuca_dev_func_area, int nuca_dev_func_cycle_time, int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported - int p_input) -{ - g_ip = new InputParameter(); - g_ip->add_ecc_b_ = true; + int p_input) { + g_ip = new InputParameter(); + g_ip->add_ecc_b_ = true; - g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; - g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; - g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; - g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; - g_ip->ic_proj_type = interconnect_projection_type_in; - g_ip->wire_is_mat_type = wire_inside_mat_type_in; - g_ip->wire_os_mat_type = wire_outside_mat_type_in; - g_ip->burst_len = burst_length; - g_ip->int_prefetch_w = pre_width; - g_ip->page_sz_bits = page_sz; + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = burst_length; + g_ip->int_prefetch_w = pre_width; + g_ip->page_sz_bits = page_sz; - g_ip->cache_sz = cache_size; - g_ip->line_sz = line_size; - g_ip->assoc = associativity; - g_ip->nbanks = banks; - g_ip->out_w = output_width; - g_ip->specific_tag = specific_tag; - if (tag_width == 0) { - g_ip->tag_w = 42; - } - else { - g_ip->tag_w = tag_width; - } + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (tag_width == 0) { + g_ip->tag_w = 42; + } else { + g_ip->tag_w = tag_width; + } - g_ip->access_mode = access_mode; - g_ip->delay_wt = obj_func_delay; - g_ip->dynamic_power_wt = obj_func_dynamic_power; - g_ip->leakage_power_wt = obj_func_leakage_power; - g_ip->area_wt = obj_func_area; - g_ip->cycle_time_wt = obj_func_cycle_time; - g_ip->delay_dev = dev_func_delay; - g_ip->dynamic_power_dev = dev_func_dynamic_power; - g_ip->leakage_power_dev = dev_func_leakage_power; - g_ip->area_dev = dev_func_area; - g_ip->cycle_time_dev = dev_func_cycle_time; - g_ip->ed = ed_ed2_none; + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->ed = ed_ed2_none; - switch(wt) { + switch (wt) { case (0): - g_ip->force_wiretype = 0; - g_ip->wt = Global; - break; + g_ip->force_wiretype = 0; + g_ip->wt = Global; + break; case (1): - g_ip->force_wiretype = 1; - g_ip->wt = Global; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global; + break; case (2): - g_ip->force_wiretype = 1; - g_ip->wt = Global_5; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_5; + break; case (3): - g_ip->force_wiretype = 1; - g_ip->wt = Global_10; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_10; + break; case (4): - g_ip->force_wiretype = 1; - g_ip->wt = Global_20; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_20; + break; case (5): - g_ip->force_wiretype = 1; - g_ip->wt = Global_30; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_30; + break; case (6): - g_ip->force_wiretype = 1; - g_ip->wt = Low_swing; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Low_swing; + break; default: - cout << "Unknown wire type!\n"; - exit(0); - } + cout << "Unknown wire type!\n"; + exit(0); + } - g_ip->delay_wt_nuca = nuca_obj_func_delay; - g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; - g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; - g_ip->area_wt_nuca = nuca_obj_func_area; - g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; - g_ip->delay_dev_nuca = dev_func_delay; - g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; - g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; - g_ip->area_dev_nuca = nuca_dev_func_area; - g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; - g_ip->nuca = is_nuca; - g_ip->nuca_bank_count = nuca_bank_count; - if(nuca_bank_count > 0) { - g_ip->force_nuca_bank = 1; - } - g_ip->cores = core_count; - g_ip->cache_level = cache_level; + g_ip->delay_wt_nuca = nuca_obj_func_delay; + g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; + g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; + g_ip->area_wt_nuca = nuca_obj_func_area; + g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; + g_ip->delay_dev_nuca = dev_func_delay; + g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; + g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; + g_ip->area_dev_nuca = nuca_dev_func_area; + g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; + g_ip->nuca = is_nuca; + g_ip->nuca_bank_count = nuca_bank_count; + if (nuca_bank_count > 0) { + g_ip->force_nuca_bank = 1; + } + g_ip->cores = core_count; + g_ip->cache_level = cache_level; - g_ip->temp = temp; + g_ip->temp = temp; - g_ip->F_sz_nm = tech_node; - g_ip->F_sz_um = tech_node / 1000; - g_ip->is_main_mem = (main_mem != 0) ? true : false; - g_ip->is_cache = (cache != 0) ? true : false; - g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache != 0) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; - g_ip->num_rw_ports = rw_ports; - g_ip->num_rd_ports = excl_read_ports; - g_ip->num_wr_ports = excl_write_ports; - g_ip->num_se_rd_ports = single_ended_read_ports; - g_ip->print_detail = 1; - g_ip->nuca = 0; + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->print_detail = 1; + g_ip->nuca = 0; - g_ip->wt = Global_5; - g_ip->force_cache_config = false; - g_ip->force_wiretype = false; - g_ip->print_input_args = p_input; + g_ip->wt = Global_5; + g_ip->force_cache_config = false; + g_ip->force_wiretype = false; + g_ip->print_input_args = p_input; - uca_org_t fin_res; - fin_res.valid = false; + uca_org_t fin_res; + fin_res.valid = false; - if (g_ip->error_checking() == false) exit(0); - if (g_ip->print_input_args) - g_ip->display_ip(); - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. + if (g_ip->error_checking() == false) exit(0); + if (g_ip->print_input_args) + g_ip->display_ip(); + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. - if (g_ip->nuca == 1) - { - Nuca n(&g_tp.peri_global); - n.sim_nuca(); - } - solve(&fin_res); + if (g_ip->nuca == 1) { + Nuca n(&g_tp.peri_global); + n.sim_nuca(); + } + solve(&fin_res); - output_UCA(&fin_res); + output_UCA(&fin_res); - delete (g_ip); - return fin_res; + delete (g_ip); + return fin_res; } //McPAT's plain interface, please keep !!! @@ -964,200 +910,187 @@ uca_org_t cacti_interface( int ndcm, int ndsam1,//para50 int ndsam2, - int ecc) -{ - g_ip = new InputParameter(); + int ecc) { + g_ip = new InputParameter(); - uca_org_t fin_res; - fin_res.valid = false; + uca_org_t fin_res; + fin_res.valid = false; - g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; - g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; - g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; - g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; - g_ip->ic_proj_type = interconnect_projection_type_in; - g_ip->wire_is_mat_type = wire_inside_mat_type_in; - g_ip->wire_os_mat_type = wire_outside_mat_type_in; - g_ip->burst_len = BURST_LENGTH_in; - g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; - g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = BURST_LENGTH_in; + g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + g_ip->page_sz_bits = PAGE_SIZE_BITS_in; - g_ip->cache_sz = cache_size; - g_ip->line_sz = line_size; - g_ip->assoc = associativity; - g_ip->nbanks = banks; - g_ip->out_w = output_width; - g_ip->specific_tag = specific_tag; - if (specific_tag == 0) { - g_ip->tag_w = 42; - } - else { - g_ip->tag_w = tag_width; - } - - g_ip->access_mode = access_mode; - g_ip->delay_wt = obj_func_delay; - g_ip->dynamic_power_wt = obj_func_dynamic_power; - g_ip->leakage_power_wt = obj_func_leakage_power; - g_ip->area_wt = obj_func_area; - g_ip->cycle_time_wt = obj_func_cycle_time; - g_ip->delay_dev = dev_func_delay; - g_ip->dynamic_power_dev = dev_func_dynamic_power; - g_ip->leakage_power_dev = dev_func_leakage_power; - g_ip->area_dev = dev_func_area; - g_ip->cycle_time_dev = dev_func_cycle_time; - g_ip->temp = temp; - g_ip->ed = ed_ed2_none; - - g_ip->F_sz_nm = tech_node; - g_ip->F_sz_um = tech_node / 1000; - g_ip->is_main_mem = (main_mem != 0) ? true : false; - g_ip->is_cache = (cache ==1) ? true : false; - g_ip->pure_ram = (cache ==0) ? true : false; - g_ip->pure_cam = (cache ==2) ? true : false; - g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; - g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; - g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; - - g_ip->num_rw_ports = rw_ports; - g_ip->num_rd_ports = excl_read_ports; - g_ip->num_wr_ports = excl_write_ports; - g_ip->num_se_rd_ports = single_ended_read_ports; - g_ip->num_search_ports = search_ports; - - g_ip->print_detail = 1; - g_ip->nuca = 0; - - if (force_wiretype == 0) - { - g_ip->wt = Global; - g_ip->force_wiretype = false; - } - else - { g_ip->force_wiretype = true; - if (wiretype==10) { - g_ip->wt = Global_10; - } - if (wiretype==20) { - g_ip->wt = Global_20; - } - if (wiretype==30) { - g_ip->wt = Global_30; - } - if (wiretype==5) { - g_ip->wt = Global_5; - } - if (wiretype==0) { - g_ip->wt = Low_swing; - } - } - //g_ip->wt = Global_5; - if (force_config == 0) - { - g_ip->force_cache_config = false; + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (specific_tag == 0) { + g_ip->tag_w = 42; + } else { + g_ip->tag_w = tag_width; } - else - { + + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->temp = temp; + g_ip->ed = ed_ed2_none; + + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache == 1) ? true : false; + g_ip->pure_ram = (cache == 0) ? true : false; + g_ip->pure_cam = (cache == 2) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; + g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; + g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->num_search_ports = search_ports; + + g_ip->print_detail = 1; + g_ip->nuca = 0; + + if (force_wiretype == 0) { + g_ip->wt = Global; + g_ip->force_wiretype = false; + } else { + g_ip->force_wiretype = true; + if (wiretype == 10) { + g_ip->wt = Global_10; + } + if (wiretype == 20) { + g_ip->wt = Global_20; + } + if (wiretype == 30) { + g_ip->wt = Global_30; + } + if (wiretype == 5) { + g_ip->wt = Global_5; + } + if (wiretype == 0) { + g_ip->wt = Low_swing; + } + } + //g_ip->wt = Global_5; + if (force_config == 0) { + g_ip->force_cache_config = false; + } else { g_ip->force_cache_config = true; - g_ip->ndbl=ndbl; - g_ip->ndwl=ndwl; - g_ip->nspd=nspd; - g_ip->ndcm=ndcm; - g_ip->ndsam1=ndsam1; - g_ip->ndsam2=ndsam2; + g_ip->ndbl = ndbl; + g_ip->ndwl = ndwl; + g_ip->nspd = nspd; + g_ip->ndcm = ndcm; + g_ip->ndsam1 = ndsam1; + g_ip->ndsam2 = ndsam2; } - if (ecc==0){ - g_ip->add_ecc_b_=false; - } - else - { - g_ip->add_ecc_b_=true; - } + if (ecc == 0) { + g_ip->add_ecc_b_ = false; + } else { + g_ip->add_ecc_b_ = true; + } - if(!g_ip->error_checking()) - exit(0); + if (!g_ip->error_checking()) + exit(0); - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. - g_ip->display_ip(); - solve(&fin_res); - output_UCA(&fin_res); - output_data_csv(fin_res); - delete (g_ip); + g_ip->display_ip(); + solve(&fin_res); + output_UCA(&fin_res); + output_data_csv(fin_res); + delete (g_ip); - return fin_res; + return fin_res; } -bool InputParameter::error_checking() -{ - int A; - bool seq_access = false; - fast_access = true; +bool InputParameter::error_checking(string name) { + int A; + bool seq_access = false; + fast_access = true; - switch (access_mode) - { + switch (access_mode) { case 0: - seq_access = false; - fast_access = false; - break; + seq_access = false; + fast_access = false; + break; case 1: - seq_access = true; - fast_access = false; - break; + seq_access = true; + fast_access = false; + break; case 2: - seq_access = false; - fast_access = true; - break; - } - - if(is_main_mem) - { - if(ic_proj_type == 0) - { - cerr << "DRAM model supports only conservative interconnect projection!\n\n"; - return false; + seq_access = false; + fast_access = true; + break; + } + + if (is_main_mem) { + if (ic_proj_type == 0) { + cerr << name + << ": DRAM model supports only conservative interconnect " + << "projection but is set to aggressive!\n\n"; + return false; + } } - } - uint32_t B = line_sz; + uint32_t B = line_sz; - if (B < 1) - { - cerr << "Block size must >= 1" << endl; - return false; - } - else if (B*8 < out_w) - { - cerr << "Block size must be at least " << out_w/8 << endl; - return false; - } + if (B < 1) { + cerr << name << ": Block size must be >= 1, but is set to " << B + << endl; + return false; + } else if (B*8 < out_w) { + cerr << name << ": Block size must be at least " << out_w / 8 + << ", but is set to " << B << endl; + return false; + } - if (F_sz_um <= 0) - { - cerr << "Feature size must be > 0" << endl; - return false; - } - else if (F_sz_um > 0.091) - { - cerr << "Feature size must be <= 90 nm" << endl; - return false; - } + if (F_sz_um <= 0) { + cerr << name << ": Feature size must be > 0, but is set to " + << F_sz_um << endl; + return false; + } else if (F_sz_um > 0.091) { + cerr << name << ": Feature size must be <= 90 nm, but is set to " + << F_sz_um << endl; + return false; + } - uint32_t RWP = num_rw_ports; - uint32_t ERP = num_rd_ports; - uint32_t EWP = num_wr_ports; - uint32_t NSER = num_se_rd_ports; - uint32_t SCHP = num_search_ports; + uint32_t RWP = num_rw_ports; + uint32_t ERP = num_rd_ports; + uint32_t EWP = num_wr_ports; + uint32_t NSER = num_se_rd_ports; + uint32_t SCHP = num_search_ports; //TODO: revisit this. This is an important feature. Sheng thought this should be used // // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to @@ -1181,26 +1114,26 @@ bool InputParameter::error_checking() // return false; // } // else if ((RWP+ERP+EWP) < 1) - // Changed to new implementation: - // The number of ports specified at input is per bank - if ((RWP+ERP+EWP) < 1) - { - cerr << "Must have at least one port" << endl; - return false; - } + // Changed to new implementation: + // The number of ports specified at input is per bank + if ((RWP + ERP + EWP) < 1) { + cerr << name << ": Must have at least one port" << endl; + return false; + } - if (is_pow2(nbanks) == false) - { - cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl; - return false; - } + if (is_pow2(nbanks) == false) { + cerr << name << ": Number of subbanks should be greater than or " + << "equal to 1 and should be a power of 2, but is set to " + << nbanks << endl; + return false; + } - int C = cache_sz/nbanks; - if (C < 64) - { - cerr << "Cache size must >=64" << endl; - return false; - } + int C = cache_sz / nbanks; + if (C < 64) { + cerr << name << ": Cache size must be >=64, but is set to " << C + << endl; + return false; + } //TODO: revisit this // if (pure_ram==true && assoc!=1) @@ -1210,54 +1143,64 @@ bool InputParameter::error_checking() // } //fully assoc and cam check - if (is_cache && assoc==0) - fully_assoc =true; + if (is_cache && assoc == 0) + fully_assoc = true; else fully_assoc = false; - if (pure_cam==true && assoc!=0) - { - cerr << "Pure CAM must have associativity as 0" << endl; - return false; + if (pure_cam == true && assoc != 0) { + cerr << name + << ": Pure CAM must have associativity as 0, but is set to" + << assoc << endl; + return false; } - if (assoc==0 && (pure_cam==false && is_cache ==false)) - { - cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl; - return false; + if (assoc == 0 && (pure_cam == false && is_cache == false)) { + cerr << name + << ": Only CAM or Fully associative cache can have associativity " + << "as 0" << endl; + return false; } - if ((fully_assoc==true || pure_cam==true) - && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type - || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type )) - { - cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl; - return false; + if ((fully_assoc == true || pure_cam == true) + && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type + || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) { + cerr << name + << ": CAM and fully associative cache must have same device type " + << "for both data and tag array" << endl; + cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type + << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl + << "\tData array peripheral = " << data_arr_peri_global_tech_type + << ", Tag array peripheral = " << tag_arr_peri_global_tech_type + << endl; + return false; } - if ((fully_assoc==true || pure_cam==true) - && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram)) - { - cerr << "DRAM based CAM and fully associative cache are not supported" << endl; - return false; + if ((fully_assoc == true || pure_cam == true) + && (data_arr_ram_cell_tech_type == lp_dram || + data_arr_ram_cell_tech_type == comm_dram)) { + cerr << name << ": DRAM based CAM and fully associative cache are not " + << "supported" << endl; + return false; } - if ((fully_assoc==true || pure_cam==true) - && (is_main_mem==true)) - { - cerr << "CAM and fully associative cache cannot be as main memory" << endl; - return false; + if ((fully_assoc == true || pure_cam == true) + && (is_main_mem == true)) { + cerr << name + << ": CAM and fully associative cache cannot be as main memory" + << endl; + return false; } - if ((fully_assoc || pure_cam) && SCHP<1) - { - cerr << "CAM and fully associative must have at least 1 search port" << endl; - return false; + if ((fully_assoc || pure_cam) && SCHP < 1) { + cerr << name + << ": CAM and fully associative must have at least 1 search port," + << " but are set to " << SCHP << endl; + return false; } - if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam))) - { - ERP=SCHP; + if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) { + ERP = SCHP; } // if ((!(fully_assoc || pure_cam)) && SCHP>=1) @@ -1266,140 +1209,112 @@ bool InputParameter::error_checking() // return false; // } - if (assoc == 0) - { - A = C/B; - //fully_assoc = true; - } - else - { - if (assoc == 1) - { - A = 1; - //fully_assoc = false; + if (assoc == 0) { + A = C / B; + //fully_assoc = true; + } else { + if (assoc == 1) { + A = 1; + //fully_assoc = false; + } else { + //fully_assoc = false; + A = assoc; + if (is_pow2(A) == false) { + cerr << name + << ": Associativity must be a power of 2, but is set to " + << A << endl; + return false; + } + } } - else - { - //fully_assoc = false; - A = assoc; - if (is_pow2(A) == false) - { - cerr << "Associativity must be a power of 2" << endl; + + if (C / (B*A) <= 1 && assoc != 0) { + cerr << name << ": Number of sets (" << (C / (B * A)) + << ") is too small: " << endl; + cerr << " Need to either increase cache size, or decrease " + << "associativity or block size" << endl; + cerr << " (or use fully associative cache)" << endl; return false; - } } - } - if (C/(B*A) <= 1 && assoc!=0) - { - cerr << "Number of sets is too small: " << endl; - cerr << " Need to either increase cache size, or decrease associativity or block size" << endl; - cerr << " (or use fully associative cache)" << endl; - return false; - } + block_sz = B; - block_sz = B; + /*dt: testing sequential access mode*/ + if (seq_access) { + tag_assoc = A; + data_assoc = 1; + is_seq_acc = true; + } else { + tag_assoc = A; + data_assoc = A; + is_seq_acc = false; + } - /*dt: testing sequential access mode*/ - if(seq_access) - { - tag_assoc = A; - data_assoc = 1; - is_seq_acc = true; - } - else - { - tag_assoc = A; - data_assoc = A; - is_seq_acc = false; - } + if (assoc == 0) { + data_assoc = 1; + } + num_rw_ports = RWP; + num_rd_ports = ERP; + num_wr_ports = EWP; + num_se_rd_ports = NSER; + if (!(fully_assoc || pure_cam)) + num_search_ports = 0; + nsets = C / (B * A); - if (assoc==0) - { - data_assoc = 1; - } - num_rw_ports = RWP; - num_rd_ports = ERP; - num_wr_ports = EWP; - num_se_rd_ports = NSER; - if (!(fully_assoc || pure_cam)) - num_search_ports = 0; - nsets = C/(B*A); + if (temp < 300 || temp > 400 || temp % 10 != 0) { + cerr << name << ": " << temp + << " Temperature must be between 300 and 400 Kelvin and multiple " + << "of 10." << endl; + return false; + } - if (temp < 300 || temp > 400 || temp%10 != 0) - { - cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl; - return false; - } + if (nsets < 1) { + cerr << name << ": Less than one set..." << endl; + return false; + } - if (nsets < 1) - { - cerr << "Less than one set..." << endl; - return false; - } - - return true; + return true; } -void output_data_csv(const uca_org_t & fin_res) -{ - //TODO: the csv output should remain - fstream file("out.csv", ios::in); - bool print_index = file.fail(); - file.close(); +void output_data_csv(const uca_org_t & fin_res) { + //TODO: the csv output should remain + fstream file("out.csv", ios::in); + bool print_index = file.fail(); + file.close(); - file.open("out.csv", ios::out|ios::app); - if (file.fail() == true) - { - cerr << "File out.csv could not be opened successfully" << endl; - } - else - { - if (print_index == true) - { - file << "Tech node (nm), "; - file << "Capacity (bytes), "; - file << "Number of banks, "; - file << "Associativity, "; - file << "Output width (bits), "; - file << "Access time (ns), "; - file << "Random cycle time (ns), "; -// file << "Multisubbank interleave cycle time (ns), "; - -// file << "Delay request network (ns), "; -// file << "Delay inside mat (ns), "; -// file << "Delay reply network (ns), "; -// file << "Tag array access time (ns), "; -// file << "Data array access time (ns), "; -// file << "Refresh period (microsec), "; -// file << "DRAM array availability (%), "; - file << "Dynamic search energy (nJ), "; - file << "Dynamic read energy (nJ), "; - file << "Dynamic write energy (nJ), "; -// file << "Tag Dynamic read energy (nJ), "; -// file << "Data Dynamic read energy (nJ), "; -// file << "Dynamic read power (mW), "; - file << "Standby leakage per bank(mW), "; -// file << "Leakage per bank with leak power management (mW), "; -// file << "Leakage per bank with leak power management (mW), "; -// file << "Refresh power as percentage of standby leakage, "; - file << "Area (mm2), "; - file << "Ndwl, "; - file << "Ndbl, "; - file << "Nspd, "; - file << "Ndcm, "; - file << "Ndsam_level_1, "; - file << "Ndsam_level_2, "; - file << "Data arrary area efficiency %, "; - file << "Ntwl, "; - file << "Ntbl, "; - file << "Ntspd, "; - file << "Ntcm, "; - file << "Ntsam_level_1, "; - file << "Ntsam_level_2, "; - file << "Tag arrary area efficiency %, "; + file.open("out.csv", ios::out | ios::app); + if (file.fail() == true) { + cerr << "File out.csv could not be opened successfully" << endl; + } else { + if (print_index == true) { + file << "Tech node (nm), "; + file << "Capacity (bytes), "; + file << "Number of banks, "; + file << "Associativity, "; + file << "Output width (bits), "; + file << "Access time (ns), "; + file << "Random cycle time (ns), "; + file << "Dynamic search energy (nJ), "; + file << "Dynamic read energy (nJ), "; + file << "Dynamic write energy (nJ), "; + file << "Standby leakage per bank(mW), "; + file << "Area (mm2), "; + file << "Ndwl, "; + file << "Ndbl, "; + file << "Nspd, "; + file << "Ndcm, "; + file << "Ndsam_level_1, "; + file << "Ndsam_level_2, "; + file << "Data arrary area efficiency %, "; + file << "Ntwl, "; + file << "Ntbl, "; + file << "Ntspd, "; + file << "Ntcm, "; + file << "Ntsam_level_1, "; + file << "Ntsam_level_2, "; + file << "Tag arrary area efficiency %, "; // file << "Resistance per unit micron (ohm-micron), "; // file << "Capacitance per unit micron (fF per micron), "; @@ -1428,15 +1343,15 @@ void output_data_csv(const uca_org_t & fin_res) // file << "Delay opt (perc), "; // file << "Repeater opt (perc), "; // file << "Aspect ratio"; - file << endl; - } - file << g_ip->F_sz_nm << ", "; - file << g_ip->cache_sz << ", "; - file << g_ip->nbanks << ", "; - file << g_ip->tag_assoc << ", "; - file << g_ip->out_w << ", "; - file << fin_res.access_time*1e+9 << ", "; - file << fin_res.cycle_time*1e+9 << ", "; + file << endl; + } + file << g_ip->F_sz_nm << ", "; + file << g_ip->cache_sz << ", "; + file << g_ip->nbanks << ", "; + file << g_ip->tag_assoc << ", "; + file << g_ip->out_w << ", "; + file << fin_res.access_time*1e+9 << ", "; + file << fin_res.cycle_time*1e+9 << ", "; // file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", "; // file << fin_res.data_array2->delay_request_network*1e+9 << ", "; // file << fin_res.data_array2->delay_inside_mat*1e+9 << ", "; @@ -1453,16 +1368,13 @@ void output_data_csv(const uca_org_t & fin_res) // file << fin_res.data_array2->access_time*1e+9 << ", "; // file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; // file << fin_res.data_array2->dram_array_availability << ", "; - if (g_ip->fully_assoc || g_ip->pure_cam) - { - file << fin_res.power.searchOp.dynamic*1e+9 << ", "; - } - else - { - file << "N/A" << ", "; - } - file << fin_res.power.readOp.dynamic*1e+9 << ", "; - file << fin_res.power.writeOp.dynamic*1e+9 << ", "; + if (g_ip->fully_assoc || g_ip->pure_cam) { + file << fin_res.power.searchOp.dynamic*1e+9 << ", "; + } else { + file << "N/A" << ", "; + } + file << fin_res.power.readOp.dynamic*1e+9 << ", "; + file << fin_res.power.writeOp.dynamic*1e+9 << ", "; // if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) // { // file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; @@ -1484,27 +1396,24 @@ void output_data_csv(const uca_org_t & fin_res) file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", "; // file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", "; // file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", "; - file << fin_res.area*1e-6 << ", "; + file << fin_res.area*1e-6 << ", "; - file << fin_res.data_array2->Ndwl << ", "; - file << fin_res.data_array2->Ndbl << ", "; - file << fin_res.data_array2->Nspd << ", "; - file << fin_res.data_array2->deg_bl_muxing << ", "; - file << fin_res.data_array2->Ndsam_lev_1 << ", "; - file << fin_res.data_array2->Ndsam_lev_2 << ", "; - file << fin_res.data_array2->area_efficiency << ", "; - if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) - { - file << fin_res.tag_array2->Ndwl << ", "; - file << fin_res.tag_array2->Ndbl << ", "; - file << fin_res.tag_array2->Nspd << ", "; - file << fin_res.tag_array2->deg_bl_muxing << ", "; - file << fin_res.tag_array2->Ndsam_lev_1 << ", "; - file << fin_res.tag_array2->Ndsam_lev_2 << ", "; - file << fin_res.tag_array2->area_efficiency << ", "; - } - else - { + file << fin_res.data_array2->Ndwl << ", "; + file << fin_res.data_array2->Ndbl << ", "; + file << fin_res.data_array2->Nspd << ", "; + file << fin_res.data_array2->deg_bl_muxing << ", "; + file << fin_res.data_array2->Ndsam_lev_1 << ", "; + file << fin_res.data_array2->Ndsam_lev_2 << ", "; + file << fin_res.data_array2->area_efficiency << ", "; + if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) { + file << fin_res.tag_array2->Ndwl << ", "; + file << fin_res.tag_array2->Ndbl << ", "; + file << fin_res.tag_array2->Nspd << ", "; + file << fin_res.tag_array2->deg_bl_muxing << ", "; + file << fin_res.tag_array2->Ndsam_lev_1 << ", "; + file << fin_res.tag_array2->Ndsam_lev_2 << ", "; + file << fin_res.tag_array2->area_efficiency << ", "; + } else { file << "N/A" << ", "; file << "N/A"<< ", "; file << "N/A" << ", "; @@ -1535,803 +1444,552 @@ void output_data_csv(const uca_org_t & fin_res) // file << fin_res.data_array.cas_latency * 1e9 << ", " ; // file << fin_res.data_array.precharge_delay * 1e9 << ", " ; // file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width; - file<cache_sz); - } - else { - if (g_ip->data_arr_ram_cell_tech_type == 3) { - cout << "\n---------- CACTI version 6.5, Uniform Cache Access " << - "Logic Process Based DRAM Model ----------\n"; - } - else if (g_ip->data_arr_ram_cell_tech_type == 4) { - cout << "\n---------- CACTI version 6.5, Uniform" << - "Cache Access Commodity DRAM Model ----------\n"; +void output_UCA(uca_org_t *fr) { + // if (NUCA) + if (0) { + cout << "\n\n Detailed Bank Stats:\n"; + cout << " Bank Size (bytes): %d\n" << + (int) (g_ip->cache_sz); + } else { + if (g_ip->data_arr_ram_cell_tech_type == 3) { + cout << "\n---------- CACTI version 6.5, Uniform Cache Access " << + "Logic Process Based DRAM Model ----------\n"; + } else if (g_ip->data_arr_ram_cell_tech_type == 4) { + cout << "\n---------- CACTI version 6.5, Uniform" << + "Cache Access Commodity DRAM Model ----------\n"; + } else { + cout << "\n---------- CACTI version 6.5, Uniform Cache Access " + "SRAM Model ----------\n"; + } + cout << "\nCache Parameters:\n"; + cout << " Total cache size (bytes): " << + (int) (g_ip->cache_sz) << endl; } + + cout << " Number of banks: " << (int) g_ip->nbanks << endl; + if (g_ip->fully_assoc || g_ip->pure_cam) + cout << " Associativity: fully associative\n"; else { - cout << "\n---------- CACTI version 6.5, Uniform Cache Access " - "SRAM Model ----------\n"; + if (g_ip->tag_assoc == 1) + cout << " Associativity: direct mapped\n"; + else + cout << " Associativity: " << + g_ip->tag_assoc << endl; } - cout << "\nCache Parameters:\n"; - cout << " Total cache size (bytes): " << - (int) (g_ip->cache_sz) << endl; - } - - cout << " Number of banks: " << (int) g_ip->nbanks << endl; - if (g_ip->fully_assoc|| g_ip->pure_cam) - cout << " Associativity: fully associative\n"; - else { - if (g_ip->tag_assoc == 1) - cout << " Associativity: direct mapped\n"; - else - cout << " Associativity: " << - g_ip->tag_assoc << endl; - } - cout << " Block size (bytes): " << g_ip->line_sz << endl; - cout << " Read/write Ports: " << - g_ip->num_rw_ports << endl; - cout << " Read ports: " << - g_ip->num_rd_ports << endl; - cout << " Write ports: " << - g_ip->num_wr_ports << endl; - if (g_ip->fully_assoc|| g_ip->pure_cam) - cout << " search ports: " << - g_ip->num_search_ports << endl; - cout << " Technology size (nm): " << - g_ip->F_sz_nm << endl << endl; + cout << " Block size (bytes): " << g_ip->line_sz << endl; + cout << " Read/write Ports: " << + g_ip->num_rw_ports << endl; + cout << " Read ports: " << + g_ip->num_rd_ports << endl; + cout << " Write ports: " << + g_ip->num_wr_ports << endl; + if (g_ip->fully_assoc || g_ip->pure_cam) + cout << " search ports: " << + g_ip->num_search_ports << endl; + cout << " Technology size (nm): " << + g_ip->F_sz_nm << endl << endl; - cout << " Access time (ns): " << fr->access_time*1e9 << endl; - cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl; - if (g_ip->data_arr_ram_cell_tech_type >= 4) { - cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; - cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl; - cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; - cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; - cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; - cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl; - cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; - cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; - cout << " Refresh power (mW): " << - fr->data_array2->refresh_power*1e3 << endl; - } - else { - if ((g_ip->fully_assoc|| g_ip->pure_cam)) - { - cout << " Total dynamic associative search energy per access (nJ): " << - fr->power.searchOp.dynamic*1e9 << endl; + cout << " Access time (ns): " << fr->access_time*1e9 << endl; + cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl; + if (g_ip->data_arr_ram_cell_tech_type >= 4) { + cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; + cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl; + cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; + cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; + cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; + cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl; + cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; + cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; + cout << " Refresh power (mW): " << + fr->data_array2->refresh_power*1e3 << endl; + } else { + if ((g_ip->fully_assoc || g_ip->pure_cam)) { + cout << " Total dynamic associative search energy per access (nJ): " << + fr->power.searchOp.dynamic*1e9 << endl; // cout << " Total dynamic read energy per access (nJ): " << // fr->power.readOp.dynamic*1e9 << endl; // cout << " Total dynamic write energy per access (nJ): " << // fr->power.writeOp.dynamic*1e9 << endl; - } + } // else // { - cout << " Total dynamic read energy per access (nJ): " << - fr->power.readOp.dynamic*1e9 << endl; - cout << " Total dynamic write energy per access (nJ): " << - fr->power.writeOp.dynamic*1e9 << endl; + cout << " Total dynamic read energy per access (nJ): " << + fr->power.readOp.dynamic*1e9 << endl; + cout << " Total dynamic write energy per access (nJ): " << + fr->power.writeOp.dynamic*1e9 << endl; // } - cout << " Total leakage power of a bank" - " (mW): " << fr->power.readOp.leakage*1e3 << endl; - cout << " Total gate leakage power of a bank" - " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl; - } + cout << " Total leakage power of a bank" + " (mW): " << fr->power.readOp.leakage*1e3 << endl; + cout << " Total gate leakage power of a bank" + " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl; + } - if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4) - { - } - cout << " Cache height x width (mm): " << - fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl; + if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) { + } + cout << " Cache height x width (mm): " << + fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl; - cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl; - cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; - cout << " Best Nspd : " << fr->data_array2->Nspd << endl; - cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; - cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; - cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; + cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl; + cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; + cout << " Best Nspd : " << fr->data_array2->Nspd << endl; + cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; + cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; + cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl; - cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; - cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; - cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl; - cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl; - cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl; - } + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl; + cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; + cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; + cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl; + cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl; + cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl; + } - switch (fr->data_array2->wt) { + switch (fr->data_array2->wt) { case (0): - cout << " Data array, H-tree wire type: Delay optimized global wires\n"; - break; + cout << " Data array, H-tree wire type: Delay optimized global wires\n"; + break; case (1): - cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; - break; + cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; + break; case (2): - cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; - break; + cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; + break; case (3): - cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; - break; + cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; + break; case (4): - cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; - break; + cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; + break; case (5): - cout << " Data array, wire type: Low swing wires\n"; - break; + cout << " Data array, wire type: Low swing wires\n"; + break; default: - cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) { - switch (fr->tag_array2->wt) { - case (0): - cout << " Tag array, H-tree wire type: Delay optimized global wires\n"; - break; - case (1): - cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; - break; - case (2): - cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n"; - break; - case (3): - cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; - break; - case (4): - cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; - break; - case (5): - cout << " Tag array, wire type: Low swing wires\n"; - break; - default: - cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <print_detail) - { - //if(g_ip->fully_assoc) return; - - /* Delay stats */ - /* data array stats */ - cout << endl << "Time Components:" << endl << endl; - - cout << " Data side (with Output driver) (ns): " << - fr->data_array2->access_time/1e-9 << endl; - - cout << "\tH-tree input delay (ns): " << - fr->data_array2->delay_route_to_bank * 1e9 + - fr->data_array2->delay_input_htree * 1e9 << endl; - - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - { - cout << "\tDecoder + wordline delay (ns): " << - fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->data_array2->delay_row_decoder * 1e9 << endl; - } - else - { - cout << "\tCAM search delay (ns): " << - fr->data_array2->delay_matchlines * 1e9 << endl; + cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt << endl; + exit(0); } - cout << "\tBitline delay (ns): " << - fr->data_array2->delay_bitlines/1e-9 << endl; - - cout << "\tSense Amplifier delay (ns): " << - fr->data_array2->delay_sense_amp * 1e9 << endl; - - - cout << "\tH-tree output delay (ns): " << - fr->data_array2->delay_subarray_output_driver * 1e9 + - fr->data_array2->delay_dout_htree * 1e9 << endl; - - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - /* tag array stats */ - cout << endl << " Tag side (with Output driver) (ns): " << - fr->tag_array2->access_time/1e-9 << endl; - - cout << "\tH-tree input delay (ns): " << - fr->tag_array2->delay_route_to_bank * 1e9 + - fr->tag_array2->delay_input_htree * 1e9 << endl; - - cout << "\tDecoder + wordline delay (ns): " << - fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->tag_array2->delay_row_decoder * 1e9 << endl; - - cout << "\tBitline delay (ns): " << - fr->tag_array2->delay_bitlines/1e-9 << endl; - - cout << "\tSense Amplifier delay (ns): " << - fr->tag_array2->delay_sense_amp * 1e9 << endl; - - cout << "\tComparator delay (ns): " << - fr->tag_array2->delay_comparator * 1e9 << endl; - - cout << "\tH-tree output delay (ns): " << - fr->tag_array2->delay_subarray_output_driver * 1e9 + - fr->tag_array2->delay_dout_htree * 1e9 << endl; + if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) { + switch (fr->tag_array2->wt) { + case (0): + cout << " Tag array, H-tree wire type: Delay optimized global wires\n"; + break; + case (1): + cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; + break; + case (2): + cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n"; + break; + case (3): + cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; + break; + case (4): + cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; + break; + case (5): + cout << " Tag array, wire type: Low swing wires\n"; + break; + default: + cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl; + exit(-1); + } } + if (g_ip->print_detail) { + /* Delay stats */ + /* data array stats */ + cout << endl << "Time Components:" << endl << endl; + + cout << " Data side (with Output driver) (ns): " << + fr->data_array2->access_time / 1e-9 << endl; + + cout << "\tH-tree input delay (ns): " << + fr->data_array2->delay_route_to_bank * 1e9 + + fr->data_array2->delay_input_htree * 1e9 << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) { + cout << "\tDecoder + wordline delay (ns): " << + fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->data_array2->delay_row_decoder * 1e9 << endl; + } else { + cout << "\tCAM search delay (ns): " << + fr->data_array2->delay_matchlines * 1e9 << endl; + } + + cout << "\tBitline delay (ns): " << + fr->data_array2->delay_bitlines / 1e-9 << endl; + + cout << "\tSense Amplifier delay (ns): " << + fr->data_array2->delay_sense_amp * 1e9 << endl; - /* Energy/Power stats */ - cout << endl << endl << "Power Components:" << endl << endl; + cout << "\tH-tree output delay (ns): " << + fr->data_array2->delay_subarray_output_driver * 1e9 + + fr->data_array2->delay_dout_htree * 1e9 << endl; - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - { - cout << " Data array: Total dynamic read energy/access (nJ): " << - fr->data_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal leakage read/write power of a bank (mW): " << - fr->data_array2->power.readOp.leakage * 1e3 << endl; + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + /* tag array stats */ + cout << endl << " Tag side (with Output driver) (ns): " << + fr->tag_array2->access_time / 1e-9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic + - fr->data_array2->power_data_output_htree.readOp.dynamic + - fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tH-tree input delay (ns): " << + fr->tag_array2->delay_route_to_bank * 1e9 + + fr->tag_array2->delay_input_htree * 1e9 << endl; - cout << "\tTotal leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + cout << "\tDecoder + wordline delay (ns): " << + fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->tag_array2->delay_row_decoder * 1e9 << endl; + + cout << "\tBitline delay (ns): " << + fr->tag_array2->delay_bitlines / 1e-9 << endl; + + cout << "\tSense Amplifier delay (ns): " << + fr->tag_array2->delay_sense_amp * 1e9 << endl; + + cout << "\tComparator delay (ns): " << + fr->tag_array2->delay_comparator * 1e9 << endl; + + cout << "\tH-tree output delay (ns): " << + fr->tag_array2->delay_subarray_output_driver * 1e9 + + fr->tag_array2->delay_dout_htree * 1e9 << endl; + } + + + + /* Energy/Power stats */ + cout << endl << endl << "Power Components:" << endl << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) { + cout << " Data array: Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal leakage read/write power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + + cout << "\tTotal leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->data_array2->power_addr_input_htree.readOp.leakage + fr->data_array2->power_data_output_htree.readOp.leakage + - fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; + fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 + << endl; - cout << "\tTotal gate leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + cout << "\tTotal gate leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->data_array2->power_addr_input_htree.readOp.gate_leakage + fr->data_array2->power_data_output_htree.readOp.gate_leakage + - fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; + fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * + 1e3 << endl; - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines precharge and equalization circuit (nJ): " << - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - } - - else if (g_ip->pure_cam) - { - - cout << " CAM array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "match key and data transfer) (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic + - fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; - cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; - cout << "\tSearchlines (nJ): " << - fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + - fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tMatchlines (nJ): " << - fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; - - - cout <data_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic + - fr->data_array2->power_data_output_htree.readOp.dynamic + - fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - - cout << endl <<" Total leakage power of a bank (mW): " << - fr->data_array2->power.readOp.leakage * 1e3 << endl; + cout << "\tBitlines precharge and equalization circuit (nJ): " << + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; } - else - { - cout << " Fully associative array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "match key and data transfer) (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic + - fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; - cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; - cout << "\tSearchlines (nJ): " << - fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + - fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tMatchlines (nJ): " << - fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tData portion wordline (nJ): " << - fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl; - cout << "\tData Bitlines (nJ): " << - fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; + + else if (g_ip->pure_cam) { + + cout << " CAM array:" << endl; + cout << " Total dynamic associative search energy/access (nJ): " << + fr->data_array2->power.searchOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "match key and data transfer) (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic + + fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; + cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; + cout << "\tSearchlines (nJ): " << + fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tMatchlines (nJ): " << + fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; - cout <data_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic + - fr->data_array2->power_data_output_htree.readOp.dynamic + - fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + cout << endl << " Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - cout << endl <<" Total leakage power of a bank (mW): " << - fr->data_array2->power.readOp.leakage * 1e3 << endl; - } + cout << endl << " Total leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + } else { + cout << " Fully associative array:" << endl; + cout << " Total dynamic associative search energy/access (nJ): " << + fr->data_array2->power.searchOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "match key and data transfer) (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic + + fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; + cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; + cout << "\tSearchlines (nJ): " << + fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tMatchlines (nJ): " << + fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tData portion wordline (nJ): " << + fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl; + cout << "\tData Bitlines (nJ): " << + fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << - fr->tag_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal leakage read/write power of a bank (mW): " << - fr->tag_array2->power.readOp.leakage * 1e3 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->tag_array2->power_addr_input_htree.readOp.dynamic + - fr->tag_array2->power_data_output_htree.readOp.dynamic + - fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << endl << " Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - cout << "\tTotal leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + cout << endl << " Total leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + } + + + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << + fr->tag_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal leakage read/write power of a bank (mW): " << + fr->tag_array2->power.readOp.leakage * 1e3 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->tag_array2->power_addr_input_htree.readOp.dynamic + + fr->tag_array2->power_data_output_htree.readOp.dynamic + + fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + + cout << "\tTotal leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->tag_array2->power_addr_input_htree.readOp.leakage + fr->tag_array2->power_data_output_htree.readOp.leakage + - fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; + fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 + << endl; - cout << "\tTotal gate leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + cout << "\tTotal gate leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage + fr->tag_array2->power_data_output_htree.readOp.gate_leakage + - fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; + fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * + 1e3 << endl; - cout << "\tOutput Htree inside a bank Energy (nJ): " << - fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines precharge and equalization circuit (nJ): " << - fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl; - cout << "\tSense amplifier energy (nJ): " << - fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + cout << "\tOutput Htree inside a bank Energy (nJ): " << + fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines precharge and equalization circuit (nJ): " << + fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + } + + cout << endl << endl << "Area Components:" << endl << endl; + /* Data array area stats */ + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + else if (g_ip->pure_cam) + cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + else + cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + cout << "\tHeight (mm): " << + fr->data_array2->all_banks_height*1e-3 << endl; + cout << "\tWidth (mm): " << + fr->data_array2->all_banks_width*1e-3 << endl; + if (g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " << + fr->data_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << + fr->data_array2->mat_height*1e-3 << endl; + cout << "\t\tMAT Length (mm): " << + fr->data_array2->mat_length*1e-3 << endl; + cout << "\t\tSubarray Height (mm): " << + fr->data_array2->subarray_height*1e-3 << endl; + cout << "\t\tSubarray Length (mm): " << + fr->data_array2->subarray_length*1e-3 << endl; + } + + /* Tag array area stats */ + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; + cout << "\tHeight (mm): " << + fr->tag_array2->all_banks_height*1e-3 << endl; + cout << "\tWidth (mm): " << + fr->tag_array2->all_banks_width*1e-3 << endl; + if (g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " << + fr->tag_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << + fr->tag_array2->mat_height*1e-3 << endl; + cout << "\t\tMAT Length (mm): " << + fr->tag_array2->mat_length*1e-3 << endl; + cout << "\t\tSubarray Height (mm): " << + fr->tag_array2->subarray_height*1e-3 << endl; + cout << "\t\tSubarray Length (mm): " << + fr->tag_array2->subarray_length*1e-3 << endl; + } + } + Wire wpr; + wpr.print_wire(); } - - cout << endl << endl << "Area Components:" << endl << endl; - /* Data array area stats */ - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - else if (g_ip->pure_cam) - cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - else - cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - cout << "\tHeight (mm): " << - fr->data_array2->all_banks_height*1e-3 << endl; - cout << "\tWidth (mm): " << - fr->data_array2->all_banks_width*1e-3 << endl; - if (g_ip->print_detail) { - cout << "\tArea efficiency (Memory cell area/Total area) - " << - fr->data_array2->area_efficiency << " %" << endl; - cout << "\t\tMAT Height (mm): " << - fr->data_array2->mat_height*1e-3 << endl; - cout << "\t\tMAT Length (mm): " << - fr->data_array2->mat_length*1e-3 << endl; - cout << "\t\tSubarray Height (mm): " << - fr->data_array2->subarray_height*1e-3 << endl; - cout << "\t\tSubarray Length (mm): " << - fr->data_array2->subarray_length*1e-3 << endl; - } - - /* Tag array area stats */ - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; - cout << "\tHeight (mm): " << - fr->tag_array2->all_banks_height*1e-3 << endl; - cout << "\tWidth (mm): " << - fr->tag_array2->all_banks_width*1e-3 << endl; - if (g_ip->print_detail) - { - cout << "\tArea efficiency (Memory cell area/Total area) - " << - fr->tag_array2->area_efficiency << " %" << endl; - cout << "\t\tMAT Height (mm): " << - fr->tag_array2->mat_height*1e-3 << endl; - cout << "\t\tMAT Length (mm): " << - fr->tag_array2->mat_length*1e-3 << endl; - cout << "\t\tSubarray Height (mm): " << - fr->tag_array2->subarray_height*1e-3 << endl; - cout << "\t\tSubarray Length (mm): " << - fr->tag_array2->subarray_length*1e-3 << endl; - } - } - Wire wpr; - wpr.print_wire(); - - //cout << "FO4 = " << g_tp.FO4 << endl; - } } //McPAT's plain interface, please keep !!! -uca_org_t cacti_interface(InputParameter * const local_interface) -{ -// g_ip = new InputParameter(); - //g_ip->add_ecc_b_ = true; +uca_org_t cacti_interface(InputParameter * const local_interface) { + uca_org_t fin_res; + fin_res.valid = false; - uca_org_t fin_res; - fin_res.valid = false; + g_ip = local_interface; - g_ip = local_interface; + if (!g_ip->error_checking()) { + exit(0); + } + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. -// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; -// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; -// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; -// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; -// -// g_ip->ic_proj_type = interconnect_projection_type_in; -// g_ip->wire_is_mat_type = wire_inside_mat_type_in; -// g_ip->wire_os_mat_type = wire_outside_mat_type_in; -// g_ip->burst_len = BURST_LENGTH_in; -// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; -// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; -// -// g_ip->cache_sz = cache_size; -// g_ip->line_sz = line_size; -// g_ip->assoc = associativity; -// g_ip->nbanks = banks; -// g_ip->out_w = output_width; -// g_ip->specific_tag = specific_tag; -// if (tag_width == 0) { -// g_ip->tag_w = 42; -// } -// else { -// g_ip->tag_w = tag_width; -// } -// -// g_ip->access_mode = access_mode; -// g_ip->delay_wt = obj_func_delay; -// g_ip->dynamic_power_wt = obj_func_dynamic_power; -// g_ip->leakage_power_wt = obj_func_leakage_power; -// g_ip->area_wt = obj_func_area; -// g_ip->cycle_time_wt = obj_func_cycle_time; -// g_ip->delay_dev = dev_func_delay; -// g_ip->dynamic_power_dev = dev_func_dynamic_power; -// g_ip->leakage_power_dev = dev_func_leakage_power; -// g_ip->area_dev = dev_func_area; -// g_ip->cycle_time_dev = dev_func_cycle_time; -// g_ip->temp = temp; -// -// g_ip->F_sz_nm = tech_node; -// g_ip->F_sz_um = tech_node / 1000; -// g_ip->is_main_mem = (main_mem != 0) ? true : false; -// g_ip->is_cache = (cache ==1) ? true : false; -// g_ip->pure_ram = (cache ==0) ? true : false; -// g_ip->pure_cam = (cache ==2) ? true : false; -// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; -// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; -// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; -// -// g_ip->num_rw_ports = rw_ports; -// g_ip->num_rd_ports = excl_read_ports; -// g_ip->num_wr_ports = excl_write_ports; -// g_ip->num_se_rd_ports = single_ended_read_ports; -// g_ip->num_search_ports = search_ports; -// -// g_ip->print_detail = 1; -// g_ip->nuca = 0; -// g_ip->is_cache=true; -// -// if (force_wiretype == 0) -// { -// g_ip->wt = Global; -// g_ip->force_wiretype = false; -// } -// else -// { g_ip->force_wiretype = true; -// if (wiretype==10) { -// g_ip->wt = Global_10; -// } -// if (wiretype==20) { -// g_ip->wt = Global_20; -// } -// if (wiretype==30) { -// g_ip->wt = Global_30; -// } -// if (wiretype==5) { -// g_ip->wt = Global_5; -// } -// if (wiretype==0) { -// g_ip->wt = Low_swing; -// } -// } -// //g_ip->wt = Global_5; -// if (force_config == 0) -// { -// g_ip->force_cache_config = false; -// } -// else -// { -// g_ip->force_cache_config = true; -// g_ip->ndbl=ndbl; -// g_ip->ndwl=ndwl; -// g_ip->nspd=nspd; -// g_ip->ndcm=ndcm; -// g_ip->ndsam1=ndsam1; -// g_ip->ndsam2=ndsam2; -// -// -// } -// -// if (ecc==0){ -// g_ip->add_ecc_b_=false; -// } -// else -// { -// g_ip->add_ecc_b_=true; -// } + solve(&fin_res); - - g_ip->error_checking(); - - - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. - - solve(&fin_res); - -// g_ip->display_ip(); -// output_UCA(&fin_res); -// output_data_csv(fin_res); - - // delete (g_ip); - - return fin_res; + return fin_res; } //McPAT's plain interface, please keep !!! -uca_org_t init_interface(InputParameter* const local_interface) -{ - // g_ip = new InputParameter(); - //g_ip->add_ecc_b_ = true; +uca_org_t init_interface(InputParameter* const local_interface, + const string &name) { + uca_org_t fin_res; + fin_res.valid = false; - uca_org_t fin_res; - fin_res.valid = false; + g_ip = local_interface; - g_ip = local_interface; + if (!g_ip->error_checking(name)) { + exit(0); + } - -// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; -// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; -// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; -// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; -// -// g_ip->ic_proj_type = interconnect_projection_type_in; -// g_ip->wire_is_mat_type = wire_inside_mat_type_in; -// g_ip->wire_os_mat_type = wire_outside_mat_type_in; -// g_ip->burst_len = BURST_LENGTH_in; -// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; -// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; -// -// g_ip->cache_sz = cache_size; -// g_ip->line_sz = line_size; -// g_ip->assoc = associativity; -// g_ip->nbanks = banks; -// g_ip->out_w = output_width; -// g_ip->specific_tag = specific_tag; -// if (tag_width == 0) { -// g_ip->tag_w = 42; -// } -// else { -// g_ip->tag_w = tag_width; -// } -// -// g_ip->access_mode = access_mode; -// g_ip->delay_wt = obj_func_delay; -// g_ip->dynamic_power_wt = obj_func_dynamic_power; -// g_ip->leakage_power_wt = obj_func_leakage_power; -// g_ip->area_wt = obj_func_area; -// g_ip->cycle_time_wt = obj_func_cycle_time; -// g_ip->delay_dev = dev_func_delay; -// g_ip->dynamic_power_dev = dev_func_dynamic_power; -// g_ip->leakage_power_dev = dev_func_leakage_power; -// g_ip->area_dev = dev_func_area; -// g_ip->cycle_time_dev = dev_func_cycle_time; -// g_ip->temp = temp; -// -// g_ip->F_sz_nm = tech_node; -// g_ip->F_sz_um = tech_node / 1000; -// g_ip->is_main_mem = (main_mem != 0) ? true : false; -// g_ip->is_cache = (cache ==1) ? true : false; -// g_ip->pure_ram = (cache ==0) ? true : false; -// g_ip->pure_cam = (cache ==2) ? true : false; -// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; -// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; -// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; -// -// g_ip->num_rw_ports = rw_ports; -// g_ip->num_rd_ports = excl_read_ports; -// g_ip->num_wr_ports = excl_write_ports; -// g_ip->num_se_rd_ports = single_ended_read_ports; -// g_ip->num_search_ports = search_ports; -// -// g_ip->print_detail = 1; -// g_ip->nuca = 0; -// -// if (force_wiretype == 0) -// { -// g_ip->wt = Global; -// g_ip->force_wiretype = false; -// } -// else -// { g_ip->force_wiretype = true; -// if (wiretype==10) { -// g_ip->wt = Global_10; -// } -// if (wiretype==20) { -// g_ip->wt = Global_20; -// } -// if (wiretype==30) { -// g_ip->wt = Global_30; -// } -// if (wiretype==5) { -// g_ip->wt = Global_5; -// } -// if (wiretype==0) { -// g_ip->wt = Low_swing; -// } -// } -// //g_ip->wt = Global_5; -// if (force_config == 0) -// { -// g_ip->force_cache_config = false; -// } -// else -// { -// g_ip->force_cache_config = true; -// g_ip->ndbl=ndbl; -// g_ip->ndwl=ndwl; -// g_ip->nspd=nspd; -// g_ip->ndcm=ndcm; -// g_ip->ndsam1=ndsam1; -// g_ip->ndsam2=ndsam2; -// -// -// } -// -// if (ecc==0){ -// g_ip->add_ecc_b_=false; -// } -// else -// { -// g_ip->add_ecc_b_=true; -// } - - - g_ip->error_checking(); - - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. - //solve(&fin_res); - //g_ip->display_ip(); - - //solve(&fin_res); - //output_UCA(&fin_res); - //output_data_csv(fin_res); - // delete (g_ip); - - return fin_res; + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. + return fin_res; } void reconfigure(InputParameter *local_interface, uca_org_t *fin_res) diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc old mode 100755 new mode 100644 index ef98107c7..447996053 --- a/ext/mcpat/cacti/mat.cc +++ b/ext/mcpat/cacti/mat.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -36,371 +37,369 @@ #include "mat.h" Mat::Mat(const DynamicParameter & dyn_p) - :dp(dyn_p), - power_subarray_out_drv(), - delay_fa_tag(0), delay_cam(0), - delay_before_decoder(0), delay_bitline(0), - delay_wl_reset(0), delay_bl_restore(0), - delay_searchline(0), delay_matchchline(0), - delay_cam_sl_restore(0), delay_cam_ml_reset(0), - delay_fa_ram_wl(0),delay_hit_miss_reset(0), - delay_hit_miss(0), - subarray(dp, dp.fully_assoc), - power_bitline(), per_bitline_read_energy(0), - deg_bl_muxing(dp.deg_bl_muxing), - num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), - delay_writeback(0), - cell(subarray.cell), cam_cell(subarray.cam_cell), - is_dram(dyn_p.is_dram), - pure_cam(dyn_p.pure_cam), - num_mats(dp.num_mats), - power_sa(), delay_sa(0), - leak_power_sense_amps_closed_page_state(0), - leak_power_sense_amps_open_page_state(0), - delay_subarray_out_drv(0), - delay_comparator(0), power_comparator(), - num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), - num_subarrays_per_mat(dp.num_subarrays/dp.num_mats), - num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir) -{ - assert(num_subarrays_per_mat <= 4); - assert(num_subarrays_per_row <= 2); - is_fa = (dp.fully_assoc) ? true : false; - camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. + : dp(dyn_p), + power_subarray_out_drv(), + delay_fa_tag(0), delay_cam(0), + delay_before_decoder(0), delay_bitline(0), + delay_wl_reset(0), delay_bl_restore(0), + delay_searchline(0), delay_matchchline(0), + delay_cam_sl_restore(0), delay_cam_ml_reset(0), + delay_fa_ram_wl(0), delay_hit_miss_reset(0), + delay_hit_miss(0), + subarray(dp, dp.fully_assoc), + power_bitline(), per_bitline_read_energy(0), + deg_bl_muxing(dp.deg_bl_muxing), + num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), + delay_writeback(0), + cell(subarray.cell), cam_cell(subarray.cam_cell), + is_dram(dyn_p.is_dram), + pure_cam(dyn_p.pure_cam), + num_mats(dp.num_mats), + power_sa(), delay_sa(0), + leak_power_sense_amps_closed_page_state(0), + leak_power_sense_amps_open_page_state(0), + delay_subarray_out_drv(0), + delay_comparator(0), power_comparator(), + num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), + num_subarrays_per_mat(dp.num_subarrays / dp.num_mats), + num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) { + assert(num_subarrays_per_mat <= 4); + assert(num_subarrays_per_row <= 2); + is_fa = (dp.fully_assoc) ? true : false; + camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. - if (is_fa || pure_cam) - num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat; - - if (dp.use_inp_params == 1) { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - - } - - double number_sa_subarray; - - if (!is_fa && !pure_cam) - { - number_sa_subarray = subarray.num_cols / deg_bl_muxing; - } - else if (is_fa && !pure_cam) - { - number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; - } - - else - { - number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; - } - - int num_dec_signals = subarray.num_rows; - double C_ld_bit_mux_dec_out = 0; - double C_ld_sa_mux_lev_1_dec_out = 0; - double C_ld_sa_mux_lev_2_dec_out = 0; - double R_wire_wl_drv_out; - - if (!is_fa && !pure_cam) - { - R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + if (is_fa || pure_cam) { + num_subarrays_per_row = num_subarrays_per_mat > 2 ? + num_subarrays_per_mat / 2 : num_subarrays_per_mat; } - else if (is_fa && !pure_cam) - { + + if (dp.use_inp_params == 1) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + + } + + double number_sa_subarray; + + if (!is_fa && !pure_cam) { + number_sa_subarray = subarray.num_cols / deg_bl_muxing; + } else if (is_fa && !pure_cam) { + number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; + } + + else { + number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; + } + + int num_dec_signals = subarray.num_rows; + double C_ld_bit_mux_dec_out = 0; + double C_ld_sa_mux_lev_1_dec_out = 0; + double C_ld_sa_mux_lev_2_dec_out = 0; + double R_wire_wl_drv_out; + + if (!is_fa && !pure_cam) { + R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + } else if (is_fa && !pure_cam) { R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; - } - else - { + } else { R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; } - double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA - double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; + double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA + double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; - if (deg_bl_muxing > 1) - { - C_ld_bit_mux_dec_out = - (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } + if (deg_bl_muxing > 1) { + C_ld_bit_mux_dec_out = + (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) * + gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } - if (dp.Ndsam_lev_1 > 1) - { - C_ld_sa_mux_lev_1_dec_out = - (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - if (dp.Ndsam_lev_2 > 1) - { - C_ld_sa_mux_lev_2_dec_out = - (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } + if (dp.Ndsam_lev_1 > 1) { + C_ld_sa_mux_lev_1_dec_out = + (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + if (dp.Ndsam_lev_2 > 1) { + C_ld_sa_mux_lev_2_dec_out = + (num_subarrays_per_mat * number_sa_subarray / + (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } - if (num_subarrays_per_row >= 2) - { - // wire heads for both right and left side of a mat, so half the resistance - R_wire_bit_mux_dec_out /= 2.0; - R_wire_sa_mux_dec_out /= 2.0; - } + if (num_subarrays_per_row >= 2) { + // wire heads for both right and left side of a mat, so half the resistance + R_wire_bit_mux_dec_out /= 2.0; + R_wire_sa_mux_dec_out /= 2.0; + } - row_dec = new Decoder( - num_dec_signals, - false, - subarray.C_wl, - R_wire_wl_drv_out, - false/*is_fa*/, - is_dram, - true, - camFlag? cam_cell:cell); + row_dec = new Decoder( + num_dec_signals, + false, + subarray.C_wl, + R_wire_wl_drv_out, + false/*is_fa*/, + is_dram, + true, + camFlag ? cam_cell : cell); // if (is_fa && (!dp.is_tag)) // { // row_dec->exist = true; // } - bit_mux_dec = new Decoder( - deg_bl_muxing,// This number is 1 for FA or CAM - false, - C_ld_bit_mux_dec_out, - R_wire_bit_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - sa_mux_lev_1_dec = new Decoder( - dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM - dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal - C_ld_sa_mux_lev_1_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - sa_mux_lev_2_dec = new Decoder( - dp.Ndsam_lev_2, // This number is 1 for FA or CAM - false, - C_ld_sa_mux_lev_2_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); + bit_mux_dec = new Decoder( + deg_bl_muxing,// This number is 1 for FA or CAM + false, + C_ld_bit_mux_dec_out, + R_wire_bit_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + sa_mux_lev_1_dec = new Decoder( + dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM + dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal + C_ld_sa_mux_lev_1_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + sa_mux_lev_2_dec = new Decoder( + dp.Ndsam_lev_2, // This number is 1 for FA or CAM + false, + C_ld_sa_mux_lev_2_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); - double C_wire_predec_blk_out; - double R_wire_predec_blk_out; + double C_wire_predec_blk_out; + double R_wire_predec_blk_out; - if (!is_fa && !pure_cam) - { + if (!is_fa && !pure_cam) { - C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; - R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; + C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; + R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; - } - else //for pre-decode block's load is same for both FA and CAM - { - C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; - R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; - } + } else { //for pre-decode block's load is same for both FA and CAM + C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; + R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; + } - if (is_fa||pure_cam) - num_dec_signals += _log2(num_subarrays_per_mat); + if (is_fa || pure_cam) + num_dec_signals += _log2(num_subarrays_per_mat); - PredecBlk * r_predec_blk1 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - true); - PredecBlk * r_predec_blk2 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - false); - PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); - PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); - dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); - dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); + PredecBlk * r_predec_blk1 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + true); + PredecBlk * r_predec_blk2 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + false); + PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); + PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); + dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); + dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); - PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); - PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); - way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); - dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); + PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); + PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); + way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); + dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); - r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); - b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); - sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); - sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); + r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); + b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); + sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); + sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); - subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng + subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng - double driver_c_gate_load; - double driver_c_wire_load; - double driver_r_wire_load; + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; - if (is_fa || pure_cam) + if (is_fa || pure_cam) - { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same - driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - cam_bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); + { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same + driver_c_gate_load = (subarray.num_cols_fa_cam ) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, + is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * + g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * + g_tp.wire_outside_mat.R_per_um; + cam_bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); - if (!pure_cam) - { - //This is only used for fully asso not pure CAM - driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } - } + if (!pure_cam) { + //This is only used for fully asso not pure CAM + driver_c_gate_load = (subarray.num_cols_fa_ram ) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, + is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * + g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * + g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + } - else - { - driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } - double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); - double w_row_decoder = area_row_decoder / subarray.area.get_h(); + else { + driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); + double w_row_decoder = area_row_decoder / subarray.area.get_h(); - double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = - compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = + compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); - double h_subarray_out_drv = subarray_out_wire->area.get_area() * - (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); + double h_subarray_out_drv = subarray_out_wire->area.get_area() * + (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); - h_subarray_out_drv *= (RWP + ERP + SCHP); + h_subarray_out_drv *= (RWP + ERP + SCHP); - double h_comparators = 0.0; - double w_row_predecode_output_wires = 0.0; - double h_bit_mux_dec_out_wires = 0.0; - double h_senseamp_mux_dec_out_wires = 0.0; + double h_comparators = 0.0; + double w_row_predecode_output_wires = 0.0; + double h_bit_mux_dec_out_wires = 0.0; + double h_senseamp_mux_dec_out_wires = 0.0; - if ((!is_fa)&&(dp.is_tag)) - { - //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; - h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); - h_comparators *= (RWP + ERP); - } + if ((!is_fa) && (dp.is_tag)) { + //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; + h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); + h_comparators *= (RWP + ERP); + } int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); - double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * - (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + - h_subarray_out_drv + h_comparators); + double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * + (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + h_subarray_out_drv + h_comparators); - double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); + double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); - if (deg_bl_muxing > 1) - { - h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - if (dp.Ndsam_lev_1 > 1) - { - h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - if (dp.Ndsam_lev_2 > 1) - { - h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - - double h_addr_datain_wires; - if (!g_ip->ver_htree_wires_over_array) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + - (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); - - if (is_fa || pure_cam) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit - (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + - (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP; + if (deg_bl_muxing > 1) { + h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_1 > 1) { + h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_2 > 1) { + h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); } - //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + - //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); - h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + - h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + - h_addr_datain_wires + - h_bit_mux_dec_out_wires + - h_senseamp_mux_dec_out_wires; - } + double h_addr_datain_wires; + if (!g_ip->ver_htree_wires_over_array) { + h_addr_datain_wires = (dp.number_addr_bits_mat + + dp.number_way_select_signals_mat + + (dp.num_di_b_mat + dp.num_do_b_mat) / + num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); - // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; - double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + - b_mux_predec_blk_drv1->area.get_area() + - sa_mux_lev_1_predec_blk_drv1->area.get_area() + - sa_mux_lev_2_predec_blk_drv1->area.get_area() + - way_sel_drv1->area.get_area() + - r_predec_blk_drv2->area.get_area() + - b_mux_predec_blk_drv2->area.get_area() + - sa_mux_lev_1_predec_blk_drv2->area.get_area() + - sa_mux_lev_2_predec_blk_drv2->area.get_area() + - r_predec_blk1->area.get_area() + - b_mux_predec_blk1->area.get_area() + - sa_mux_lev_1_predec_blk1->area.get_area() + - sa_mux_lev_2_predec_blk1->area.get_area() + - r_predec_blk2->area.get_area() + - b_mux_predec_blk2->area.get_area() + - sa_mux_lev_1_predec_blk2->area.get_area() + - sa_mux_lev_2_predec_blk2->area.get_area() + - bit_mux_dec->area.get_area() + - sa_mux_lev_1_dec->area.get_area() + - sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); + if (is_fa || pure_cam) { + h_addr_datain_wires = + (dp.number_addr_bits_mat + + dp.number_way_select_signals_mat + //TODO: revisit + (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + + (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row * + g_tp.wire_inside_mat.pitch * SCHP; + } + //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + + //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); + h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + + h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + + h_addr_datain_wires + + h_bit_mux_dec_out_wires + + h_senseamp_mux_dec_out_wires; - double area_efficiency_mat; + } + + // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; + double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + + b_mux_predec_blk_drv1->area.get_area() + + sa_mux_lev_1_predec_blk_drv1->area.get_area() + + sa_mux_lev_2_predec_blk_drv1->area.get_area() + + way_sel_drv1->area.get_area() + + r_predec_blk_drv2->area.get_area() + + b_mux_predec_blk_drv2->area.get_area() + + sa_mux_lev_1_predec_blk_drv2->area.get_area() + + sa_mux_lev_2_predec_blk_drv2->area.get_area() + + r_predec_blk1->area.get_area() + + b_mux_predec_blk1->area.get_area() + + sa_mux_lev_1_predec_blk1->area.get_area() + + sa_mux_lev_2_predec_blk1->area.get_area() + + r_predec_blk2->area.get_area() + + b_mux_predec_blk2->area.get_area() + + sa_mux_lev_1_predec_blk2->area.get_area() + + sa_mux_lev_2_predec_blk2->area.get_area() + + bit_mux_dec->area.get_area() + + sa_mux_lev_1_dec->area.get_area() + + sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); + + double area_efficiency_mat; // if (!is_fa) // { - assert(num_subarrays_per_mat/num_subarrays_per_row>0); - area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area; + assert(num_subarrays_per_mat / num_subarrays_per_row > 0); + area.h = (num_subarrays_per_mat / num_subarrays_per_row) * + subarray.area.h + h_non_cell_area; area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; - area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; - area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area(); + area.w = (area.h * area.w + area_mat_center_circuitry) / area.h; + area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * + 100.0 / area.get_area(); // cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<0); - assert(area.w>0); + assert(area.h > 0); + assert(area.w > 0); // } // else // { @@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p) // area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; // area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); // } - } - - - -Mat::~Mat() -{ - delete row_dec; - delete bit_mux_dec; - delete sa_mux_lev_1_dec; - delete sa_mux_lev_2_dec; - - delete r_predec->blk1; - delete r_predec->blk2; - delete b_mux_predec->blk1; - delete b_mux_predec->blk2; - delete sa_mux_lev_1_predec->blk1; - delete sa_mux_lev_1_predec->blk2; - delete sa_mux_lev_2_predec->blk1; - delete sa_mux_lev_2_predec->blk2; - delete dummy_way_sel_predec_blk1; - delete dummy_way_sel_predec_blk2; - - delete r_predec->drv1; - delete r_predec->drv2; - delete b_mux_predec->drv1; - delete b_mux_predec->drv2; - delete sa_mux_lev_1_predec->drv1; - delete sa_mux_lev_1_predec->drv2; - delete sa_mux_lev_2_predec->drv1; - delete sa_mux_lev_2_predec->drv2; - delete way_sel_drv1; - delete dummy_way_sel_predec_blk_drv2; - - delete r_predec; - delete b_mux_predec; - delete sa_mux_lev_1_predec; - delete sa_mux_lev_2_predec; - - delete subarray_out_wire; - if (!pure_cam) - delete bl_precharge_eq_drv; - - if (is_fa || pure_cam) - { - delete sl_precharge_eq_drv ; - delete sl_data_drv ; - delete cam_bl_precharge_eq_drv; - delete ml_precharge_drv; - delete ml_to_ram_wl_drv; - } } -double Mat::compute_delays(double inrisetime) -{ - int k; - double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl; - double outrisetime_search, outrisetime, row_dec_outrisetime; - // delay calculation for tags of fully associative cache - if (is_fa || pure_cam) - { - //Compute search access time - outrisetime_search = compute_cam_delay(inrisetime); - if (is_fa) - { - bl_precharge_eq_drv->compute_delay(0); - k = ml_to_ram_wl_drv->number_gates - 1; - rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); - C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load; - tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); +Mat::~Mat() { + delete row_dec; + delete bit_mux_dec; + delete sa_mux_lev_1_dec; + delete sa_mux_lev_2_dec; - R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in - R_bl = subarray.num_rows * r_b_metal; - C_bl = subarray.C_bl; - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); + delete r_predec->blk1; + delete r_predec->blk2; + delete b_mux_predec->blk1; + delete b_mux_predec->blk2; + delete sa_mux_lev_1_predec->blk1; + delete sa_mux_lev_1_predec->blk2; + delete sa_mux_lev_2_predec->blk1; + delete sa_mux_lev_2_predec->blk2; + delete dummy_way_sel_predec_blk1; + delete dummy_way_sel_predec_blk2; + delete r_predec->drv1; + delete r_predec->drv2; + delete b_mux_predec->drv1; + delete b_mux_predec->drv2; + delete sa_mux_lev_1_predec->drv1; + delete sa_mux_lev_1_predec->drv2; + delete sa_mux_lev_2_predec->drv1; + delete sa_mux_lev_2_predec->drv2; + delete way_sel_drv1; + delete dummy_way_sel_predec_blk_drv2; - outrisetime_search = compute_bitline_delay(outrisetime_search); - outrisetime_search = compute_sa_delay(outrisetime_search); - } - outrisetime_search = compute_subarray_out_drv(outrisetime_search); - subarray_out_wire->set_in_rise_time(outrisetime_search); - outrisetime_search = subarray_out_wire->signal_rise_time(); - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + delete r_predec; + delete b_mux_predec; + delete sa_mux_lev_1_predec; + delete sa_mux_lev_2_predec; + delete subarray_out_wire; + if (!pure_cam) + delete bl_precharge_eq_drv; - //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. - outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); - - outrisetime = b_mux_predec->compute_delays(inrisetime); - bit_mux_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); - - if (pure_cam) - { - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - } - return outrisetime_search; + if (is_fa || pure_cam) { + delete sl_precharge_eq_drv ; + delete sl_data_drv ; + delete cam_bl_precharge_eq_drv; + delete ml_precharge_drv; + delete ml_to_ram_wl_drv; } - else - { - bl_precharge_eq_drv->compute_delay(0); - if (row_dec->exist == true) - { - int k = row_dec->num_gates - 1; - double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); - // TODO: this 4*cell.h number must be revisited - double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - double C_ld = row_dec->C_ld_dec_out; - double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - } - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - double r_b_metal = cell.h * g_tp.wire_local.R_per_um; - double R_bl = subarray.num_rows * r_b_metal; - double C_bl = subarray.C_bl; - - if (is_dram) - { - delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - else - { - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - } +} - outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); +double Mat::compute_delays(double inrisetime) { + int k; + double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl; + double outrisetime_search, outrisetime, row_dec_outrisetime; + // delay calculation for tags of fully associative cache + if (is_fa || pure_cam) { + //Compute search access time + outrisetime_search = compute_cam_delay(inrisetime); + if (is_fa) { + bl_precharge_eq_drv->compute_delay(0); + k = ml_to_ram_wl_drv->number_gates - 1; + rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); + C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 * + cell.h, is_dram, false, true) + + drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h, + is_dram, false, true); + C_ld = ml_to_ram_wl_drv->c_gate_load + + ml_to_ram_wl_drv->c_wire_load; + tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - outrisetime = b_mux_predec->compute_delays(inrisetime); - bit_mux_dec->compute_delays(outrisetime); + R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in + R_bl = subarray.num_rows * r_b_metal; + C_bl = subarray.C_bl; + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); + outrisetime_search = compute_bitline_delay(outrisetime_search); + outrisetime_search = compute_sa_delay(outrisetime_search); + } + outrisetime_search = compute_subarray_out_drv(outrisetime_search); + subarray_out_wire->set_in_rise_time(outrisetime_search); + outrisetime_search = subarray_out_wire->signal_rise_time(); + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - outrisetime = compute_subarray_out_drv(outrisetime); - subarray_out_wire->set_in_rise_time(outrisetime); - outrisetime = subarray_out_wire->signal_rise_time(); - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); - if (dp.is_tag == true && dp.fully_assoc == false) - { - compute_comparator_delay(0); - } + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); - if (row_dec->exist == false) - { - delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + if (pure_cam) { + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + } + return outrisetime_search; + } else { + bl_precharge_eq_drv->compute_delay(0); + if (row_dec->exist == true) { + int k = row_dec->num_gates - 1; + double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); + // TODO: this 4*cell.h number must be revisited + double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 * + cell.h, is_dram, false, true) + + drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram, + false, true); + double C_ld = row_dec->C_ld_dec_out; + double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + } + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + double r_b_metal = cell.h * g_tp.wire_local.R_per_um; + double R_bl = subarray.num_rows * r_b_metal; + double C_bl = subarray.C_bl; + + if (is_dram) { + delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } else { + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } } - return outrisetime; + + + + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + outrisetime = compute_subarray_out_drv(outrisetime); + subarray_out_wire->set_in_rise_time(outrisetime); + outrisetime = subarray_out_wire->signal_rise_time(); + + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + if (dp.is_tag == true && dp.fully_assoc == false) { + compute_comparator_delay(0); + } + + if (row_dec->exist == false) { + delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); + } + return outrisetime; } -double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() -{ +double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() { - double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) + - compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry + double height = + compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, + camFlag ? cam_cell.w : + cell.w / (2 * (RWP + ERP + SCHP))) + + // precharge circuitry + compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, + camFlag ? cam_cell.w : + cell.w / (RWP + ERP + SCHP)); - if (deg_bl_muxing > 1) - { - height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height - // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height - } + if (deg_bl_muxing > 1) { + // col mux tr height + height += + compute_tr_width_after_folding(g_tp.w_nmos_b_mux, + cell.w / (2 * (RWP + ERP))); + // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height + } - height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height + height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height - if (dp.Ndsam_lev_1 > 1) - { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height - //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); - } + if (dp.Ndsam_lev_1 > 1) { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + } - if (dp.Ndsam_lev_2 > 1) - { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height - //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + if (dp.Ndsam_lev_2 > 1) { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); - // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux - height += 2 * compute_tr_width_after_folding( - pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); - height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); - } + // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux + height += 2 * compute_tr_width_after_folding( + pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + } - // TODO: this should be uncommented... - /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) - { - //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); - double width_write_driver_write_mux = width_write_driver_or_write_mux(); - double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, - cell.w * - // deg_bl_muxing * - dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); - height += height_write_driver_write_mux; - }*/ + // TODO: this should be uncommented... + /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) + { + //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); + double width_write_driver_write_mux = width_write_driver_or_write_mux(); + double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, + cell.w * + // deg_bl_muxing * + dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); + height += height_write_driver_write_mux; + }*/ - return height; + return height; } -double Mat::compute_cam_delay(double inrisetime) -{ +double Mat::compute_cam_delay(double inrisetime) { - double out_time_ramp, this_delay; - double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; + double out_time_ramp, this_delay; + double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; - double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, + double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp, Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp, Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p; - double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; - int Htagbits; + double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; + int Htagbits; - double driver_c_gate_load; - double driver_c_wire_load; - double driver_r_wire_load; - //double searchline_precharge_time; + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + //double searchline_precharge_time; - double leak_power_cc_inverters_sram_cell = 0; - double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; - double leak_power_RD_port_sram_cell = 0; - double leak_power_SCHP_port_sram_cell = 0; - double leak_comparator_cam_cell =0; + double leak_power_cc_inverters_sram_cell = 0; + double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; + double leak_power_RD_port_sram_cell = 0; + double leak_power_SCHP_port_sram_cell = 0; + double leak_comparator_cam_cell =0; - double gate_leak_comparator_cam_cell = 0; - double gate_leak_power_cc_inverters_sram_cell = 0; - double gate_leak_power_RD_port_sram_cell = 0; - double gate_leak_power_SCHP_port_sram_cell = 0; + double gate_leak_comparator_cam_cell = 0; + double gate_leak_power_cc_inverters_sram_cell = 0; + double gate_leak_power_RD_port_sram_cell = 0; + double gate_leak_power_SCHP_port_sram_cell = 0; - c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; - c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; - r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; - r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; + c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; + c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; + r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; + r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; - dynSearchEng = 0.0; - delay_matchchline = 0.0; - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); - bool linear_scaling = false; + dynSearchEng = 0.0; + delay_matchchline = 0.0; + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); + bool linear_scaling = false; - if (linear_scaling) - { - Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process - Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process - Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process - Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process - Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + if (linear_scaling) { + Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process - Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - W_hit_miss_n = Wdummyn; - W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; - //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort - } - else - { - Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process - Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process - Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process - Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process - Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort + } else { + Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process - Wdummyn = g_tp.cam.cell_nmos_w; - Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process - Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - W_hit_miss_n = Wdummyn; - W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; - } + Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process + Wdummyn = g_tp.cam.cell_nmos_w; + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + } - Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); + Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); - /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. - search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. - From the driver(am and an) to the comparators in all the rows including the dummy row, - Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ + /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. + search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. + From the driver(am and an) to the comparators in all the rows including the dummy row, + Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ - //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports - //Searchline precharge routes horizontally - driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; + //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports + //Searchline precharge routes horizontally + driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - sl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); + sl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); - //searchline data driver ; subarray.num_rows + 1 is because of the dummy row - //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines - driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); - driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; - driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - sl_data_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); + //searchline data driver ; subarray.num_rows + 1 is because of the dummy row + //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + sl_data_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); - sl_precharge_eq_drv->compute_delay(0); - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr - double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; - double R_bl = (subarray.num_rows + 1) * r_b_metal; - double C_bl = subarray.C_bl_cam; - delay_cam_sl_restore = sl_precharge_eq_drv->delay - + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2); + sl_precharge_eq_drv->compute_delay(0); + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr + double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; + double R_bl = (subarray.num_rows + 1) * r_b_metal; + double C_bl = subarray.C_bl_cam; + delay_cam_sl_restore = sl_precharge_eq_drv->delay + + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2); - out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) + out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) - //matchline ops delay - delay_matchchline += sl_data_drv->delay; + //matchline ops delay + delay_matchchline += sl_data_drv->delay; - /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ - //matchline delay, matchline power, matchline_reset for cycle time computation, + /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ + //matchline delay, matchline power, matchline_reset for cycle time computation, - ////matchline precharge circuitry routes vertically - //There are two matchline precharge driver chains per subarray. - driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); - driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; - driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + ////matchline precharge circuitry routes vertically + //There are two matchline precharge driver chains per subarray. + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - ml_precharge_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); + ml_precharge_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); - ml_precharge_drv->compute_delay(0); + ml_precharge_drv->compute_delay(0); - rd = tr_R_on(Wdummyn, NCH, 2, is_dram); - c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit - + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline + rd = tr_R_on(Wdummyn, NCH, 2, is_dram); + c_intrinsic = Htagbits * + (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, + is_dram)//TODO: the cell_h_def should be revisit + + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) / + Htagbits);//since each halve only has one precharge tx per matchline - Cwire = c_matchline_metal * Htagbits; - Rwire = r_matchline_metal * Htagbits; - c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); + Cwire = c_matchline_metal * Htagbits; + Rwire = r_matchline_metal * Htagbits; + c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); - double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); - //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; - double R_ml = Rwire; - double C_ml = Cwire + c_intrinsic; - delay_cam_ml_reset = ml_precharge_drv->delay - + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too + double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_ml = Rwire; + double C_ml = Cwire + c_intrinsic; + //TODO: latest CAM has sense amps on matchlines too + delay_cam_ml_reset = ml_precharge_drv->delay + + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2); - //matchline ops delay - tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); - delay_matchchline += this_delay; - out_time_ramp = this_delay / VTHFA3; + //matchline ops delay + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); + delay_matchchline += this_delay; + out_time_ramp = this_delay / VTHFA3; - dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise - * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves + dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) * + (subarray.num_rows + 1)) //TODO: need to be precise + * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * + 2;//each subarry has two halves - /* third stage, from the NAND2 gates to the drivers in the dummy row */ - rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); - c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + - drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2; - c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); - out_time_ramp = this_delay / (1 - VTHFA4); - delay_matchchline += this_delay; + /* third stage, from the NAND2 gates to the drivers in the dummy row */ + rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); + c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2; + c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); + out_time_ramp = this_delay / (1 - VTHFA4); + delay_matchchline += this_delay; - //only the dummy row has the extra inverter between NAND and NOR gates - dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; + //only the dummy row has the extra inverter between NAND and NOR gates + dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) * + g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; - /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ - rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); - c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2; - Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2; - c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); - tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); - out_time_ramp = this_delay / VTHFA5; - delay_matchchline += this_delay; + /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ + rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); + c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Cwire = c_matchline_metal * Htagbits + c_searchline_metal * + (subarray.num_rows + 1) / 2; + Rwire = r_matchline_metal * Htagbits + r_searchline_metal * + (subarray.num_rows + 1) / 2; + c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); + tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); + out_time_ramp = this_delay / VTHFA5; + delay_matchchline += this_delay; - dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) * + g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; - /*final statge from the NOR gate to drive the wordline of the data portion */ + /*final statge from the NOR gate to drive the wordline of the data portion */ - //searchline data driver There are two matchline precharge driver chains per subarray. - driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic - driver_c_wire_load = subarray.C_wl_ram; - driver_r_wire_load = subarray.R_wl_ram; + //searchline data driver There are two matchline precharge driver chains per subarray. + driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic + driver_c_wire_load = subarray.C_wl_ram; + driver_r_wire_load = subarray.R_wl_ram; - ml_to_ram_wl_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); + ml_to_ram_wl_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); - rd = tr_R_on(Wfanorn, NCH, 1, is_dram); - c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); - c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); - out_time_ramp = this_delay / (1-0.5); - delay_matchchline += this_delay; + rd = tr_R_on(Wfanorn, NCH, 1, is_dram); + c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); + out_time_ramp = this_delay / (1 - 0.5); + delay_matchchline += this_delay; - out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); + out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); - //c_gate_load energy is computed in ml_to_ram_wl_drv - dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + //c_gate_load energy is computed in ml_to_ram_wl_drv + dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; - /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ - /*Precharge the hitting logic */ - c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); - Cwire = c_searchline_metal * subarray.num_rows; - Rwire = r_searchline_metal * subarray.num_rows; - c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; + /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ + /*Precharge the hitting logic */ + c_intrinsic = 2 * + drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * + subarray.num_rows; - rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); - //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; - double R_hit_miss = Rwire; - double C_hit_miss = Cwire + c_intrinsic; - delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_hit_miss = Rwire; + double C_hit_miss = Cwire + c_intrinsic; + delay_hit_miss_reset = log(g_tp.cam.Vbitpre) * + (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - /*hitting logic evaluation */ - c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); - Cwire = c_searchline_metal * subarray.num_rows; - Rwire = r_searchline_metal * subarray.num_rows; - c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; + /*hitting logic evaluation */ + c_intrinsic = 2 * + drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * + subarray.num_rows; - rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); - tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); + delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); - if (is_fa) - delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); + if (is_fa) + delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ + /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ - power_matchline.searchOp.dynamic = dynSearchEng; + power_matchline.searchOp.dynamic = dynSearchEng; - //leakage in one subarray - double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? - double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); - double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv + //leakage in one subarray + double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? + double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); + double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, + 1, inv, false, true) * 2; + //approx XOR with Inv + double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, + false, true) * 2; - leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; - leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; - leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; - leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; - leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports + leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; + leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; + leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; + leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports - power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + - leak_comparator_cam_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP + - leak_power_SCHP_port_sram_cell*SCHP; + power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + + leak_comparator_cam_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP + + leak_power_SCHP_port_sram_cell * SCHP; // power_matchline.searchOp.leakage += leak_comparator_cam_cell; - power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd; - //In idle states, the hit/miss txs are closed (on) therefore no Isub - power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ + power_matchline.searchOp.leakage *= (subarray.num_rows + 1) * + subarray.num_cols_fa_cam;//TODO:dumy line precise + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; + //In idle states, the hit/miss txs are closed (on) therefore no Isub + power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; - //in idle state, Ig_on only possibly exist in access transistors of read only ports - double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; + //in idle state, Ig_on only possibly exist in access transistors of read only ports + double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); + double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, + 1, inv, false, true) * 2; + double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, + false, true) * 2; - gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd; - gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd; - gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; - gate_leak_power_SCHP_port_sram_cell = 0; + gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd; + gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; + gate_leak_power_SCHP_port_sram_cell = 0; - //cout<<"power_matchline.searchOp.leakage"< 1) - { - tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* - subarray.num_cols * num_subarrays_per_mat*/; - dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); - dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; - //Write Ops are differential for SRAM + gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd; + gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd; } - else - { - tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; - dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; + + + double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w / + (2 * (RWP + ERP + SCHP)), is_dram); + double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); + double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, + camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram); + double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); + double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, + is_dram) + + drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + + drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram); + + if (is_dram) { + double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl)); + tstep = 2.3 * fraction * r_dev * + (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)) / + (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)); + delay_writeback = tstep; + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * + num_act_mats_hor_dir * 100; + per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; + } else { + double tau; + + if (deg_bl_muxing > 1) { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 * + C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) + + R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * + g_tp.sram_cell.Vdd; + dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * + (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing); + dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / + deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + //Write Ops are differential for SRAM + } else { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * C_bl) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + + } + tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); + power_bitline.readOp.leakage = + leak_power_cc_inverters_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP; + power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + + gate_leak_power_RD_port_sram_cell * ERP; } - tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); - power_bitline.readOp.leakage = - leak_power_cc_inverters_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP; - power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + - gate_leak_power_RD_port_sram_cell * ERP; - - } // cout<<"leak_power_cc_inverters_sram_cell"<repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - tf = rd * C_ld; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; + // delay of signal through pass-transistor to input of subarray output driver. + rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); + C_ld = dp.Ndsam_lev_2 * + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), + is_dram) + + //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); + gate_C(subarray_out_wire->repeater_size * + (subarray_out_wire->wire_length / + subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * + (1 + p_to_n_sz_r), 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; - return inrisetime; + return inrisetime; } -double Mat::compute_comparator_delay(double inrisetime) -{ - int A = g_ip->tag_assoc; +double Mat::compute_comparator_delay(double inrisetime) { + int A = g_ip->tag_assoc; - int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already - // a multiple of 4. + int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already + // a multiple of 4. - /* First Inverter */ - double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); - double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); - double tf = Req*Ceq; - double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL); - double nextinputtime = st1del/VTHCOMPINV; - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + /* First Inverter */ + double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); + double tf = Req * Ceq; + double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL); + double nextinputtime = st1del / VTHCOMPINV; + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - //For each degree of associativity - //there are 4 such quarter comparators - double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - /* Second Inverter */ - Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); - tf = Req*Ceq; - double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE); - nextinputtime = st2del/(1.0-VTHCOMPINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; + //For each degree of associativity + //there are 4 such quarter comparators + double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, + g_tp.w_comp_inv_p1, 1, inv, + is_dram) * 4 * A; + double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, + g_tp.w_comp_inv_p1, 1, inv, + is_dram) * 4 * A; + /* Second Inverter */ + Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); + tf = Req * Ceq; + double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE); + nextinputtime = st2del / (1.0 - VTHCOMPINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * 4 * A; - /* Third Inverter */ - Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); - tf = Req*Ceq; - double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL); - nextinputtime = st3del/(VTHEVALINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; + /* Third Inverter */ + Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); + tf = Req * Ceq; + double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL); + nextinputtime = st3del / (VTHEVALINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, + inv, is_dram) * 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, + 1, inv, is_dram) * 4 * A; - /* Final Inverter (virtual ground driver) discharging compare part */ - double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram); - double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */ - double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram); - double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram); - power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); - lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2 + /* Final Inverter (virtual ground driver) discharging compare part */ + double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram); + double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */ + double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, + g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, + g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, + g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, + g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram); + power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); + lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, + inv, is_dram) * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, + is_dram) * 4 * A; // stack factor of 0.2 - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, + inv, is_dram) * 4 * A; + //for gate leakage this equals to a inverter + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, + is_dram) * 4 * A; - /* time to go to threshold of mux driver */ - double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND); - /* take into account non-zero input rise time */ - double m = g_tp.peri_global.Vdd/nextinputtime; - double Tcomparatorni; + /* time to go to threshold of mux driver */ + double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND); + /* take into account non-zero input rise time */ + double m = g_tp.peri_global.Vdd / nextinputtime; + double Tcomparatorni; - if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m)) - { - double a = m; - double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); - } - else - { - Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m; - } - delay_comparator = Tcomparatorni+st1del+st2del+st3del; - power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; - power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; + if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) { + double a = m; + double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) - + g_tp.peri_global.Vth); + double c = -2 * (tstep) * (g_tp.peri_global.Vdd - + g_tp.peri_global.Vth) + 1 / m * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth); + Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a); + } else { + Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd + + g_tp.peri_global.Vth) / (2 * m) - + (g_tp.peri_global.Vdd * VTHEVALINV) / m; + } + delay_comparator = Tcomparatorni + st1del + st2del + st3del; + power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; + power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; - return Tcomparatorni / (1.0 - VTHMUXNAND);; + return Tcomparatorni / (1.0 - VTHMUXNAND);; } -void Mat::compute_power_energy() -{ - //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power +void Mat::compute_power_energy() { + //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power //when search all subarrays and all mats are fully active - //when plain read/write only one subarray in a single mat is active. + //when plain read/write only one subarray in a single mat is active. // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. - power.readOp.dynamic += r_predec->power.readOp.dynamic + - b_mux_predec->power.readOp.dynamic + - sa_mux_lev_1_predec->power.readOp.dynamic + - sa_mux_lev_2_predec->power.readOp.dynamic; + power.readOp.dynamic += r_predec->power.readOp.dynamic + + b_mux_predec->power.readOp.dynamic + + sa_mux_lev_1_predec->power.readOp.dynamic + + sa_mux_lev_2_predec->power.readOp.dynamic; - // add energy consumed in decoders - power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; - if (!(is_fa||pure_cam)) - power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; + // add energy consumed in decoders + power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; + if (!(is_fa || pure_cam)) + power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; - // add energy consumed in bitline prechagers, SAs, and bitlines - if (!(is_fa||pure_cam)) - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + // add energy consumed in bitline prechagers, SAs, and bitlines + if (!(is_fa || pure_cam)) { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - //Add sense amps energy - num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ; + //Add sense amps energy + num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ; - // add energy consumed in bitlines - //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat; + // add energy consumed in bitlines + //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat; - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - } + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + } - else if (is_fa) - { - //for plain read/write only one subarray in a mat is active - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic - + cam_bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + else if (is_fa) { + //for plain read/write only one subarray in a mat is active + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic + + cam_bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - //Add sense amps energy - num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing; - num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing; - power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search; - power_sa.readOp.dynamic *= num_sa_subarray; + //Add sense amps energy + num_sa_subarray = (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram) / deg_bl_muxing; + num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing; + power_sa.searchOp.dynamic = power_sa.readOp.dynamic * + num_sa_subarray_search; + power_sa.readOp.dynamic *= num_sa_subarray; - // add energy consumed in bitlines - power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; - power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; + // add energy consumed in bitlines + power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; + power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram); + power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram); + power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; - //Add subarray output energy - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + //Add subarray output energy + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; - //add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; + //add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - } - else - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; + } else { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; - //Add sense amps energy - num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; - power_sa.searchOp.dynamic = 0; + //Add sense amps energy + num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; + power_sa.searchOp.dynamic = 0; - power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; - power_bitline.searchOp.dynamic = 0; - power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; + power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; + power_bitline.searchOp.dynamic = 0; + power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; - ////add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; + ////add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic = + power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - } + } - // calculate leakage power - if (!(is_fa || pure_cam)) - { + // calculate leakage power + if (!(is_fa || pure_cam)) { int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP); - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); - power.readOp.leakage += power_bitline.readOp.leakage + - power_bl_precharge_eq_drv.readOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; - //cout<<"leakage"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; - power.readOp.leakage += r_predec->power.readOp.leakage + - b_mux_predec->power.readOp.leakage + - sa_mux_lev_1_predec->power.readOp.leakage + - sa_mux_lev_2_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage + - power_bit_mux_decoders.readOp.leakage + - power_sa_mux_lev_1_decoders.readOp.leakage + - power_sa_mux_lev_2_decoders.readOp.leakage; - //cout<<"leakage2"<power.readOp.leakage + + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; + //cout<<"leakage2"<power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); + power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP); - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - //cout<<"leakage"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; + // gate_leakage power + power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - b_mux_predec->power.readOp.gate_leakage + - sa_mux_lev_1_predec->power.readOp.gate_leakage + - sa_mux_lev_2_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage + - power_bit_mux_decoders.readOp.gate_leakage + - power_sa_mux_lev_1_decoders.readOp.gate_leakage + - power_sa_mux_lev_2_decoders.readOp.gate_leakage; - } - else if (is_fa) - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + b_mux_predec->power.readOp.gate_leakage + + sa_mux_lev_1_predec->power.readOp.gate_leakage + + sa_mux_lev_2_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage + + power_bit_mux_decoders.readOp.gate_leakage + + power_sa_mux_lev_1_decoders.readOp.gate_leakage + + power_sa_mux_lev_2_decoders.readOp.gate_leakage; + } else if (is_fa) { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); - //cout<<"leakage3"<power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.leakage += power_bitline.readOp.leakage + - power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; - //cout<<"leakage4"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; - //cout<<"leakage5"<power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + //inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= + num_subarrays_per_mat; - power.readOp.leakage += power_cam_all_active.searchOp.leakage; + power.readOp.leakage += power_cam_all_active.searchOp.leakage; // cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + //+++Below is gate leakage + power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP + SCHP); - //cout<<"leakage3"<power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; - //cout<<"leakage4"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + // gate_leakage power + power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; - //cout<<"leakage5"<power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + //inside cam + power_cam_all_active.searchOp.gate_leakage = + power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_precharge_eq_drv->power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; - } - else - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + } else { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.leakage += //power_bitline.readOp.leakage + - //power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; + power.readOp.leakage += //power_bitline.readOp.leakage + + //power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; - // leakage power - power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; + // leakage power + power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * + subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; - //inside cam - power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + //inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; - power.readOp.leakage += power_cam_all_active.searchOp.leakage; + power.readOp.leakage += power_cam_all_active.searchOp.leakage; - //+++Below is gate leakage - power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + //+++Below is gate leakage + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP + SCHP); - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + - //power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; + power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + + //power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; - // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + // gate_leakage power + power_row_decoders.readOp.gate_leakage = + row_dec->power.readOp.gate_leakage * subarray.num_rows * + num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + //inside cam + power_cam_all_active.searchOp.gate_leakage = + power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_precharge_eq_drv->power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= + num_subarrays_per_mat; - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; - } + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + } } diff --git a/ext/mcpat/cacti/mat.h b/ext/mcpat/cacti/mat.h index 8d038be8b..38200107c 100755 --- a/ext/mcpat/cacti/mat.h +++ b/ext/mcpat/cacti/mat.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,9 +40,8 @@ #include "subarray.h" #include "wire.h" -class Mat : public Component -{ - public: +class Mat : public Component { +public: Mat(const DynamicParameter & dyn_p); ~Mat(); double compute_delays(double inrisetime); // return outrisetime @@ -106,8 +106,8 @@ class Mat : public Component int deg_bl_muxing; int num_act_mats_hor_dir; double delay_writeback; - Area cell,cam_cell; - bool is_dram,is_fa, pure_cam, camFlag; + Area cell, cam_cell; + bool is_dram, is_fa, pure_cam, camFlag; int num_mats; powerDef power_sa; double delay_sa; @@ -127,7 +127,7 @@ class Mat : public Component uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat - private: +private: double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); double width_write_driver_or_write_mux(); double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w); diff --git a/ext/mcpat/cacti/nuca.cc b/ext/mcpat/cacti/nuca.cc index 2aabe843f..e0b4dcdaf 100644 --- a/ext/mcpat/cacti/nuca.cc +++ b/ext/mcpat/cacti/nuca.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -36,89 +37,86 @@ #include "Ucache.h" #include "nuca.h" -unsigned int MIN_BANKSIZE=65536; +unsigned int MIN_BANKSIZE = 65536; #define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */ #define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */ #define CONTR_2_BANK_LAT 0 int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */]; - Nuca::Nuca( - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ):deviceType(dt) -{ - init_cont(); +Nuca::Nuca( + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) +): deviceType(dt) { + init_cont(); } void -Nuca::init_cont() -{ - FILE *cont; - char line[5000]; - char jk[5000]; - cont = fopen("contention.dat", "r"); - if (!cont) { - cout << "contention.dat file is missing!\n"; - exit(0); - } - - for(int i=0; i<2; i++) { - for(int j=2; j<5; j++) { - for(int k=0; k nuca_list; - Router *router_s[ROUTER_TYPES]; - router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global)); - router_s[0]->print_router(); - router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global)); - router_s[1]->print_router(); - router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global)); - router_s[2]->print_router(); +void +Nuca::sim_nuca() { + /* temp variables */ + int it, ro, wr; + int num_cyc; + unsigned int i, j, k; + unsigned int r, c; + int l2_c; + int bank_count = 0; + uca_org_t ures; + nuca_org_t *opt_n; + mem_array tag, data; + list nuca_list; + Router *router_s[ROUTER_TYPES]; + router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global)); + router_s[0]->print_router(); + router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global)); + router_s[1]->print_router(); + router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global)); + router_s[2]->print_router(); - int core_in; // to store no. of cores + int core_in; // to store no. of cores - /* to search diff grid organizations */ - double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat, - curr_acclat; - double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power, - avg_leakage_power; + /* to search diff grid organizations */ + double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat, + curr_acclat; + double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power, + avg_leakage_power; - double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF; - int opt_rows = 0; - int opt_columns = 0; - double opt_totno_hops = 0; - double opt_avg_hop = 0; - double opt_dyn_power = 0, opt_leakage_power = 0; - min_values_t minval; + double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF; + int opt_rows = 0; + int opt_columns = 0; + double opt_totno_hops = 0; + double opt_avg_hop = 0; + double opt_dyn_power = 0, opt_leakage_power = 0; + min_values_t minval; - int bank_start = 0; + int bank_start = 0; - int flit_width = 0; + int flit_width = 0; - /* vertical and horizontal hop latency values */ - int ver_hop_lat, hor_hop_lat; /* in cycles */ + /* vertical and horizontal hop latency values */ + int ver_hop_lat, hor_hop_lat; /* in cycles */ - /* no. of different bank sizes to consider */ - int iterations; + /* no. of different bank sizes to consider */ + int iterations; - g_ip->nuca_cache_sz = g_ip->cache_sz; - nuca_list.push_back(new nuca_org_t()); + g_ip->nuca_cache_sz = g_ip->cache_sz; + nuca_list.push_back(new nuca_org_t()); - if (g_ip->cache_level == 0) l2_c = 1; - else l2_c = 0; + if (g_ip->cache_level == 0) l2_c = 1; + else l2_c = 0; - if (g_ip->cores <= 4) core_in = 2; - else if (g_ip->cores <= 8) core_in = 3; - else if (g_ip->cores <= 16) core_in = 4; - else {cout << "Number of cores should be <= 16!\n"; exit(0);} - - - // set the lower bound to an appropriate value. this depends on cache associativity - if (g_ip->assoc > 2) { - i = 2; - while (i != g_ip->assoc) { - MIN_BANKSIZE *= 2; - i *= 2; - } - } - - iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE); - - if (g_ip->force_wiretype) - { - if (g_ip->wt == Low_swing) { - wt_min = Low_swing; - wt_max = Low_swing; - } + if (g_ip->cores <= 4) core_in = 2; + else if (g_ip->cores <= 8) core_in = 3; + else if (g_ip->cores <= 16) core_in = 4; else { - wt_min = Global; - wt_max = Low_swing-1; + cout << "Number of cores should be <= 16!\n"; + exit(0); } - } - else { - wt_min = Global; - wt_max = Low_swing; - } - if (g_ip->nuca_bank_count != 0) { // simulate just one bank - if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && - g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && - g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { - fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n"); + + + // set the lower bound to an appropriate value. this depends on cache associativity + if (g_ip->assoc > 2) { + i = 2; + while (i != g_ip->assoc) { + MIN_BANKSIZE *= 2; + i *= 2; + } } - bank_start = (int)logtwo((double)g_ip->nuca_bank_count); - iterations = bank_start+1; - g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count; - } - cout << "Simulating various NUCA configurations\n"; - for (it=bank_start; itnuca_cache_sz/g_ip->cache_sz; - cout << "====" << g_ip->cache_sz << "\n"; - for (wr=wt_min; wr<=wt_max; wr++) { - - for (ro=0; roflit_size; //initialize router - nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; - - /* calculate router and wire parameters */ - - double vlength = ures.cache_ht; /* length of the wire (u)*/ - double hlength = ures.cache_len; // u - - /* find delay, area, and power for wires */ - wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); - wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); - - - hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); - ver_hop_lat = calc_cycles(wire_vertical[wr]->delay, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE); + if (g_ip->force_wiretype) { + if (g_ip->wt == Low_swing) { + wt_min = Low_swing; + wt_max = Low_swing; + } else { + wt_min = Global; + wt_max = Low_swing - 1; + } + } else { + wt_min = Global; + wt_max = Low_swing; + } + if (g_ip->nuca_bank_count != 0) { // simulate just one bank + if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && + g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && + g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { + fprintf(stderr, "Incorrect bank count value! Please fix the ", + "value in cache.cfg\n"); + } + bank_start = (int)logtwo((double)g_ip->nuca_bank_count); + iterations = bank_start + 1; + g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count; + } + cout << "Simulating various NUCA configurations\n"; + for (it = bank_start; it < iterations; it++) { + /* different bank count values */ + ures.tag_array2 = &tag; + ures.data_array2 = &data; /* - * assume a grid like topology and explore for optimal network - * configuration using different row and column count values. + * find the optimal bank organization */ - for (c=1; c<=(unsigned int)bank_count; c++) { - while (bank_count%c != 0) c++; - r = bank_count/c; + solve(&ures); +// output_UCA(&ures); + bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz; + cout << "====" << g_ip->cache_sz << "\n"; - /* - * to find the avg access latency of a NUCA cache, uncontended - * access time to each bank from the - * cache controller is calculated. - * avg latency = - * sum of the access latencies to individual banks)/bank - * count value. - */ - totno_hops = totno_hhops = totno_vhops = tot_lat = 0; - k = 1; - for (i=0; iflit_size; //initialize router + nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; + + /* calculate router and wire parameters */ + + double vlength = ures.cache_ht; /* length of the wire (u)*/ + double hlength = ures.cache_len; // u + + /* find delay, area, and power for wires */ + wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); + wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); + + + hor_hop_lat = + calc_cycles(wire_horizontal[wr]->delay, + 1 /(nuca_list.back()->nuca_pda.cycle_time * + .001)); + ver_hop_lat = + calc_cycles(wire_vertical[wr]->delay, + 1 / (nuca_list.back()->nuca_pda.cycle_time * + .001)); + + /* + * assume a grid like topology and explore for optimal network + * configuration using different row and column count values. + */ + for (c = 1; c <= (unsigned int)bank_count; c++) { + while (bank_count % c != 0) c++; + r = bank_count / c; + + /* + * to find the avg access latency of a NUCA cache, uncontended + * access time to each bank from the + * cache controller is calculated. + * avg latency = + * sum of the access latencies to individual banks)/bank + * count value. + */ + totno_hops = totno_hhops = totno_vhops = tot_lat = 0; + k = 1; + for (i = 0; i < r; i++) { + for (j = 0; j < c; j++) { + /* + * vertical hops including the + * first hop from the cache controller + */ + curr_hop = i + 1; + curr_hop += j; /* horizontal hops */ + totno_hhops += j; + totno_vhops += (i + 1); + curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT + + j * hor_hop_lat); + + tot_lat += curr_acclat; + totno_hops += curr_hop; + } + } + avg_lat = tot_lat / bank_count; + avg_hop = totno_hops / bank_count; + avg_hhop = totno_hhops / bank_count; + avg_vhop = totno_vhops / bank_count; + + /* net access latency */ + curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay * + avg_hop) + + calc_cycles(ures.access_time, + 1 / + (nuca_list.back()->nuca_pda.cycle_time * + .001)); + + /* avg access lat of nuca */ + avg_dyn_power = + avg_hop * + (router_s[ro]->power.readOp.dynamic) + avg_hhop * + (wire_horizontal[wr]->power.readOp.dynamic) * + (g_ip->block_sz * 8 + 64) + avg_vhop * + (wire_vertical[wr]->power.readOp.dynamic) * + (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic; + + avg_leakage_power = + bank_count * router_s[ro]->power.readOp.leakage + + avg_hhop * (wire_horizontal[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay) * flit_width + + avg_vhop * (wire_vertical[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay); + + if (curr_acclat < opt_acclat) { + opt_acclat = curr_acclat; + opt_tot_lat = tot_lat; + opt_avg_lat = avg_lat; + opt_totno_hops = totno_hops; + opt_avg_hop = avg_hop; + opt_rows = r; + opt_columns = c; + opt_dyn_power = avg_dyn_power; + opt_leakage_power = avg_leakage_power; + } + totno_hops = 0; + tot_lat = 0; + totno_hhops = 0; + totno_vhops = 0; + } + nuca_list.back()->wire_pda.power.readOp.dynamic = + opt_avg_hop * flit_width * + (wire_horizontal[wr]->power.readOp.dynamic + + wire_vertical[wr]->power.readOp.dynamic); + nuca_list.back()->avg_hops = opt_avg_hop; + /* network delay/power */ + nuca_list.back()->h_wire = wire_horizontal[wr]; + nuca_list.back()->v_wire = wire_vertical[wr]; + nuca_list.back()->router = router_s[ro]; + /* bank delay/power */ + + nuca_list.back()->bank_pda.delay = ures.access_time; + nuca_list.back()->bank_pda.power = ures.power; + nuca_list.back()->bank_pda.area.h = ures.cache_ht; + nuca_list.back()->bank_pda.area.w = ures.cache_len; + nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; + + num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, + 1 / + (nuca_list.back()->nuca_pda.cycle_time * + .001/*GHz*/)); + if (num_cyc % 2 != 0) num_cyc++; + if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles + + if (it < 7) { + nuca_list.back()->nuca_pda.delay = opt_acclat + + cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + nuca_list.back()->contention = + cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + } else { + nuca_list.back()->nuca_pda.delay = opt_acclat + + cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + nuca_list.back()->contention = + cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + } + nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; + nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; + + /* array organization */ + nuca_list.back()->bank_count = bank_count; + nuca_list.back()->rows = opt_rows; + nuca_list.back()->columns = opt_columns; + calculate_nuca_area (nuca_list.back()); + + minval.update_min_values(nuca_list.back()); + nuca_list.push_back(new nuca_org_t()); + opt_acclat = BIGNUM; - tot_lat += curr_acclat; - totno_hops += curr_hop; } - } - avg_lat = tot_lat/bank_count; - avg_hop = totno_hops/bank_count; - avg_hhop = totno_hhops/bank_count; - avg_vhop = totno_vhops/bank_count; - - /* net access latency */ - curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) + - calc_cycles(ures.access_time, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); - - /* avg access lat of nuca */ - avg_dyn_power = - avg_hop * - (router_s[ro]->power.readOp.dynamic) + avg_hhop * - (wire_horizontal[wr]->power.readOp.dynamic) * - (g_ip->block_sz*8 + 64) + avg_vhop * - (wire_vertical[wr]->power.readOp.dynamic) * - (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic; - - avg_leakage_power = - bank_count * router_s[ro]->power.readOp.leakage + - avg_hhop * (wire_horizontal[wr]->power.readOp.leakage* - wire_horizontal[wr]->delay) * flit_width + - avg_vhop * (wire_vertical[wr]->power.readOp.leakage * - wire_horizontal[wr]->delay); - - if (curr_acclat < opt_acclat) { - opt_acclat = curr_acclat; - opt_tot_lat = tot_lat; - opt_avg_lat = avg_lat; - opt_totno_hops = totno_hops; - opt_avg_hop = avg_hop; - opt_rows = r; - opt_columns = c; - opt_dyn_power = avg_dyn_power; - opt_leakage_power = avg_leakage_power; - } - totno_hops = 0; - tot_lat = 0; - totno_hhops = 0; - totno_vhops = 0; } - nuca_list.back()->wire_pda.power.readOp.dynamic = - opt_avg_hop * flit_width * - (wire_horizontal[wr]->power.readOp.dynamic + - wire_vertical[wr]->power.readOp.dynamic); - nuca_list.back()->avg_hops = opt_avg_hop; - /* network delay/power */ - nuca_list.back()->h_wire = wire_horizontal[wr]; - nuca_list.back()->v_wire = wire_vertical[wr]; - nuca_list.back()->router = router_s[ro]; - /* bank delay/power */ + g_ip->cache_sz /= 2; + } - nuca_list.back()->bank_pda.delay = ures.access_time; - nuca_list.back()->bank_pda.power = ures.power; - nuca_list.back()->bank_pda.area.h = ures.cache_ht; - nuca_list.back()->bank_pda.area.w = ures.cache_len; - nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; + delete(nuca_list.back()); + nuca_list.pop_back(); + opt_n = find_optimal_nuca(&nuca_list, &minval); + print_nuca(opt_n); + g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count; - num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/)); - if(num_cyc%2 != 0) num_cyc++; - if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles + list::iterator niter; + for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) { + delete *niter; + } + nuca_list.clear(); - if (it < 7) { - nuca_list.back()->nuca_pda.delay = opt_acclat + - cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; - nuca_list.back()->contention = - cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + for (int i = 0; i < ROUTER_TYPES; i++) { + delete router_s[i]; + } + g_ip->display_ip(); + // g_ip->force_cache_config = true; + // g_ip->ndwl = 8; + // g_ip->ndbl = 16; + // g_ip->nspd = 4; + // g_ip->ndcm = 1; + // g_ip->ndsam1 = 8; + // g_ip->ndsam2 = 32; + +} + + +void +Nuca::print_nuca (nuca_org_t *fr) { + printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " + "----------\n\n"); + printf("Optimal number of banks - %d\n", fr->bank_count); + printf("Grid organization rows x columns - %d x %d\n", + fr->rows, fr->columns); + printf("Network frequency - %g GHz\n", + (1 / fr->nuca_pda.cycle_time)*1e3); + printf("Cache dimension (mm x mm) - %g x %g\n", + fr->nuca_pda.area.h, + fr->nuca_pda.area.w); + + fr->router->print_router(); + + printf("\n\nWire stats:\n"); + if (fr->h_wire->wt == Global) { + printf("\tWire type - Full swing global wires with least " + "possible delay\n"); + } else if (fr->h_wire->wt == Global_5) { + printf("\tWire type - Full swing global wires with " + "5%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_10) { + printf("\tWire type - Full swing global wires with " + "10%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_20) { + printf("\tWire type - Full swing global wires with " + "20%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_30) { + printf("\tWire type - Full swing global wires with " + "30%% delay penalty\n"); + } else if (fr->h_wire->wt == Low_swing) { + printf("\tWire type - Low swing wires\n"); + } + + printf("\tHorizontal link delay - %g (ns)\n", + fr->h_wire->delay*1e9); + printf("\tVertical link delay - %g (ns)\n", + fr->v_wire->delay*1e9); + printf("\tDelay/length - %g (ns/mm)\n", + fr->h_wire->delay*1e9 / fr->bank_pda.area.w); + printf("\tHorizontal link energy -dynamic/access %g (nJ)\n" + "\t -leakage %g (nW)\n\n", + fr->h_wire->power.readOp.dynamic*1e9, + fr->h_wire->power.readOp.leakage*1e9); + printf("\tVertical link energy -dynamic/access %g (nJ)\n" + "\t -leakage %g (nW)\n\n", + fr->v_wire->power.readOp.dynamic*1e9, + fr->v_wire->power.readOp.leakage*1e9); + printf("\n\n"); + fr->v_wire->print_wire(); + printf("\n\nBank stats:\n"); +} + + +nuca_org_t * +Nuca::find_optimal_nuca (list *n, min_values_t *minval) { + double cost = 0; + double min_cost = BIGNUM; + nuca_org_t *res = NULL; + float d, a, dp, lp, c; + int v; + dp = g_ip->dynamic_power_wt_nuca; + lp = g_ip->leakage_power_wt_nuca; + a = g_ip->area_wt_nuca; + d = g_ip->delay_wt_nuca; + c = g_ip->cycle_time_wt_nuca; + + list::iterator niter; + + + for (niter = n->begin(); niter != n->end(); niter++) { + fprintf(stderr, "\n-----------------------------" + "---------------\n"); + + + printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " + "bank_dpower = %g \tleak = %g \tcycle = %g\n", + (*niter)->bank_count, + (*niter)->nuca_pda.delay, + (*niter)->nuca_pda.power.readOp.dynamic, + (*niter)->h_wire->wt, + (*niter)->bank_pda.power.readOp.dynamic, + (*niter)->nuca_pda.power.readOp.leakage, + (*niter)->nuca_pda.cycle_time); + + + if (g_ip->ed == 1) { + cost = ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } else if (g_ip->ed == 2) { + cost = ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } else { + /* + * check whether the current organization + * meets the input deviation constraints + */ + v = check_nuca_org((*niter), minval); + if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling + + if (v) { + cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) + + c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) + + dp * ((*niter)->nuca_pda.power.readOp.dynamic / + minval->min_dyn) + + lp * ((*niter)->nuca_pda.power.readOp.leakage / + minval->min_leakage) + + a * ((*niter)->nuca_pda.area.get_area() / + minval->min_area)); + fprintf(stderr, "cost = %g\n", cost); + + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } else { + niter = n->erase(niter); + if (niter != n->begin()) + niter --; + } } - else { - nuca_list.back()->nuca_pda.delay = opt_acclat + - cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; - nuca_list.back()->contention = - cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; - } - nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; - nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; - - /* array organization */ - nuca_list.back()->bank_count = bank_count; - nuca_list.back()->rows = opt_rows; - nuca_list.back()->columns = opt_columns; - calculate_nuca_area (nuca_list.back()); - - minval.update_min_values(nuca_list.back()); - nuca_list.push_back(new nuca_org_t()); - opt_acclat = BIGNUM; - - } } - g_ip->cache_sz /= 2; - } - - delete(nuca_list.back()); - nuca_list.pop_back(); - opt_n = find_optimal_nuca(&nuca_list, &minval); - print_nuca(opt_n); - g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count; - - list::iterator niter; - for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) - { - delete *niter; - } - nuca_list.clear(); - - for(int i=0; i < ROUTER_TYPES; i++) - { - delete router_s[i]; - } - g_ip->display_ip(); - // g_ip->force_cache_config = true; - // g_ip->ndwl = 8; - // g_ip->ndbl = 16; - // g_ip->nspd = 4; - // g_ip->ndcm = 1; - // g_ip->ndsam1 = 8; - // g_ip->ndsam2 = 32; - + return res; } - - void -Nuca::print_nuca (nuca_org_t *fr) -{ - printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " - "----------\n\n"); - printf("Optimal number of banks - %d\n", fr->bank_count); - printf("Grid organization rows x columns - %d x %d\n", - fr->rows, fr->columns); - printf("Network frequency - %g GHz\n", - (1/fr->nuca_pda.cycle_time)*1e3); - printf("Cache dimension (mm x mm) - %g x %g\n", - fr->nuca_pda.area.h, - fr->nuca_pda.area.w); - - fr->router->print_router(); - - printf("\n\nWire stats:\n"); - if (fr->h_wire->wt == Global) { - printf("\tWire type - Full swing global wires with least " - "possible delay\n"); - } - else if (fr->h_wire->wt == Global_5) { - printf("\tWire type - Full swing global wires with " - "5%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_10) { - printf("\tWire type - Full swing global wires with " - "10%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_20) { - printf("\tWire type - Full swing global wires with " - "20%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_30) { - printf("\tWire type - Full swing global wires with " - "30%% delay penalty\n"); - } - else if(fr->h_wire->wt == Low_swing) { - printf("\tWire type - Low swing wires\n"); - } - - printf("\tHorizontal link delay - %g (ns)\n", - fr->h_wire->delay*1e9); - printf("\tVertical link delay - %g (ns)\n", - fr->v_wire->delay*1e9); - printf("\tDelay/length - %g (ns/mm)\n", - fr->h_wire->delay*1e9/fr->bank_pda.area.w); - printf("\tHorizontal link energy -dynamic/access %g (nJ)\n" - "\t -leakage %g (nW)\n\n", - fr->h_wire->power.readOp.dynamic*1e9, - fr->h_wire->power.readOp.leakage*1e9); - printf("\tVertical link energy -dynamic/access %g (nJ)\n" - "\t -leakage %g (nW)\n\n", - fr->v_wire->power.readOp.dynamic*1e9, - fr->v_wire->power.readOp.leakage*1e9); - printf("\n\n"); - fr->v_wire->print_wire(); - printf("\n\nBank stats:\n"); -} - - - nuca_org_t * -Nuca::find_optimal_nuca (list *n, min_values_t *minval) -{ - double cost = 0; - double min_cost = BIGNUM; - nuca_org_t *res = NULL; - float d, a, dp, lp, c; - int v; - dp = g_ip->dynamic_power_wt_nuca; - lp = g_ip->leakage_power_wt_nuca; - a = g_ip->area_wt_nuca; - d = g_ip->delay_wt_nuca; - c = g_ip->cycle_time_wt_nuca; - - list::iterator niter; - - - for (niter = n->begin(); niter != n->end(); niter++) { - fprintf(stderr, "\n-----------------------------" - "---------------\n"); - - - printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " - "bank_dpower = %g \tleak = %g \tcycle = %g\n", - (*niter)->bank_count, - (*niter)->nuca_pda.delay, - (*niter)->nuca_pda.power.readOp.dynamic, - (*niter)->h_wire->wt, - (*niter)->bank_pda.power.readOp.dynamic, - (*niter)->nuca_pda.power.readOp.leakage, - (*niter)->nuca_pda.cycle_time); - - - if (g_ip->ed == 1) { - cost = ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) { - min_cost = cost; - res = ((*niter)); - } +int +Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) { + if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) > + g_ip->delay_dev_nuca) { + return 0; } - else if (g_ip->ed == 2) { - cost = ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) { - min_cost = cost; - res = ((*niter)); - } + if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) / + minval->min_dyn)*100 > + g_ip->dynamic_power_dev_nuca) { + return 0; } - else { - /* - * check whether the current organization - * meets the input deviation constraints - */ - v = check_nuca_org((*niter), minval); - if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling - - if (v) { - cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) + - c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) + - dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) + - lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) + - a * ((*niter)->nuca_pda.area.get_area()/minval->min_area)); - fprintf(stderr, "cost = %g\n", cost); - - if (min_cost > cost) { - min_cost = cost; - res = ((*niter)); - } - } - else { - niter = n->erase(niter); - if (niter !=n->begin()) - niter --; - } + if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage)*100 > + g_ip->leakage_power_dev_nuca) { + return 0; } - } - return res; + if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 > + g_ip->cycle_time_dev_nuca) { + return 0; + } + if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) * + 100 > + g_ip->area_dev_nuca) { + return 0; + } + return 1; } - int -Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) -{ - if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) { - return 0; - } - if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > - g_ip->dynamic_power_dev_nuca) { - return 0; - } - if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > - g_ip->leakage_power_dev_nuca) { - return 0; - } - if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > - g_ip->cycle_time_dev_nuca) { - return 0; - } - if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev_nuca) { - return 0; - } - return 1; -} - - void -Nuca::calculate_nuca_area (nuca_org_t *nuca) -{ - nuca->nuca_pda.area.h= - nuca->rows * ((nuca->h_wire->wire_width + - nuca->h_wire->wire_spacing) - * nuca->router->flit_size + - nuca->bank_pda.area.h); - - nuca->nuca_pda.area.w = - nuca->columns * ((nuca->v_wire->wire_width + - nuca->v_wire->wire_spacing) - * nuca->router->flit_size + - nuca->bank_pda.area.w); +void +Nuca::calculate_nuca_area (nuca_org_t *nuca) { + nuca->nuca_pda.area.h = + nuca->rows * ((nuca->h_wire->wire_width + + nuca->h_wire->wire_spacing) + * nuca->router->flit_size + + nuca->bank_pda.area.h); + + nuca->nuca_pda.area.w = + nuca->columns * ((nuca->v_wire->wire_width + + nuca->v_wire->wire_spacing) + * nuca->router->flit_size + + nuca->bank_pda.area.w); } diff --git a/ext/mcpat/cacti/nuca.h b/ext/mcpat/cacti/nuca.h index adfe32564..38cca6f70 100644 --- a/ext/mcpat/cacti/nuca.h +++ b/ext/mcpat/cacti/nuca.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -46,8 +47,8 @@ #include "wire.h" class nuca_org_t { - public: - ~nuca_org_t(); +public: + ~nuca_org_t(); // int size; /* area, power, access time, and cycle time stats */ Component nuca_pda; @@ -71,9 +72,8 @@ class nuca_org_t { -class Nuca : public Component -{ - public: +class Nuca : public Component { +public: Nuca( TechnologyParameter::DeviceType *dt); void print_router(); @@ -87,12 +87,12 @@ class Nuca : public Component void print_nuca(nuca_org_t *n); void print_cont_stats(); - private: +private: TechnologyParameter::DeviceType *deviceType; int wt_min, wt_max; Wire *wire_vertical[WIRE_TYPES], - *wire_horizontal[WIRE_TYPES]; + *wire_horizontal[WIRE_TYPES]; }; diff --git a/ext/mcpat/cacti/parameter.cc b/ext/mcpat/cacti/parameter.cc index b71640c19..f7184d8a9 100644 --- a/ext/mcpat/cacti/parameter.cc +++ b/ext/mcpat/cacti/parameter.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -46,147 +47,141 @@ TechnologyParameter g_tp; -void TechnologyParameter::DeviceType::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::DeviceType::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; - cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; - cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; - cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; - cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; - cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; - cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; - cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; - cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; - cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; - cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; - cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; - cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; - cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; - cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; - cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; - cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; + cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; + cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; + cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; + cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; + cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; + cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; + cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; + cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; + cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; + cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; + cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; + cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; + cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; + cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; + cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; + cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; + cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; } -void TechnologyParameter::InterconnectType::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::InterconnectType::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; - cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; - cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; + cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; + cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; + cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; } -void TechnologyParameter::ScalingFactor::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::ScalingFactor::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; - cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; + cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; + cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; } -void TechnologyParameter::MemoryType::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::MemoryType::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; - cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; - cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; - cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; - cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; - cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; + cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; + cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; + cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; + cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; + cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; + cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; } -void TechnologyParameter::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; - cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; - cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; - cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; - cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; - cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; - cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; - cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; - cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; - cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; - cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; - cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; - cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; - cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; - cout << endl; - cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; - cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; - cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; - cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl; - cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; - cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; - cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; - cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl; - cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; - cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; - cout << endl; - cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; - cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; - cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; - cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; - cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; - cout << endl; - cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; - cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; - cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; - cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; - cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; - cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; - cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; + cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; + cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; + cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; + cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; + cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; + cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; + cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; + cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; + cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; + cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; + cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; + cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; + cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; + cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; + cout << endl; + cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; + cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; + cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; + cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl; + cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; + cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; + cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; + cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl; + cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; + cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; + cout << endl; + cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; + cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; + cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; + cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; + cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; + cout << endl; + cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; + cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; + cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; + cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; + cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; + cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; + cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; - cout << endl; - cout << indent_str << "SRAM cell transistor: " << endl; - sram_cell.display(indent + 2); + cout << endl; + cout << indent_str << "SRAM cell transistor: " << endl; + sram_cell.display(indent + 2); - cout << endl; - cout << indent_str << "DRAM access transistor: " << endl; - dram_acc.display(indent + 2); + cout << endl; + cout << indent_str << "DRAM access transistor: " << endl; + dram_acc.display(indent + 2); - cout << endl; - cout << indent_str << "DRAM wordline transistor: " << endl; - dram_wl.display(indent + 2); + cout << endl; + cout << indent_str << "DRAM wordline transistor: " << endl; + dram_wl.display(indent + 2); - cout << endl; - cout << indent_str << "peripheral global transistor: " << endl; - peri_global.display(indent + 2); + cout << endl; + cout << indent_str << "peripheral global transistor: " << endl; + peri_global.display(indent + 2); - cout << endl; - cout << indent_str << "wire local" << endl; - wire_local.display(indent + 2); + cout << endl; + cout << indent_str << "wire local" << endl; + wire_local.display(indent + 2); - cout << endl; - cout << indent_str << "wire inside mat" << endl; - wire_inside_mat.display(indent + 2); + cout << endl; + cout << indent_str << "wire inside mat" << endl; + wire_inside_mat.display(indent + 2); - cout << endl; - cout << indent_str << "wire outside mat" << endl; - wire_outside_mat.display(indent + 2); + cout << endl; + cout << indent_str << "wire outside mat" << endl; + wire_outside_mat.display(indent + 2); - cout << endl; - cout << indent_str << "SRAM" << endl; - sram.display(indent + 2); + cout << endl; + cout << indent_str << "SRAM" << endl; + sram.display(indent + 2); - cout << endl; - cout << indent_str << "DRAM" << endl; - dram.display(indent + 2); + cout << endl; + cout << indent_str << "DRAM" << endl; + dram.display(indent + 2); } DynamicParameter::DynamicParameter(): - use_inp_params(0), cell(), is_valid(true) -{ + use_inp_params(0), cell(), is_valid(true) { } @@ -202,512 +197,433 @@ DynamicParameter::DynamicParameter( unsigned int Ndsam_lev_1_, unsigned int Ndsam_lev_2_, bool is_main_mem_): - is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_), - Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), - number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), - is_main_mem(is_main_mem_), cell(), is_valid(false) -{ - ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), + Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_), + Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), + number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), + is_main_mem(is_main_mem_), cell(), is_valid(false) { + ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; + is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); - unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer - const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; - fully_assoc = (g_ip->fully_assoc) ? true : false; + unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer + const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; + fully_assoc = (g_ip->fully_assoc) ? true : false; - if (fully_assoc || pure_cam) - { // fully-assocative cache -- ref: CACTi 2.0 report - if (Ndwl != 1 || //Ndwl is fixed to 1 for FA - Ndcm != 1 || //Ndcm is fixed to 1 for FA - Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA - Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one - Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one - Ndbl < 2) - { - return; - } - } + // fully-assocative cache -- ref: CACTi 2.0 report + if (fully_assoc || pure_cam) { + if (Ndwl != 1 || //Ndwl is fixed to 1 for FA + Ndcm != 1 || //Ndcm is fixed to 1 for FA + Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA + Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one + Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one + Ndbl < 2) { + return; + } + } - if ((is_dram) && (!is_tag) && (Ndcm > 1)) - { - return; // For a DRAM array, each bitline has its own sense-amp - } + if ((is_dram) && (!is_tag) && (Ndcm > 1)) { + return; // For a DRAM array, each bitline has its own sense-amp + } - // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be - // at least two because an array is assumed to have at least one mat. And a mat - // is formed out of two horizontal subarrays and two vertical subarrays - if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) - { - return; - } + // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be + // at least two because an array is assumed to have at least one mat. And a mat + // is formed out of two horizontal subarrays and two vertical subarrays + if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) { + return; + } - //***********compute row, col of an subarray - if (!(fully_assoc || pure_cam))//Not fully_asso nor cam - { - // if data array, let tagbits = 0 - if (is_tag) - { - if (g_ip->specific_tag) - { - tagbits = g_ip->tag_w; - } - else - { - tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + - _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks); + //***********compute row, col of an subarray + if (!(fully_assoc || pure_cam)) { + //Not fully_asso nor cam + // if data array, let tagbits = 0 + if (is_tag) { + if (g_ip->specific_tag) { + tagbits = g_ip->tag_w; + } else { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + + _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks); - } - tagbits = (((tagbits + 3) >> 2) << 2); + } + tagbits = (((tagbits + 3) >> 2) << 2); - num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * - g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); - num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); - //burst_length = 1; - } - else - { - num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * - g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); - num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); - // burst_length = g_ip->block_sz * 8 / g_ip->out_w; - } + num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * + g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); + num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); + //burst_length = 1; + } else { + num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * + g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); + num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); + // burst_length = g_ip->block_sz * 8 / g_ip->out_w; + } - if (num_r_subarray < MINSUBARRAYROWS) return; - if (num_r_subarray == 0) return; - if (num_r_subarray > MAXSUBARRAYROWS) return; - if (num_c_subarray < MINSUBARRAYCOLS) return; - if (num_c_subarray > MAXSUBARRAYCOLS) return; + if (num_r_subarray < MINSUBARRAYROWS) return; + if (num_r_subarray == 0) return; + if (num_r_subarray > MAXSUBARRAYROWS) return; + if (num_c_subarray < MINSUBARRAYCOLS) return; + if (num_c_subarray > MAXSUBARRAYCOLS) return; - } + } - else - {//either fully-asso or cam - if (pure_cam) - { - if (g_ip->specific_tag) - { - tagbits = int(ceil(g_ip->tag_w/8.0)*8); - } - else - { - tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8); + else {//either fully-asso or cam + if (pure_cam) { + if (g_ip->specific_tag) { + tagbits = int(ceil(g_ip->tag_w / 8.0) * 8); + } else { + tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8); // cout<<"Pure CAM needs tag width to be specified"<> 2) << 2); - - tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries. - //tag_num_c_subarray = (int)(tagbits + EPSILON); - tag_num_c_subarray = tagbits; - if (tag_num_r_subarray == 0) return; - if (tag_num_r_subarray > MAXSUBARRAYROWS) return; - if (tag_num_c_subarray < MINSUBARRAYCOLS) return; - if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; - num_r_subarray = tag_num_r_subarray; - } - else //fully associative - { - if (g_ip->specific_tag) - { - tagbits = g_ip->tag_w; - } - else - { - tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. - } - tagbits = (((tagbits + 3) >> 2) << 2); - - tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl)); - tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); - if (tag_num_r_subarray == 0) return; - if (tag_num_r_subarray > MAXSUBARRAYROWS) return; - if (tag_num_c_subarray < MINSUBARRAYCOLS) return; - if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; - - data_num_r_subarray = tag_num_r_subarray; - data_num_c_subarray = 8 * g_ip->block_sz; - if (data_num_r_subarray == 0) return; - if (data_num_r_subarray > MAXSUBARRAYROWS) return; - if (data_num_c_subarray < MINSUBARRAYCOLS) return; - if (data_num_c_subarray > MAXSUBARRAYCOLS) return; - num_r_subarray = tag_num_r_subarray; - } - } - - num_subarrays = Ndwl * Ndbl; - //****************end of computation of row, col of an subarray - - // calculate wire parameters - if (fully_assoc || pure_cam) - { - cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; - cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; - - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) - + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); - } - else - { - if(is_tag) - { - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + - g_ip->num_wr_ports); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + - (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + - wire_local.pitch * g_ip->num_se_rd_ports; - } - else - { - if (is_dram) - { - cell.h = g_tp.dram.b_h; - cell.w = g_tp.dram.b_w; - } - else - { - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + - g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + - (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + - g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; - } - } - } - - double c_b_metal = cell.h * wire_local.C_per_um; - double C_bl; - - if (!(fully_assoc || pure_cam)) - { - if (is_dram) - { - deg_bl_muxing = 1; - if (ram_cell_tech_type == comm_dram) - { - C_bl = num_r_subarray * c_b_metal; - V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); - if (V_b_sense < VBITSENSEMIN) - { - return; - } - V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value - dram_refresh_period = 64e-3; - } - else - { - double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl); - - if (V_b_sense < VBITSENSEMIN) - { - return; //Sense amp input signal is smaller that minimum allowable sense amp input signal - } - V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value - //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; - //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; - dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; - } - } - else - { //SRAM - V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; - deg_bl_muxing = Ndcm; - // "/ 2.0" below is due to the fact that two adjacent access transistors share drain - // contacts in a physical layout - double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - dram_refresh_period = 0; - } - } - else - { - c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM - V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; - deg_bl_muxing = 1;//FA fix as 1 - // "/ 2.0" below is due to the fact that two adjacent access transistors share drain - // contacts in a physical layout - double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - dram_refresh_period = 0; - } - - - // do/di: data in/out, for fully associative they are the data width for normal read and write - // so/si: search data in/out, for fully associative they are the data width for the search ops - // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write) - // so/si needs broadcase while do/di do not - - if (fully_assoc || pure_cam) - { - switch (Ndbl) { - case (0): - cout << " Invalid Ndbl \n"<> 2) << 2); - if (fully_assoc) - { - num_so_b_mat = data_num_c_subarray; - num_do_b_mat = data_num_c_subarray + tagbits; + //TODO: error check input of tagbits and blocksize + //TODO: for pure CAM, g_ip->block should be number of entries. + tag_num_r_subarray = (int)ceil(capacity_per_die / + (g_ip->nbanks * tagbits / 8.0 * Ndbl)); + //tag_num_c_subarray = (int)(tagbits + EPSILON); + tag_num_c_subarray = tagbits; + if (tag_num_r_subarray == 0) return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; + num_r_subarray = tag_num_r_subarray; + } else { //fully associative + if (g_ip->specific_tag) { + tagbits = g_ip->tag_w; + } else { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. } - else - { - num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data - num_do_b_mat = tagbits; + tagbits = (((tagbits + 3) >> 2) << 2); + + tag_num_r_subarray = (int)(capacity_per_die / + (g_ip->nbanks * g_ip->block_sz * Ndbl)); + tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); + if (tag_num_r_subarray == 0) return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; + + data_num_r_subarray = tag_num_r_subarray; + data_num_c_subarray = 8 * g_ip->block_sz; + if (data_num_r_subarray == 0) return; + if (data_num_r_subarray > MAXSUBARRAYROWS) return; + if (data_num_c_subarray < MINSUBARRAYCOLS) return; + if (data_num_c_subarray > MAXSUBARRAYCOLS) return; + num_r_subarray = tag_num_r_subarray; + } + } + + num_subarrays = Ndwl * Ndbl; + //****************end of computation of row, col of an subarray + + // calculate wire parameters + if (fully_assoc || pure_cam) { + cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + + wire_local.pitch * g_ip->num_se_rd_ports; + cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + + wire_local.pitch * g_ip->num_se_rd_ports; + + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * + (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports - + g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * + g_ip->num_se_rd_ports + 2 * wire_local.pitch * + (g_ip->num_search_ports - 1); + } else { + if (is_tag) { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + + g_ip->num_wr_ports); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + + wire_local.pitch * g_ip->num_se_rd_ports; + } else { + if (is_dram) { + cell.h = g_tp.dram.b_h; + cell.w = g_tp.dram.b_w; + } else { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; } - } - else - { - num_mats_h_dir = MAX(Ndwl / 2, 1); - num_mats_v_dir = MAX(Ndbl / 2, 1); - num_mats = num_mats_h_dir * num_mats_v_dir; - num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); - } + } + } - if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats))) - { - return; - } + double c_b_metal = cell.h * wire_local.C_per_um; + double C_bl; + + if (!(fully_assoc || pure_cam)) { + if (is_dram) { + deg_bl_muxing = 1; + if (ram_cell_tech_type == comm_dram) { + C_bl = num_r_subarray * c_b_metal; + V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl); + if (V_b_sense < VBITSENSEMIN) { + return; + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value + dram_refresh_period = 64e-3; + } else { + double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl); + + if (V_b_sense < VBITSENSEMIN) { + return; //Sense amp input signal is smaller that minimum allowable sense amp input signal + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value + //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; + //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; + dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; + } + } else { //SRAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = Ndcm; + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + } + } else { + c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = 1;//FA fix as 1 + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + } - int deg_sa_mux_l1_non_assoc; - //TODO:the i/o for subbank is not necessary and should be removed. - if (!(fully_assoc || pure_cam)) - { - if (!is_tag) - { - if (is_main_mem == true) - { - num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - } - else - { - if (g_ip->fast_access == true) - { - num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - } - else - { + // do/di: data in/out, for fully associative they are the data width for normal read and write + // so/si: search data in/out, for fully associative they are the data width for the search ops + // for CAM, si=di, but so = matching address. do = data out = di (for normal read/write) + // so/si needs broadcase while do/di do not - num_do_b_subbank = g_ip->out_w; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; - if (deg_sa_mux_l1_non_assoc < 1) - { - return; - } + if (fully_assoc || pure_cam) { + switch (Ndbl) { + case (0): + cout << " Invalid Ndbl \n" << endl; + exit(0); + break; + case (1): + num_mats_h_dir = 1;//one subarray per mat + num_mats_v_dir = 1; + break; + case (2): + num_mats_h_dir = 1;//two subarrays per mat + num_mats_v_dir = 1; + break; + default: + num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subbarrys per mat + num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir); + } + num_mats = num_mats_h_dir * num_mats_v_dir; - } - } - } - else - { - num_do_b_subbank = tagbits * g_ip->tag_assoc; - if (num_do_b_mat < tagbits) - { - return; - } - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; - } - } - else - { - if (fully_assoc) - { - num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa - num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; - } - else - { - num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data - num_do_b_subbank = tag_num_c_subarray; - } + if (fully_assoc) { + num_so_b_mat = data_num_c_subarray; + num_do_b_mat = data_num_c_subarray + tagbits; + } else { + num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data + num_do_b_mat = tagbits; + } + } else { + num_mats_h_dir = MAX(Ndwl / 2, 1); + num_mats_v_dir = MAX(Ndbl / 2, 1); + num_mats = num_mats_h_dir * num_mats_v_dir; + num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray / + (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); + } - deg_sa_mux_l1_non_assoc = 1; - } + if (!(fully_assoc || pure_cam) && (num_do_b_mat < + (num_subarrays / num_mats))) { + return; + } - deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; - if (fully_assoc || pure_cam) - { - num_act_mats_hor_dir = 1; - num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used - } - else - { - num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; - if (num_act_mats_hor_dir == 0) - { - return; - } - } + int deg_sa_mux_l1_non_assoc; + //TODO:the i/o for subbank is not necessary and should be removed. + if (!(fully_assoc || pure_cam)) { + if (!is_tag) { + if (is_main_mem == true) { + num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } else { + if (g_ip->fast_access == true) { + num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } else { - //compute num_do_mat for tag - if (is_tag) - { - if (!(fully_assoc || pure_cam)) - { - num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; - num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; - } - } + num_do_b_subbank = g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; + if (deg_sa_mux_l1_non_assoc < 1) { + return; + } - if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram)) - { - if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits) - { - return; - } - } + } + } + } else { + num_do_b_subbank = tagbits * g_ip->tag_assoc; + if (num_do_b_mat < tagbits) { + return; + } + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; + } + } else { + if (fully_assoc) { + num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa + num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; + } else { + num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data + num_do_b_subbank = tag_num_c_subarray; + } + + deg_sa_mux_l1_non_assoc = 1; + } + + deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; + + if (fully_assoc || pure_cam) { + num_act_mats_hor_dir = 1; + num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used + } else { + num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; + if (num_act_mats_hor_dir == 0) { + return; + } + } + + //compute num_do_mat for tag + if (is_tag) { + if (!(fully_assoc || pure_cam)) { + num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; + num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; + } + } + + if ((g_ip->is_cache == false && is_main_mem == true) || + (PAGE_MODE == 1 && is_dram)) { + if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != + (int)g_ip->page_sz_bits) { + return; + } + } // if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays - if (is_tag == false && g_ip->is_main_mem == true && - num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) - { - return; - } + if (is_tag == false && g_ip->is_main_mem == true && + num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < + ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) { + return; + } - if (num_act_mats_hor_dir > num_mats_h_dir) - { - return; - } + if (num_act_mats_hor_dir > num_mats_h_dir) { + return; + } - //compute di for mat subbank and bank - if (!(fully_assoc ||pure_cam)) - { - if(!is_tag) - { - if(g_ip->fast_access == true) - { - num_di_b_mat = num_do_b_mat / g_ip->data_assoc; - } - else - { - num_di_b_mat = num_do_b_mat; - } - } - else - { - num_di_b_mat = tagbits; - } - } - else - { - if (fully_assoc) - { - num_di_b_mat = num_do_b_mat; - //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, - //but inside the mat wire tracks need to be reserved for search data bus - num_si_b_mat = tagbits; - } - else - { - num_di_b_mat = tagbits; - num_si_b_mat = tagbits;//*num_subarrays/num_mats; - } + //compute di for mat subbank and bank + if (!(fully_assoc || pure_cam)) { + if (!is_tag) { + if (g_ip->fast_access == true) { + num_di_b_mat = num_do_b_mat / g_ip->data_assoc; + } else { + num_di_b_mat = num_do_b_mat; + } + } else { + num_di_b_mat = tagbits; + } + } else { + if (fully_assoc) { + num_di_b_mat = num_do_b_mat; + //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, + //but inside the mat wire tracks need to be reserved for search data bus + num_si_b_mat = tagbits; + } else { + num_di_b_mat = tagbits; + num_si_b_mat = tagbits;//*num_subarrays/num_mats; + } - } + } - num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA - num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast + num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA + num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast - int num_addr_b_row_dec = _log2(num_r_subarray); - if ((fully_assoc ||pure_cam)) - num_addr_b_row_dec +=_log2(num_subarrays/num_mats); - int number_subbanks = num_mats / num_act_mats_hor_dir; - number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM + int num_addr_b_row_dec = _log2(num_r_subarray); + if ((fully_assoc || pure_cam)) + num_addr_b_row_dec += _log2(num_subarrays / num_mats); + int number_subbanks = num_mats / num_act_mats_hor_dir; + number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM - num_rw_ports = g_ip->num_rw_ports; - num_rd_ports = g_ip->num_rd_ports; - num_wr_ports = g_ip->num_wr_ports; - num_se_rd_ports = g_ip->num_se_rd_ports; - num_search_ports = g_ip->num_search_ports; + num_rw_ports = g_ip->num_rw_ports; + num_rd_ports = g_ip->num_rd_ports; + num_wr_ports = g_ip->num_wr_ports; + num_se_rd_ports = g_ip->num_se_rd_ports; + num_search_ports = g_ip->num_search_ports; - if (is_dram && is_main_mem) - { - number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, - _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); - } - else - { - number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + - _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); - } + if (is_dram && is_main_mem) { + number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, + _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); + } else { + number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); + } - if (!(fully_assoc ||pure_cam)) - { - if (is_tag) - { - num_di_b_bank_per_port = tagbits; - num_do_b_bank_per_port = g_ip->data_assoc; - } - else - { - num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; - num_do_b_bank_per_port = g_ip->out_w; - } - } - else - { - if (fully_assoc) - { - num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? - num_si_b_bank_per_port = tagbits; - num_do_b_bank_per_port = g_ip->out_w + tagbits; - num_so_b_bank_per_port = g_ip->out_w; - } - else - { - num_di_b_bank_per_port = tagbits; - num_si_b_bank_per_port = tagbits; - num_do_b_bank_per_port = tagbits; - num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); - } - } + if (!(fully_assoc || pure_cam)) { + if (is_tag) { + num_di_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->data_assoc; + } else { + num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; + num_do_b_bank_per_port = g_ip->out_w; + } + } else { + if (fully_assoc) { + num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? + num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->out_w + tagbits; + num_so_b_bank_per_port = g_ip->out_w; + } else { + num_di_b_bank_per_port = tagbits; + num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = tagbits; + num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); + } + } - if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) - { - number_way_select_signals_mat = g_ip->data_assoc; - } + if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) { + number_way_select_signals_mat = g_ip->data_assoc; + } - // add ECC adjustment to all data signals that traverse on H-trees. - if (g_ip->add_ecc_b_ == true) - { - num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); - num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); - num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); - num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); - num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); - num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); + // add ECC adjustment to all data signals that traverse on H-trees. + if (g_ip->add_ecc_b_ == true) { + num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); + num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); + num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); + num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); + num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); + num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); - num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); - num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); - num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); - num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); - num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); - num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); - } + num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); + num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); + num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); + num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); + num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); + num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); + } - is_valid = true; + is_valid = true; } diff --git a/ext/mcpat/cacti/parameter.h b/ext/mcpat/cacti/parameter.h index 9c827bbc8..573b726a6 100644 --- a/ext/mcpat/cacti/parameter.h +++ b/ext/mcpat/cacti/parameter.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -40,251 +41,242 @@ #include "io.h" // parameters which are functions of certain device technology -class TechnologyParameter -{ - public: - class DeviceType - { - public: - double C_g_ideal; - double C_fringe; - double C_overlap; - double C_junc; // C_junc_area - double C_junc_sidewall; - double l_phy; - double l_elec; - double R_nch_on; - double R_pch_on; - double Vdd; - double Vth; - double I_on_n; - double I_on_p; - double I_off_n; - double I_off_p; - double I_g_on_n; - double I_g_on_p; - double C_ox; - double t_ox; - double n_to_p_eff_curr_drv_ratio; - double long_channel_leakage_reduction; +class TechnologyParameter { +public: + class DeviceType { + public: + double C_g_ideal; + double C_fringe; + double C_overlap; + double C_junc; // C_junc_area + double C_junc_sidewall; + double l_phy; + double l_elec; + double R_nch_on; + double R_pch_on; + double Vdd; + double Vth; + double I_on_n; + double I_on_p; + double I_off_n; + double I_off_p; + double I_g_on_n; + double I_g_on_p; + double C_ox; + double t_ox; + double n_to_p_eff_curr_drv_ratio; + double long_channel_leakage_reduction; - DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), - C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), - Vdd(0), Vth(0), - I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0), - C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { }; - void reset() - { - C_g_ideal = 0; - C_fringe = 0; - C_overlap = 0; - C_junc = 0; - l_phy = 0; - l_elec = 0; - R_nch_on = 0; - R_pch_on = 0; - Vdd = 0; - Vth = 0; - I_on_n = 0; - I_on_p = 0; - I_off_n = 0; - I_off_p = 0; - I_g_on_n = 0; - I_g_on_p = 0; - C_ox = 0; - t_ox = 0; - n_to_p_eff_curr_drv_ratio = 0; - long_channel_leakage_reduction = 0; - } + DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), + C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), + Vdd(0), Vth(0), + I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0), + I_g_on_p(0), + C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), + long_channel_leakage_reduction(0) { }; + void reset() { + C_g_ideal = 0; + C_fringe = 0; + C_overlap = 0; + C_junc = 0; + l_phy = 0; + l_elec = 0; + R_nch_on = 0; + R_pch_on = 0; + Vdd = 0; + Vth = 0; + I_on_n = 0; + I_on_p = 0; + I_off_n = 0; + I_off_p = 0; + I_g_on_n = 0; + I_g_on_p = 0; + C_ox = 0; + t_ox = 0; + n_to_p_eff_curr_drv_ratio = 0; + long_channel_leakage_reduction = 0; + } + + void display(uint32_t indent = 0); + }; + class InterconnectType { + public: + double pitch; + double R_per_um; + double C_per_um; + double horiz_dielectric_constant; + double vert_dielectric_constant; + double aspect_ratio; + double miller_value; + double ild_thickness; + + InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { }; + + void reset() { + pitch = 0; + R_per_um = 0; + C_per_um = 0; + horiz_dielectric_constant = 0; + vert_dielectric_constant = 0; + aspect_ratio = 0; + miller_value = 0; + ild_thickness = 0; + } + + void display(uint32_t indent = 0); + }; + class MemoryType { + public: + double b_w; + double b_h; + double cell_a_w; + double cell_pmos_w; + double cell_nmos_w; + double Vbitpre; + + void reset() { + b_w = 0; + b_h = 0; + cell_a_w = 0; + cell_pmos_w = 0; + cell_nmos_w = 0; + Vbitpre = 0; + } + + void display(uint32_t indent = 0); + }; + + class ScalingFactor { + public: + double logic_scaling_co_eff; + double core_tx_density; + double long_channel_leakage_reduction; + + ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), + long_channel_leakage_reduction(0) { }; + + void reset() { + logic_scaling_co_eff = 0; + core_tx_density = 0; + long_channel_leakage_reduction = 0; + } + + void display(uint32_t indent = 0); + }; + + double ram_wl_stitching_overhead_; + double min_w_nmos_; + double max_w_nmos_; + double max_w_nmos_dec; + double unit_len_wire_del; + double FO4; + double kinv; + double vpp; + double w_sense_en; + double w_sense_n; + double w_sense_p; + double sense_delay; + double sense_dy_power; + double w_iso; + double w_poly_contact; + double spacing_poly_to_poly; + double spacing_poly_to_contact; + + double w_comp_inv_p1; + double w_comp_inv_p2; + double w_comp_inv_p3; + double w_comp_inv_n1; + double w_comp_inv_n2; + double w_comp_inv_n3; + double w_eval_inv_p; + double w_eval_inv_n; + double w_comp_n; + double w_comp_p; + + double dram_cell_I_on; + double dram_cell_Vdd; + double dram_cell_I_off_worst_case_len_temp; + double dram_cell_C; + double gm_sense_amp_latch; + + double w_nmos_b_mux; + double w_nmos_sa_mux; + double w_pmos_bl_precharge; + double w_pmos_bl_eq; + double MIN_GAP_BET_P_AND_N_DIFFS; + double MIN_GAP_BET_SAME_TYPE_DIFFS; + double HPOWERRAIL; + double cell_h_def; + + double chip_layout_overhead; + double macro_layout_overhead; + double sckt_co_eff; + + double fringe_cap; + + uint64_t h_dec; + + DeviceType sram_cell; // SRAM cell transistor + DeviceType dram_acc; // DRAM access transistor + DeviceType dram_wl; // DRAM wordline transistor + DeviceType peri_global; // peripheral global + DeviceType cam_cell; // SRAM cell transistor + + InterconnectType wire_local; + InterconnectType wire_inside_mat; + InterconnectType wire_outside_mat; + + ScalingFactor scaling_factor; + + MemoryType sram; + MemoryType dram; + MemoryType cam; void display(uint32_t indent = 0); - }; - class InterconnectType - { - public: - double pitch; - double R_per_um; - double C_per_um; - double horiz_dielectric_constant; - double vert_dielectric_constant; - double aspect_ratio; - double miller_value; - double ild_thickness; - InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { }; + void reset() { + dram_cell_Vdd = 0; + dram_cell_I_on = 0; + dram_cell_C = 0; + vpp = 0; - void reset() - { - pitch = 0; - R_per_um = 0; - C_per_um = 0; - horiz_dielectric_constant = 0; - vert_dielectric_constant = 0; - aspect_ratio = 0; - miller_value = 0; - ild_thickness = 0; - } - - void display(uint32_t indent = 0); - }; - class MemoryType - { - public: - double b_w; - double b_h; - double cell_a_w; - double cell_pmos_w; - double cell_nmos_w; - double Vbitpre; - - void reset() - { - b_w = 0; - b_h = 0; - cell_a_w = 0; - cell_pmos_w = 0; - cell_nmos_w = 0; - Vbitpre = 0; - } - - void display(uint32_t indent = 0); - }; - - class ScalingFactor - { - public: - double logic_scaling_co_eff; - double core_tx_density; - double long_channel_leakage_reduction; - - ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), - long_channel_leakage_reduction(0) { }; - - void reset() - { - logic_scaling_co_eff= 0; - core_tx_density = 0; - long_channel_leakage_reduction= 0; - } - - void display(uint32_t indent = 0); - }; - - double ram_wl_stitching_overhead_; - double min_w_nmos_; - double max_w_nmos_; - double max_w_nmos_dec; - double unit_len_wire_del; - double FO4; - double kinv; - double vpp; - double w_sense_en; - double w_sense_n; - double w_sense_p; - double sense_delay; - double sense_dy_power; - double w_iso; - double w_poly_contact; - double spacing_poly_to_poly; - double spacing_poly_to_contact; - - double w_comp_inv_p1; - double w_comp_inv_p2; - double w_comp_inv_p3; - double w_comp_inv_n1; - double w_comp_inv_n2; - double w_comp_inv_n3; - double w_eval_inv_p; - double w_eval_inv_n; - double w_comp_n; - double w_comp_p; - - double dram_cell_I_on; - double dram_cell_Vdd; - double dram_cell_I_off_worst_case_len_temp; - double dram_cell_C; - double gm_sense_amp_latch; - - double w_nmos_b_mux; - double w_nmos_sa_mux; - double w_pmos_bl_precharge; - double w_pmos_bl_eq; - double MIN_GAP_BET_P_AND_N_DIFFS; - double MIN_GAP_BET_SAME_TYPE_DIFFS; - double HPOWERRAIL; - double cell_h_def; - - double chip_layout_overhead; - double macro_layout_overhead; - double sckt_co_eff; - - double fringe_cap; - - uint64_t h_dec; - - DeviceType sram_cell; // SRAM cell transistor - DeviceType dram_acc; // DRAM access transistor - DeviceType dram_wl; // DRAM wordline transistor - DeviceType peri_global; // peripheral global - DeviceType cam_cell; // SRAM cell transistor - - InterconnectType wire_local; - InterconnectType wire_inside_mat; - InterconnectType wire_outside_mat; - - ScalingFactor scaling_factor; - - MemoryType sram; - MemoryType dram; - MemoryType cam; - - void display(uint32_t indent = 0); - - void reset() - { - dram_cell_Vdd = 0; - dram_cell_I_on = 0; - dram_cell_C = 0; - vpp = 0; - - sense_delay = 0; - sense_dy_power = 0; - fringe_cap = 0; + sense_delay = 0; + sense_dy_power = 0; + fringe_cap = 0; // horiz_dielectric_constant = 0; // vert_dielectric_constant = 0; // aspect_ratio = 0; // miller_value = 0; // ild_thickness = 0; - dram_cell_I_off_worst_case_len_temp = 0; + dram_cell_I_off_worst_case_len_temp = 0; - sram_cell.reset(); - dram_acc.reset(); - dram_wl.reset(); - peri_global.reset(); - cam_cell.reset(); + sram_cell.reset(); + dram_acc.reset(); + dram_wl.reset(); + peri_global.reset(); + cam_cell.reset(); - scaling_factor.reset(); + scaling_factor.reset(); - wire_local.reset(); - wire_inside_mat.reset(); - wire_outside_mat.reset(); + wire_local.reset(); + wire_inside_mat.reset(); + wire_outside_mat.reset(); - sram.reset(); - dram.reset(); - cam.reset(); + sram.reset(); + dram.reset(); + cam.reset(); - chip_layout_overhead = 0; - macro_layout_overhead = 0; - sckt_co_eff = 0; - } + chip_layout_overhead = 0; + macro_layout_overhead = 0; + sckt_co_eff = 0; + } }; -class DynamicParameter -{ - public: +class DynamicParameter { +public: bool is_tag; bool pure_ram; bool pure_cam; @@ -313,8 +305,8 @@ class DynamicParameter int num_so_b_mat; int num_si_b_subbank; int num_so_b_subbank; - int num_si_b_bank_per_port; - int num_so_b_bank_per_port; + int num_si_b_bank_per_port; + int num_so_b_bank_per_port; int number_way_select_signals_mat; int num_act_mats_hor_dir; diff --git a/ext/mcpat/cacti/router.cc b/ext/mcpat/cacti/router.cc index 06f170691..d3368d946 100644 --- a/ext/mcpat/cacti/router.cc +++ b/ext/mcpat/cacti/router.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -41,57 +42,56 @@ Router::Router( double I_, double O_, double M_ - ):flit_size(flit_size_), - deviceType(dt), - I(I_), - O(O_), - M(M_) -{ - vc_buffer_size = vc_buf; - vc_count = vc_c; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - double technology = g_ip->F_sz_um; + ): flit_size(flit_size_), + deviceType(dt), + I(I_), + O(O_), + M(M_) { + vc_buffer_size = vc_buf; + vc_count = vc_c; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + double technology = g_ip->F_sz_um; - Vdd = dt->Vdd; + Vdd = dt->Vdd; - /*Crossbar parameters. Transmisson gate is employed for connector*/ - NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/ - PTtr = 20*technology*1e-6/2; /* pmos tr. length*/ - wt = 15*technology*1e-6/2; /*track width*/ - ht = 15*technology*1e-6/2; /*track height*/ + /*Crossbar parameters. Transmisson gate is employed for connector*/ + NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/ + wt = 15 * technology * 1e-6 / 2; /*track width*/ + ht = 15 * technology * 1e-6 / 2; /*track height*/ // I = 5; /*Number of crossbar input ports*/ // O = 5; /*Number of crossbar output ports*/ - NTi = 12.5*technology*1e-6/2; - PTi = 25*technology*1e-6/2; + NTi = 12.5 * technology * 1e-6 / 2; + PTi = 25 * technology * 1e-6 / 2; - NTid = 60*technology*1e-6/2; //m - PTid = 120*technology*1e-6/2; // m - NTod = 60*technology*1e-6/2; // m - PTod = 120*technology*1e-6/2; // m + NTid = 60 * technology * 1e-6 / 2; //m + PTid = 120 * technology * 1e-6 / 2; // m + NTod = 60 * technology * 1e-6 / 2; // m + PTod = 120 * technology * 1e-6 / 2; // m - calc_router_parameters(); + calc_router_parameters(); } -Router::~Router(){} +Router::~Router() {} double //wire cap with triple spacing Router::Cw3(double length) { - Wire wc(g_ip->wt, length, 1, 3, 3); - return (wc.wire_cap(length)); + Wire wc(g_ip->wt, length, 1, 3, 3); + return (wc.wire_cap(length)); } /*Function to calculate the gate capacitance*/ double Router::gate_cap(double w) { - return (double) gate_C (w*1e6 /*u*/, 0); + return (double) gate_C (w*1e6 /*u*/, 0); } /*Function to calculate the diffusion capacitance*/ double Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, - double s /*number of stacking transistors*/) { - return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); + double s /*number of stacking transistors*/) { + return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); } @@ -100,212 +100,216 @@ Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, // Model for simple transmission gate double Router::transmission_buf_inpcap() { - return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); + return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); } double Router::transmission_buf_outcap() { - return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); + return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); } double Router::transmission_buf_ctrcap() { - return gate_cap(NTtr)+gate_cap(PTtr); + return gate_cap(NTtr) + gate_cap(PTtr); } double Router::crossbar_inpline() { - return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + - gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); + return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + + gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); } double Router::crossbar_outline() { - return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + - gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); + return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + + gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); } double Router::crossbar_ctrline() { - return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + - diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + - gate_cap(NTi) + gate_cap(PTi)); + return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + + diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + + gate_cap(NTi) + gate_cap(PTi)); } double Router::tr_crossbar_power() { - return (crossbar_inpline()*Vdd*Vdd*flit_size/2 + - crossbar_outline()*Vdd*Vdd*flit_size/2)*2; + return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 + + crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2; } -void Router::buffer_stats() -{ - DynamicParameter dyn_p; - dyn_p.is_tag = false; - dyn_p.pure_cam = false; - dyn_p.fully_assoc = false; - dyn_p.pure_ram = true; - dyn_p.is_dram = false; - dyn_p.is_main_mem = false; - dyn_p.num_subarrays = 1; - dyn_p.num_mats = 1; - dyn_p.Ndbl = 1; - dyn_p.Ndwl = 1; - dyn_p.Nspd = 1; - dyn_p.deg_bl_muxing = 1; - dyn_p.deg_senseamp_muxing_non_associativity = 1; - dyn_p.Ndsam_lev_1 = 1; - dyn_p.Ndsam_lev_2 = 1; - dyn_p.Ndcm = 1; - dyn_p.number_addr_bits_mat = 8; - dyn_p.number_way_select_signals_mat = 1; - dyn_p.number_subbanks_decode = 0; - dyn_p.num_act_mats_hor_dir = 1; - dyn_p.V_b_sense = Vdd; // FIXME check power calc. - dyn_p.ram_cell_tech_type = 0; - dyn_p.num_r_subarray = (int) vc_buffer_size; - dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; - dyn_p.num_mats_h_dir = 1; - dyn_p.num_mats_v_dir = 1; - dyn_p.num_do_b_subbank = (int)flit_size; - dyn_p.num_di_b_subbank = (int)flit_size; - dyn_p.num_do_b_mat = (int) flit_size; - dyn_p.num_di_b_mat = (int) flit_size; - dyn_p.num_do_b_mat = (int) flit_size; - dyn_p.num_di_b_mat = (int) flit_size; - dyn_p.num_do_b_bank_per_port = (int) flit_size; - dyn_p.num_di_b_bank_per_port = (int) flit_size; - dyn_p.out_w = (int) flit_size; +void Router::buffer_stats() { + DynamicParameter dyn_p; + dyn_p.is_tag = false; + dyn_p.pure_cam = false; + dyn_p.fully_assoc = false; + dyn_p.pure_ram = true; + dyn_p.is_dram = false; + dyn_p.is_main_mem = false; + dyn_p.num_subarrays = 1; + dyn_p.num_mats = 1; + dyn_p.Ndbl = 1; + dyn_p.Ndwl = 1; + dyn_p.Nspd = 1; + dyn_p.deg_bl_muxing = 1; + dyn_p.deg_senseamp_muxing_non_associativity = 1; + dyn_p.Ndsam_lev_1 = 1; + dyn_p.Ndsam_lev_2 = 1; + dyn_p.Ndcm = 1; + dyn_p.number_addr_bits_mat = 8; + dyn_p.number_way_select_signals_mat = 1; + dyn_p.number_subbanks_decode = 0; + dyn_p.num_act_mats_hor_dir = 1; + dyn_p.V_b_sense = Vdd; // FIXME check power calc. + dyn_p.ram_cell_tech_type = 0; + dyn_p.num_r_subarray = (int) vc_buffer_size; + dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; + dyn_p.num_mats_h_dir = 1; + dyn_p.num_mats_v_dir = 1; + dyn_p.num_do_b_subbank = (int)flit_size; + dyn_p.num_di_b_subbank = (int)flit_size; + dyn_p.num_do_b_mat = (int) flit_size; + dyn_p.num_di_b_mat = (int) flit_size; + dyn_p.num_do_b_mat = (int) flit_size; + dyn_p.num_di_b_mat = (int) flit_size; + dyn_p.num_do_b_bank_per_port = (int) flit_size; + dyn_p.num_di_b_bank_per_port = (int) flit_size; + dyn_p.out_w = (int) flit_size; - dyn_p.use_inp_params = 1; - dyn_p.num_wr_ports = (unsigned int) vc_count; - dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book - dyn_p.num_rw_ports = 0; - dyn_p.num_se_rd_ports =0; - dyn_p.num_search_ports =0; + dyn_p.use_inp_params = 1; + dyn_p.num_wr_ports = (unsigned int) vc_count; + dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book + dyn_p.num_rw_ports = 0; + dyn_p.num_se_rd_ports = 0; + dyn_p.num_search_ports = 0; - dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + - dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); - dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + - (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + - dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; + dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + + dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); + dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + + (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + + dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; - Mat buff(dyn_p); - buff.compute_delays(0); - buff.compute_power_energy(); - buffer.power.readOp = buff.power.readOp; - buffer.power.writeOp = buffer.power.readOp; //FIXME - buffer.area = buff.area; + Mat buff(dyn_p); + buff.compute_delays(0); + buff.compute_power_energy(); + buffer.power.readOp = buff.power.readOp; + buffer.power.writeOp = buffer.power.readOp; //FIXME + buffer.area = buff.area; } - void -Router::cb_stats () -{ - if (1) { - Crossbar c_b(I, O, flit_size); - c_b.compute_power(); - crossbar.delay = c_b.delay; - crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic; - crossbar.power.readOp.leakage = c_b.power.readOp.leakage; - crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; - crossbar.area = c_b.area; +void +Router::cb_stats () { + if (1) { + Crossbar c_b(I, O, flit_size); + c_b.compute_power(); + crossbar.delay = c_b.delay; + crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic; + crossbar.power.readOp.leakage = c_b.power.readOp.leakage; + crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; + crossbar.area = c_b.area; // c_b.print_crossbar(); - } - else { - crossbar.power.readOp.dynamic = tr_crossbar_power(); - crossbar.power.readOp.leakage = flit_size * I * O * - cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); - crossbar.power.readOp.gate_leakage = flit_size * I * O * - cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); - } + } else { + crossbar.power.readOp.dynamic = tr_crossbar_power(); + crossbar.power.readOp.leakage = flit_size * I * O * + cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); + crossbar.power.readOp.gate_leakage = flit_size * I * O * + cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); + } } void -Router::get_router_power() -{ - /* calculate buffer stats */ - buffer_stats(); +Router::get_router_power() { + /* calculate buffer stats */ + buffer_stats(); - /* calculate cross-bar stats */ - cb_stats(); + /* calculate cross-bar stats */ + cb_stats(); - /* calculate arbiter stats */ - Arbiter vcarb(vc_count, flit_size, buffer.area.w); - Arbiter cbarb(I, flit_size, crossbar.area.w); - vcarb.compute_power(); - cbarb.compute_power(); - arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + - cbarb.power.readOp.dynamic * O; - arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + - cbarb.power.readOp.leakage * O; - arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + - cbarb.power.readOp.gate_leakage * O; + /* calculate arbiter stats */ + Arbiter vcarb(vc_count, flit_size, buffer.area.w); + Arbiter cbarb(I, flit_size, crossbar.area.w); + vcarb.compute_power(); + cbarb.compute_power(); + arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + + cbarb.power.readOp.dynamic * O; + arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + + cbarb.power.readOp.leakage * O; + arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + + cbarb.power.readOp.gate_leakage * O; // arb_stats(); - power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) + - crossbar.power.readOp.dynamic + - arbiter.power.readOp.dynamic)*MIN(I, O)*M; - double pppm_t[4] = {1,I,I,1}; - power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg; + power.readOp.dynamic = ((buffer.power.readOp.dynamic + + buffer.power.writeOp.dynamic) + + crossbar.power.readOp.dynamic + + arbiter.power.readOp.dynamic) * MIN(I, O) * M; + double pppm_t[4] = {1, I, I, 1}; + power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) * + pppm_lkg; } - void -Router::get_router_delay () -{ - FREQUENCY=5; // move this to config file --TODO - cycle_time = (1/(double)FREQUENCY)*1e3; //ps - delay = 4; - max_cyc = 17 * g_tp.FO4; //s - max_cyc *= 1e12; //ps - if (cycle_time < max_cyc) { - FREQUENCY = (1/max_cyc)*1e3; //GHz - } +void +Router::get_router_delay () { + FREQUENCY = 5; // move this to config file --TODO + cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps + delay = 4; + max_cyc = 17 * g_tp.FO4; //s + max_cyc *= 1e12; //ps + if (cycle_time < max_cyc) { + FREQUENCY = (1 / max_cyc) * 1e3; //GHz + } } - void -Router::get_router_area() -{ - area.h = I*buffer.area.h; - area.w = buffer.area.w+crossbar.area.w; +void +Router::get_router_area() { + area.h = I * buffer.area.h; + area.w = buffer.area.w + crossbar.area.w; } - void -Router::calc_router_parameters() -{ - /* calculate router frequency and pipeline cycles */ - get_router_delay(); +void +Router::calc_router_parameters() { + /* calculate router frequency and pipeline cycles */ + get_router_delay(); - /* router power stats */ - get_router_power(); + /* router power stats */ + get_router_power(); - /* area stats */ - get_router_area(); + /* area stats */ + get_router_area(); } - void -Router::print_router() -{ - cout << "\n\nRouter stats:\n"; - cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n"; - cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n"; - cout << "\tNo. of Virtual channels - " << vc_count << "\n"; - cout << "\tNo. of pipeline stages - " << delay << endl; - cout << "\tLink bandwidth - " << flit_size << " (bits)\n"; - cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n"; - cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n"; - cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n"; - cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n"; - cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n"; - cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; + uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; + uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; - technology = technology * 1000.0; // in the unit of nm + technology = technology * 1000.0; // in the unit of nm - // initialize parameters - g_tp.reset(); - double gmp_to_gmn_multiplier_periph_global = 0; + // initialize parameters + g_tp.reset(); + double gmp_to_gmn_multiplier_periph_global = 0; - double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, - curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, - curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, - curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; - double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data - curr_asp_ratio_cell_cam; - double SENSE_AMP_D, SENSE_AMP_P; // J - double area_cell_dram = 0; - double asp_ratio_cell_dram = 0; - double area_cell_sram = 0; - double asp_ratio_cell_sram = 0; - double area_cell_cam = 0; - double asp_ratio_cell_cam = 0; - double mobility_eff_periph_global = 0; - double Vdsat_periph_global = 0; - double nmos_effective_resistance_multiplier; - double width_dram_access_transistor; + double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, + curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, + curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, + curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; + double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data + curr_asp_ratio_cell_cam; + double SENSE_AMP_D, SENSE_AMP_P; // J + double area_cell_dram = 0; + double asp_ratio_cell_dram = 0; + double area_cell_sram = 0; + double asp_ratio_cell_sram = 0; + double area_cell_cam = 0; + double asp_ratio_cell_cam = 0; + double mobility_eff_periph_global = 0; + double Vdsat_periph_global = 0; + double nmos_effective_resistance_multiplier; + double width_dram_access_transistor; - double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date - double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn - double curr_chip_layout_overhead = 0; - double curr_macro_layout_overhead = 0; - double curr_sckt_co_eff = 0; + double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date + double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn + double curr_chip_layout_overhead = 0; + double curr_macro_layout_overhead = 0; + double curr_sckt_co_eff = 0; - if (technology < 181 && technology > 179) - { + if (technology < 181 && technology > 179) { tech_lo = 180; tech_hi = 180; - } - else if (technology < 91 && technology > 89) - { - tech_lo = 90; - tech_hi = 90; - } - else if (technology < 66 && technology > 64) - { - tech_lo = 65; - tech_hi = 65; - } - else if (technology < 46 && technology > 44) - { - tech_lo = 45; - tech_hi = 45; - } - else if (technology < 33 && technology > 31) - { - tech_lo = 32; - tech_hi = 32; - } - else if (technology < 23 && technology > 21) - { - tech_lo = 22; - tech_hi = 22; - if (ram_cell_tech_type == 3 ) - { - cout<<"current version does not support eDRAM technologies at 22nm"< 15) -// { -// tech_lo = 16; -// tech_hi = 16; -// } - else if (technology < 180 && technology > 90) - { - tech_lo = 180; - tech_hi = 90; - } - else if (technology < 90 && technology > 65) - { - tech_lo = 90; - tech_hi = 65; - } - else if (technology < 65 && technology > 45) - { - tech_lo = 65; - tech_hi = 45; - } - else if (technology < 45 && technology > 32) - { - tech_lo = 45; - tech_hi = 32; - } - else if (technology < 32 && technology > 22) - { - tech_lo = 32; - tech_hi = 22; + } else if (technology < 91 && technology > 89) { + tech_lo = 90; + tech_hi = 90; + } else if (technology < 66 && technology > 64) { + tech_lo = 65; + tech_hi = 65; + } else if (technology < 46 && technology > 44) { + tech_lo = 45; + tech_hi = 45; + } else if (technology < 33 && technology > 31) { + tech_lo = 32; + tech_hi = 32; + } else if (technology < 23 && technology > 21) { + tech_lo = 22; + tech_hi = 22; + if (ram_cell_tech_type == 3 ) { + cout << "current version does not support eDRAM technologies at " + << "22nm" << endl; + exit(0); + } + } else if (technology < 180 && technology > 90) { + tech_lo = 180; + tech_hi = 90; + } else if (technology < 90 && technology > 65) { + tech_lo = 90; + tech_hi = 65; + } else if (technology < 65 && technology > 45) { + tech_lo = 65; + tech_hi = 45; + } else if (technology < 45 && technology > 32) { + tech_lo = 45; + tech_hi = 32; + } else if (technology < 32 && technology > 22) { + tech_lo = 32; + tech_hi = 22; } // else if (technology < 22 && technology > 16) // { // tech_lo = 22; // tech_hi = 16; // } - else - { - cout<<"Invalid technology nodes"<F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm - curr_core_tx_density = 1.25*0.7*0.7*0.4; - curr_sckt_co_eff = 1.11; - curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 + curr_asp_ratio_cell_cam = 2.92;//2.5 + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm + curr_core_tx_density = 1.25 * 0.7 * 0.7 * 0.4; + curr_sckt_co_eff = 1.11; + curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb - } + } - if (tech == 90) - { - SENSE_AMP_D = .28e-9; // s - SENSE_AMP_P = 14.7e-15; // J - //90nm technology-node. Corresponds to year 2004 in ITRS - //ITRS HP device type - vdd[0] = 1.2; - Lphy[0] = 0.037;//Lphy is the physical gate-length. micron - Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron - t_ox[0] = 1.2e-3;//micron - v_th[0] = 0.23707;//V - c_ox[0] = 1.79e-14;//F/micron2 - mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 0.128; //V - c_g_ideal[0] = 6.64e-16;//F/micron - c_fringe[0] = 0.08e-15;//F/micron - c_junc[0] = 1e-15;//F/micron2 - I_on_n[0] = 1076.9e-6;//A/micron - I_on_p[0] = 712.6e-6;//A/micron - //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline - nmos_effective_resistance_multiplier = 1.54; - n_to_p_eff_curr_drv_ratio[0] = 2.45; - gmp_to_gmn_multiplier[0] = 1.22; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1; - I_off_n[0][0] = 3.24e-8;//A/micron - I_off_n[0][10] = 4.01e-8; - I_off_n[0][20] = 4.90e-8; - I_off_n[0][30] = 5.92e-8; - I_off_n[0][40] = 7.08e-8; - I_off_n[0][50] = 8.38e-8; - I_off_n[0][60] = 9.82e-8; - I_off_n[0][70] = 1.14e-7; - I_off_n[0][80] = 1.29e-7; - I_off_n[0][90] = 1.43e-7; - I_off_n[0][100] = 1.54e-7; + if (tech == 90) { + SENSE_AMP_D = .28e-9; // s + SENSE_AMP_P = 14.7e-15; // J + //90nm technology-node. Corresponds to year 2004 in ITRS + //ITRS HP device type + vdd[0] = 1.2; + Lphy[0] = 0.037;//Lphy is the physical gate-length. micron + Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron + t_ox[0] = 1.2e-3;//micron + v_th[0] = 0.23707;//V + c_ox[0] = 1.79e-14;//F/micron2 + mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 0.128; //V + c_g_ideal[0] = 6.64e-16;//F/micron + c_fringe[0] = 0.08e-15;//F/micron + c_junc[0] = 1e-15;//F/micron2 + I_on_n[0] = 1076.9e-6;//A/micron + I_on_p[0] = 712.6e-6;//A/micron + //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline + nmos_effective_resistance_multiplier = 1.54; + n_to_p_eff_curr_drv_ratio[0] = 2.45; + gmp_to_gmn_multiplier[0] = 1.22; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1; + I_off_n[0][0] = 3.24e-8;//A/micron + I_off_n[0][10] = 4.01e-8; + I_off_n[0][20] = 4.90e-8; + I_off_n[0][30] = 5.92e-8; + I_off_n[0][40] = 7.08e-8; + I_off_n[0][50] = 8.38e-8; + I_off_n[0][60] = 9.82e-8; + I_off_n[0][70] = 1.14e-7; + I_off_n[0][80] = 1.29e-7; + I_off_n[0][90] = 1.43e-7; + I_off_n[0][100] = 1.54e-7; - I_g_on_n[0][0] = 1.65e-8;//A/micron - I_g_on_n[0][10] = 1.65e-8; - I_g_on_n[0][20] = 1.65e-8; - I_g_on_n[0][30] = 1.65e-8; - I_g_on_n[0][40] = 1.65e-8; - I_g_on_n[0][50] = 1.65e-8; - I_g_on_n[0][60] = 1.65e-8; - I_g_on_n[0][70] = 1.65e-8; - I_g_on_n[0][80] = 1.65e-8; - I_g_on_n[0][90] = 1.65e-8; - I_g_on_n[0][100] = 1.65e-8; + I_g_on_n[0][0] = 1.65e-8;//A/micron + I_g_on_n[0][10] = 1.65e-8; + I_g_on_n[0][20] = 1.65e-8; + I_g_on_n[0][30] = 1.65e-8; + I_g_on_n[0][40] = 1.65e-8; + I_g_on_n[0][50] = 1.65e-8; + I_g_on_n[0][60] = 1.65e-8; + I_g_on_n[0][70] = 1.65e-8; + I_g_on_n[0][80] = 1.65e-8; + I_g_on_n[0][90] = 1.65e-8; + I_g_on_n[0][100] = 1.65e-8; - //ITRS LSTP device type - vdd[1] = 1.3; - Lphy[1] = 0.075; - Lelec[1] = 0.0486; - t_ox[1] = 2.2e-3; - v_th[1] = 0.48203; - c_ox[1] = 1.22e-14; - mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 0.373; - c_g_ideal[1] = 9.15e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 503.6e-6; - I_on_p[1] = 235.1e-6; - nmos_effective_resistance_multiplier = 1.92; - n_to_p_eff_curr_drv_ratio[1] = 2.44; - gmp_to_gmn_multiplier[1] =0.88; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1; - I_off_n[1][0] = 2.81e-12; - I_off_n[1][10] = 4.76e-12; - I_off_n[1][20] = 7.82e-12; - I_off_n[1][30] = 1.25e-11; - I_off_n[1][40] = 1.94e-11; - I_off_n[1][50] = 2.94e-11; - I_off_n[1][60] = 4.36e-11; - I_off_n[1][70] = 6.32e-11; - I_off_n[1][80] = 8.95e-11; - I_off_n[1][90] = 1.25e-10; - I_off_n[1][100] = 1.7e-10; + //ITRS LSTP device type + vdd[1] = 1.3; + Lphy[1] = 0.075; + Lelec[1] = 0.0486; + t_ox[1] = 2.2e-3; + v_th[1] = 0.48203; + c_ox[1] = 1.22e-14; + mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 0.373; + c_g_ideal[1] = 9.15e-16; + c_fringe[1] = 0.08e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 503.6e-6; + I_on_p[1] = 235.1e-6; + nmos_effective_resistance_multiplier = 1.92; + n_to_p_eff_curr_drv_ratio[1] = 2.44; + gmp_to_gmn_multiplier[1] = 0.88; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1; + I_off_n[1][0] = 2.81e-12; + I_off_n[1][10] = 4.76e-12; + I_off_n[1][20] = 7.82e-12; + I_off_n[1][30] = 1.25e-11; + I_off_n[1][40] = 1.94e-11; + I_off_n[1][50] = 2.94e-11; + I_off_n[1][60] = 4.36e-11; + I_off_n[1][70] = 6.32e-11; + I_off_n[1][80] = 8.95e-11; + I_off_n[1][90] = 1.25e-10; + I_off_n[1][100] = 1.7e-10; - I_g_on_n[1][0] = 3.87e-11;//A/micron - I_g_on_n[1][10] = 3.87e-11; - I_g_on_n[1][20] = 3.87e-11; - I_g_on_n[1][30] = 3.87e-11; - I_g_on_n[1][40] = 3.87e-11; - I_g_on_n[1][50] = 3.87e-11; - I_g_on_n[1][60] = 3.87e-11; - I_g_on_n[1][70] = 3.87e-11; - I_g_on_n[1][80] = 3.87e-11; - I_g_on_n[1][90] = 3.87e-11; - I_g_on_n[1][100] = 3.87e-11; + I_g_on_n[1][0] = 3.87e-11;//A/micron + I_g_on_n[1][10] = 3.87e-11; + I_g_on_n[1][20] = 3.87e-11; + I_g_on_n[1][30] = 3.87e-11; + I_g_on_n[1][40] = 3.87e-11; + I_g_on_n[1][50] = 3.87e-11; + I_g_on_n[1][60] = 3.87e-11; + I_g_on_n[1][70] = 3.87e-11; + I_g_on_n[1][80] = 3.87e-11; + I_g_on_n[1][90] = 3.87e-11; + I_g_on_n[1][100] = 3.87e-11; - //ITRS LOP device type - vdd[2] = 0.9; - Lphy[2] = 0.053; - Lelec[2] = 0.0354; - t_ox[2] = 1.5e-3; - v_th[2] = 0.30764; - c_ox[2] = 1.59e-14; - mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 0.113; - c_g_ideal[2] = 8.45e-16; - c_fringe[2] = 0.08e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 386.6e-6; - I_on_p[2] = 209.7e-6; - nmos_effective_resistance_multiplier = 1.77; - n_to_p_eff_curr_drv_ratio[2] = 2.54; - gmp_to_gmn_multiplier[2] = 0.98; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1; - I_off_n[2][0] = 2.14e-9; - I_off_n[2][10] = 2.9e-9; - I_off_n[2][20] = 3.87e-9; - I_off_n[2][30] = 5.07e-9; - I_off_n[2][40] = 6.54e-9; - I_off_n[2][50] = 8.27e-8; - I_off_n[2][60] = 1.02e-7; - I_off_n[2][70] = 1.20e-7; - I_off_n[2][80] = 1.36e-8; - I_off_n[2][90] = 1.52e-8; - I_off_n[2][100] = 1.73e-8; + //ITRS LOP device type + vdd[2] = 0.9; + Lphy[2] = 0.053; + Lelec[2] = 0.0354; + t_ox[2] = 1.5e-3; + v_th[2] = 0.30764; + c_ox[2] = 1.59e-14; + mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 0.113; + c_g_ideal[2] = 8.45e-16; + c_fringe[2] = 0.08e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 386.6e-6; + I_on_p[2] = 209.7e-6; + nmos_effective_resistance_multiplier = 1.77; + n_to_p_eff_curr_drv_ratio[2] = 2.54; + gmp_to_gmn_multiplier[2] = 0.98; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1; + I_off_n[2][0] = 2.14e-9; + I_off_n[2][10] = 2.9e-9; + I_off_n[2][20] = 3.87e-9; + I_off_n[2][30] = 5.07e-9; + I_off_n[2][40] = 6.54e-9; + I_off_n[2][50] = 8.27e-8; + I_off_n[2][60] = 1.02e-7; + I_off_n[2][70] = 1.20e-7; + I_off_n[2][80] = 1.36e-8; + I_off_n[2][90] = 1.52e-8; + I_off_n[2][100] = 1.73e-8; - I_g_on_n[2][0] = 4.31e-8;//A/micron - I_g_on_n[2][10] = 4.31e-8; - I_g_on_n[2][20] = 4.31e-8; - I_g_on_n[2][30] = 4.31e-8; - I_g_on_n[2][40] = 4.31e-8; - I_g_on_n[2][50] = 4.31e-8; - I_g_on_n[2][60] = 4.31e-8; - I_g_on_n[2][70] = 4.31e-8; - I_g_on_n[2][80] = 4.31e-8; - I_g_on_n[2][90] = 4.31e-8; - I_g_on_n[2][100] = 4.31e-8; + I_g_on_n[2][0] = 4.31e-8;//A/micron + I_g_on_n[2][10] = 4.31e-8; + I_g_on_n[2][20] = 4.31e-8; + I_g_on_n[2][30] = 4.31e-8; + I_g_on_n[2][40] = 4.31e-8; + I_g_on_n[2][50] = 4.31e-8; + I_g_on_n[2][60] = 4.31e-8; + I_g_on_n[2][70] = 4.31e-8; + I_g_on_n[2][80] = 4.31e-8; + I_g_on_n[2][90] = 4.31e-8; + I_g_on_n[2][100] = 4.31e-8; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.2; - Lphy[3] = 0.12; - Lelec[3] = 0.0756; - curr_v_th_dram_access_transistor = 0.4545; - width_dram_access_transistor = 0.14; - curr_I_on_dram_cell = 45e-6; - curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 0.168; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.2; + Lphy[3] = 0.12; + Lelec[3] = 0.0756; + curr_v_th_dram_access_transistor = 0.4545; + width_dram_access_transistor = 0.14; + curr_I_on_dram_cell = 45e-6; + curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 0.168; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters - curr_vpp = 1.6; - t_ox[3] = 2.2e-3; - v_th[3] = 0.4545; - c_ox[3] = 1.22e-14; - mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.3; - c_g_ideal[3] = 1.47e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 321.6e-6; - I_on_p[3] = 203.3e-6; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.42e-11; - I_off_n[3][10] = 2.25e-11; - I_off_n[3][20] = 3.46e-11; - I_off_n[3][30] = 5.18e-11; - I_off_n[3][40] = 7.58e-11; - I_off_n[3][50] = 1.08e-10; - I_off_n[3][60] = 1.51e-10; - I_off_n[3][70] = 2.02e-10; - I_off_n[3][80] = 2.57e-10; - I_off_n[3][90] = 3.14e-10; - I_off_n[3][100] = 3.85e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.6; - Lphy[3] = 0.09; - Lelec[3] = 0.0576; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.09; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.09*0.09; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; + //LP-DRAM wordline transistor parameters + curr_vpp = 1.6; + t_ox[3] = 2.2e-3; + v_th[3] = 0.4545; + c_ox[3] = 1.22e-14; + mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.3; + c_g_ideal[3] = 1.47e-15; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 321.6e-6; + I_on_p[3] = 203.3e-6; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 1.95; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.42e-11; + I_off_n[3][10] = 2.25e-11; + I_off_n[3][20] = 3.46e-11; + I_off_n[3][30] = 5.18e-11; + I_off_n[3][40] = 7.58e-11; + I_off_n[3][50] = 1.08e-10; + I_off_n[3][60] = 1.51e-10; + I_off_n[3][70] = 2.02e-10; + I_off_n[3][80] = 2.57e-10; + I_off_n[3][90] = 3.14e-10; + I_off_n[3][100] = 3.85e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.6; + Lphy[3] = 0.09; + Lelec[3] = 0.0576; + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.09; + curr_I_on_dram_cell = 20e-6; + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.09 * 0.09; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters - curr_vpp = 3.7; - t_ox[3] = 5.5e-3; - v_th[3] = 1.0; - c_ox[3] = 5.65e-15; - mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.32; - c_g_ideal[3] = 5.08e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1094.3e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.62; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 5.80e-15; - I_off_n[3][10] = 1.21e-14; - I_off_n[3][20] = 2.42e-14; - I_off_n[3][30] = 4.65e-14; - I_off_n[3][40] = 8.60e-14; - I_off_n[3][50] = 1.54e-13; - I_off_n[3][60] = 2.66e-13; - I_off_n[3][70] = 4.45e-13; - I_off_n[3][80] = 7.17e-13; - I_off_n[3][90] = 1.11e-12; - I_off_n[3][100] = 1.67e-12; - } + //COMM-DRAM wordline transistor parameters + curr_vpp = 3.7; + t_ox[3] = 5.5e-3; + v_th[3] = 1.0; + c_ox[3] = 5.65e-15; + mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.32; + c_g_ideal[3] = 5.08e-16; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 1094.3e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.62; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 5.80e-15; + I_off_n[3][10] = 1.21e-14; + I_off_n[3][20] = 2.42e-14; + I_off_n[3][30] = 4.65e-14; + I_off_n[3][40] = 8.60e-14; + I_off_n[3][50] = 1.54e-13; + I_off_n[3][60] = 2.66e-13; + I_off_n[3][70] = 4.45e-13; + I_off_n[3][80] = 7.17e-13; + I_off_n[3][90] = 1.11e-12; + I_off_n[3][100] = 1.67e-12; + } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1; - curr_core_tx_density = 1.25*0.7*0.7; - curr_sckt_co_eff = 1.1539; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 + curr_asp_ratio_cell_cam = 2.92;//2.5 + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 1; + curr_core_tx_density = 1.25 * 0.7 * 0.7; + curr_sckt_co_eff = 1.1539; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + } - if (tech == 65) - { //65nm technology-node. Corresponds to year 2007 in ITRS - //ITRS HP device type - SENSE_AMP_D = .2e-9; // s - SENSE_AMP_P = 5.7e-15; // J - vdd[0] = 1.1; - Lphy[0] = 0.025; - Lelec[0] = 0.019; - t_ox[0] = 1.1e-3; - v_th[0] = .19491; - c_ox[0] = 1.88e-14; - mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 7.71e-2; - c_g_ideal[0] = 4.69e-16; - c_fringe[0] = 0.077e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 1197.2e-6; - I_on_p[0] = 870.8e-6; - nmos_effective_resistance_multiplier = 1.50; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.74; - //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first - //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. - I_off_n[0][0] = 1.96e-7; - I_off_n[0][10] = 2.29e-7; - I_off_n[0][20] = 2.66e-7; - I_off_n[0][30] = 3.05e-7; - I_off_n[0][40] = 3.49e-7; - I_off_n[0][50] = 3.95e-7; - I_off_n[0][60] = 4.45e-7; - I_off_n[0][70] = 4.97e-7; - I_off_n[0][80] = 5.48e-7; - I_off_n[0][90] = 5.94e-7; - I_off_n[0][100] = 6.3e-7; - I_g_on_n[0][0] = 4.09e-8;//A/micron - I_g_on_n[0][10] = 4.09e-8; - I_g_on_n[0][20] = 4.09e-8; - I_g_on_n[0][30] = 4.09e-8; - I_g_on_n[0][40] = 4.09e-8; - I_g_on_n[0][50] = 4.09e-8; - I_g_on_n[0][60] = 4.09e-8; - I_g_on_n[0][70] = 4.09e-8; - I_g_on_n[0][80] = 4.09e-8; - I_g_on_n[0][90] = 4.09e-8; - I_g_on_n[0][100] = 4.09e-8; + if (tech == 65) { + //65nm technology-node. Corresponds to year 2007 in ITRS + //ITRS HP device type + SENSE_AMP_D = .2e-9; // s + SENSE_AMP_P = 5.7e-15; // J + vdd[0] = 1.1; + Lphy[0] = 0.025; + Lelec[0] = 0.019; + t_ox[0] = 1.1e-3; + v_th[0] = .19491; + c_ox[0] = 1.88e-14; + mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[0] = 7.71e-2; + c_g_ideal[0] = 4.69e-16; + c_fringe[0] = 0.077e-15; + c_junc[0] = 1e-15; + I_on_n[0] = 1197.2e-6; + I_on_p[0] = 870.8e-6; + nmos_effective_resistance_multiplier = 1.50; + n_to_p_eff_curr_drv_ratio[0] = 2.41; + gmp_to_gmn_multiplier[0] = 1.38; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; + long_channel_leakage_reduction[0] = 1 / 3.74; + //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first + //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. + I_off_n[0][0] = 1.96e-7; + I_off_n[0][10] = 2.29e-7; + I_off_n[0][20] = 2.66e-7; + I_off_n[0][30] = 3.05e-7; + I_off_n[0][40] = 3.49e-7; + I_off_n[0][50] = 3.95e-7; + I_off_n[0][60] = 4.45e-7; + I_off_n[0][70] = 4.97e-7; + I_off_n[0][80] = 5.48e-7; + I_off_n[0][90] = 5.94e-7; + I_off_n[0][100] = 6.3e-7; + I_g_on_n[0][0] = 4.09e-8;//A/micron + I_g_on_n[0][10] = 4.09e-8; + I_g_on_n[0][20] = 4.09e-8; + I_g_on_n[0][30] = 4.09e-8; + I_g_on_n[0][40] = 4.09e-8; + I_g_on_n[0][50] = 4.09e-8; + I_g_on_n[0][60] = 4.09e-8; + I_g_on_n[0][70] = 4.09e-8; + I_g_on_n[0][80] = 4.09e-8; + I_g_on_n[0][90] = 4.09e-8; + I_g_on_n[0][100] = 4.09e-8; - //ITRS LSTP device type - vdd[1] = 1.2; - Lphy[1] = 0.045; - Lelec[1] = 0.0298; - t_ox[1] = 1.9e-3; - v_th[1] = 0.52354; - c_ox[1] = 1.36e-14; - mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 0.128; - c_g_ideal[1] = 6.14e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 519.2e-6; - I_on_p[1] = 266e-6; - nmos_effective_resistance_multiplier = 1.96; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.82; - I_off_n[1][0] = 9.12e-12; - I_off_n[1][10] = 1.49e-11; - I_off_n[1][20] = 2.36e-11; - I_off_n[1][30] = 3.64e-11; - I_off_n[1][40] = 5.48e-11; - I_off_n[1][50] = 8.05e-11; - I_off_n[1][60] = 1.15e-10; - I_off_n[1][70] = 1.59e-10; - I_off_n[1][80] = 2.1e-10; - I_off_n[1][90] = 2.62e-10; - I_off_n[1][100] = 3.21e-10; + //ITRS LSTP device type + vdd[1] = 1.2; + Lphy[1] = 0.045; + Lelec[1] = 0.0298; + t_ox[1] = 1.9e-3; + v_th[1] = 0.52354; + c_ox[1] = 1.36e-14; + mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 0.128; + c_g_ideal[1] = 6.14e-16; + c_fringe[1] = 0.08e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 519.2e-6; + I_on_p[1] = 266e-6; + nmos_effective_resistance_multiplier = 1.96; + n_to_p_eff_curr_drv_ratio[1] = 2.23; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1 / 2.82; + I_off_n[1][0] = 9.12e-12; + I_off_n[1][10] = 1.49e-11; + I_off_n[1][20] = 2.36e-11; + I_off_n[1][30] = 3.64e-11; + I_off_n[1][40] = 5.48e-11; + I_off_n[1][50] = 8.05e-11; + I_off_n[1][60] = 1.15e-10; + I_off_n[1][70] = 1.59e-10; + I_off_n[1][80] = 2.1e-10; + I_off_n[1][90] = 2.62e-10; + I_off_n[1][100] = 3.21e-10; - I_g_on_n[1][0] = 1.09e-10;//A/micron - I_g_on_n[1][10] = 1.09e-10; - I_g_on_n[1][20] = 1.09e-10; - I_g_on_n[1][30] = 1.09e-10; - I_g_on_n[1][40] = 1.09e-10; - I_g_on_n[1][50] = 1.09e-10; - I_g_on_n[1][60] = 1.09e-10; - I_g_on_n[1][70] = 1.09e-10; - I_g_on_n[1][80] = 1.09e-10; - I_g_on_n[1][90] = 1.09e-10; - I_g_on_n[1][100] = 1.09e-10; + I_g_on_n[1][0] = 1.09e-10;//A/micron + I_g_on_n[1][10] = 1.09e-10; + I_g_on_n[1][20] = 1.09e-10; + I_g_on_n[1][30] = 1.09e-10; + I_g_on_n[1][40] = 1.09e-10; + I_g_on_n[1][50] = 1.09e-10; + I_g_on_n[1][60] = 1.09e-10; + I_g_on_n[1][70] = 1.09e-10; + I_g_on_n[1][80] = 1.09e-10; + I_g_on_n[1][90] = 1.09e-10; + I_g_on_n[1][100] = 1.09e-10; - //ITRS LOP device type - vdd[2] = 0.8; - Lphy[2] = 0.032; - Lelec[2] = 0.0216; - t_ox[2] = 1.2e-3; - v_th[2] = 0.28512; - c_ox[2] = 1.87e-14; - mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 0.292; - c_g_ideal[2] = 6e-16; - c_fringe[2] = 0.08e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 573.1e-6; - I_on_p[2] = 340.6e-6; - nmos_effective_resistance_multiplier = 1.82; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/2.05; - I_off_n[2][0] = 4.9e-9; - I_off_n[2][10] = 6.49e-9; - I_off_n[2][20] = 8.45e-9; - I_off_n[2][30] = 1.08e-8; - I_off_n[2][40] = 1.37e-8; - I_off_n[2][50] = 1.71e-8; - I_off_n[2][60] = 2.09e-8; - I_off_n[2][70] = 2.48e-8; - I_off_n[2][80] = 2.84e-8; - I_off_n[2][90] = 3.13e-8; - I_off_n[2][100] = 3.42e-8; + //ITRS LOP device type + vdd[2] = 0.8; + Lphy[2] = 0.032; + Lelec[2] = 0.0216; + t_ox[2] = 1.2e-3; + v_th[2] = 0.28512; + c_ox[2] = 1.87e-14; + mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 0.292; + c_g_ideal[2] = 6e-16; + c_fringe[2] = 0.08e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 573.1e-6; + I_on_p[2] = 340.6e-6; + nmos_effective_resistance_multiplier = 1.82; + n_to_p_eff_curr_drv_ratio[2] = 2.28; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1 / 2.05; + I_off_n[2][0] = 4.9e-9; + I_off_n[2][10] = 6.49e-9; + I_off_n[2][20] = 8.45e-9; + I_off_n[2][30] = 1.08e-8; + I_off_n[2][40] = 1.37e-8; + I_off_n[2][50] = 1.71e-8; + I_off_n[2][60] = 2.09e-8; + I_off_n[2][70] = 2.48e-8; + I_off_n[2][80] = 2.84e-8; + I_off_n[2][90] = 3.13e-8; + I_off_n[2][100] = 3.42e-8; - I_g_on_n[2][0] = 9.61e-9;//A/micron - I_g_on_n[2][10] = 9.61e-9; - I_g_on_n[2][20] = 9.61e-9; - I_g_on_n[2][30] = 9.61e-9; - I_g_on_n[2][40] = 9.61e-9; - I_g_on_n[2][50] = 9.61e-9; - I_g_on_n[2][60] = 9.61e-9; - I_g_on_n[2][70] = 9.61e-9; - I_g_on_n[2][80] = 9.61e-9; - I_g_on_n[2][90] = 9.61e-9; - I_g_on_n[2][100] = 9.61e-9; + I_g_on_n[2][0] = 9.61e-9;//A/micron + I_g_on_n[2][10] = 9.61e-9; + I_g_on_n[2][20] = 9.61e-9; + I_g_on_n[2][30] = 9.61e-9; + I_g_on_n[2][40] = 9.61e-9; + I_g_on_n[2][50] = 9.61e-9; + I_g_on_n[2][60] = 9.61e-9; + I_g_on_n[2][70] = 9.61e-9; + I_g_on_n[2][80] = 9.61e-9; + I_g_on_n[2][90] = 9.61e-9; + I_g_on_n[2][100] = 9.61e-9; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.2; - Lphy[3] = 0.12; - Lelec[3] = 0.0756; - curr_v_th_dram_access_transistor = 0.43806; - width_dram_access_transistor = 0.09; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 0.11; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.2; + Lphy[3] = 0.12; + Lelec[3] = 0.0756; + curr_v_th_dram_access_transistor = 0.43806; + width_dram_access_transistor = 0.09; + curr_I_on_dram_cell = 36e-6; + curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 0.11; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters - curr_vpp = 1.6; - t_ox[3] = 2.2e-3; - v_th[3] = 0.43806; - c_ox[3] = 1.22e-14; - mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.43806; - c_g_ideal[3] = 1.46e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 399.8e-6; - I_on_p[3] = 243.4e-6; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.23e-11; - I_off_n[3][10] = 3.46e-11; - I_off_n[3][20] = 5.24e-11; - I_off_n[3][30] = 7.75e-11; - I_off_n[3][40] = 1.12e-10; - I_off_n[3][50] = 1.58e-10; - I_off_n[3][60] = 2.18e-10; - I_off_n[3][70] = 2.88e-10; - I_off_n[3][80] = 3.63e-10; - I_off_n[3][90] = 4.41e-10; - I_off_n[3][100] = 5.36e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.3; - Lphy[3] = 0.065; - Lelec[3] = 0.0426; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.065; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.065*0.065; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; + //LP-DRAM wordline transistor parameters + curr_vpp = 1.6; + t_ox[3] = 2.2e-3; + v_th[3] = 0.43806; + c_ox[3] = 1.22e-14; + mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.43806; + c_g_ideal[3] = 1.46e-15; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15 ; + I_on_n[3] = 399.8e-6; + I_on_p[3] = 243.4e-6; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 2.23e-11; + I_off_n[3][10] = 3.46e-11; + I_off_n[3][20] = 5.24e-11; + I_off_n[3][30] = 7.75e-11; + I_off_n[3][40] = 1.12e-10; + I_off_n[3][50] = 1.58e-10; + I_off_n[3][60] = 2.18e-10; + I_off_n[3][70] = 2.88e-10; + I_off_n[3][80] = 3.63e-10; + I_off_n[3][90] = 4.41e-10; + I_off_n[3][100] = 5.36e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.3; + Lphy[3] = 0.065; + Lelec[3] = 0.0426; + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.065; + curr_I_on_dram_cell = 20e-6; + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.065 * 0.065; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters - curr_vpp = 3.3; - t_ox[3] = 5e-3; - v_th[3] = 1.0; - c_ox[3] = 6.16e-15; - mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.385; - c_g_ideal[3] = 4e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 1031e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 2.39; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.80e-14; - I_off_n[3][10] = 3.64e-14; - I_off_n[3][20] = 7.03e-14; - I_off_n[3][30] = 1.31e-13; - I_off_n[3][40] = 2.35e-13; - I_off_n[3][50] = 4.09e-13; - I_off_n[3][60] = 6.89e-13; - I_off_n[3][70] = 1.13e-12; - I_off_n[3][80] = 1.78e-12; - I_off_n[3][90] = 2.71e-12; - I_off_n[3][100] = 3.99e-12; - } + //COMM-DRAM wordline transistor parameters + curr_vpp = 3.3; + t_ox[3] = 5e-3; + v_th[3] = 1.0; + c_ox[3] = 6.16e-15; + mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.385; + c_g_ideal[3] = 4e-16; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15 ; + I_on_n[3] = 1031e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.69; + n_to_p_eff_curr_drv_ratio[3] = 2.39; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.80e-14; + I_off_n[3][10] = 3.64e-14; + I_off_n[3][20] = 7.03e-14; + I_off_n[3][30] = 1.31e-13; + I_off_n[3][40] = 2.35e-13; + I_off_n[3][50] = 4.09e-13; + I_off_n[3][60] = 6.89e-13; + I_off_n[3][70] = 1.13e-12; + I_off_n[3][80] = 1.78e-12; + I_off_n[3][90] = 2.71e-12; + I_off_n[3][100] = 3.99e-12; + } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor - curr_core_tx_density = 1.25*0.7; - curr_sckt_co_eff = 1.1359; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor + curr_core_tx_density = 1.25 * 0.7; + curr_sckt_co_eff = 1.1359; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + } - if (tech == 45) - { //45nm technology-node. Corresponds to year 2010 in ITRS - //ITRS HP device type - SENSE_AMP_D = .04e-9; // s - SENSE_AMP_P = 2.7e-15; // J - vdd[0] = 1.0; - Lphy[0] = 0.018; - Lelec[0] = 0.01345; - t_ox[0] = 0.65e-3; - v_th[0] = .18035; - c_ox[0] = 3.77e-14; - mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 9.38E-2; - c_g_ideal[0] = 6.78e-16; - c_fringe[0] = 0.05e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2046.6e-6; - //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of - //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm - I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI - nmos_effective_resistance_multiplier = 1.51; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 - I_off_n[0][0] = 2.8e-7; - I_off_n[0][10] = 3.28e-7; - I_off_n[0][20] = 3.81e-7; - I_off_n[0][30] = 4.39e-7; - I_off_n[0][40] = 5.02e-7; - I_off_n[0][50] = 5.69e-7; - I_off_n[0][60] = 6.42e-7; - I_off_n[0][70] = 7.2e-7; - I_off_n[0][80] = 8.03e-7; - I_off_n[0][90] = 8.91e-7; - I_off_n[0][100] = 9.84e-7; + if (tech == 45) { + //45nm technology-node. Corresponds to year 2010 in ITRS + //ITRS HP device type + SENSE_AMP_D = .04e-9; // s + SENSE_AMP_P = 2.7e-15; // J + vdd[0] = 1.0; + Lphy[0] = 0.018; + Lelec[0] = 0.01345; + t_ox[0] = 0.65e-3; + v_th[0] = .18035; + c_ox[0] = 3.77e-14; + mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[0] = 9.38E-2; + c_g_ideal[0] = 6.78e-16; + c_fringe[0] = 0.05e-15; + c_junc[0] = 1e-15; + I_on_n[0] = 2046.6e-6; + //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of + //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm + I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI + nmos_effective_resistance_multiplier = 1.51; + n_to_p_eff_curr_drv_ratio[0] = 2.41; + gmp_to_gmn_multiplier[0] = 1.38; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; + //Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, + //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 + long_channel_leakage_reduction[0] = 1 / 3.546; + I_off_n[0][0] = 2.8e-7; + I_off_n[0][10] = 3.28e-7; + I_off_n[0][20] = 3.81e-7; + I_off_n[0][30] = 4.39e-7; + I_off_n[0][40] = 5.02e-7; + I_off_n[0][50] = 5.69e-7; + I_off_n[0][60] = 6.42e-7; + I_off_n[0][70] = 7.2e-7; + I_off_n[0][80] = 8.03e-7; + I_off_n[0][90] = 8.91e-7; + I_off_n[0][100] = 9.84e-7; - I_g_on_n[0][0] = 3.59e-8;//A/micron - I_g_on_n[0][10] = 3.59e-8; - I_g_on_n[0][20] = 3.59e-8; - I_g_on_n[0][30] = 3.59e-8; - I_g_on_n[0][40] = 3.59e-8; - I_g_on_n[0][50] = 3.59e-8; - I_g_on_n[0][60] = 3.59e-8; - I_g_on_n[0][70] = 3.59e-8; - I_g_on_n[0][80] = 3.59e-8; - I_g_on_n[0][90] = 3.59e-8; - I_g_on_n[0][100] = 3.59e-8; + I_g_on_n[0][0] = 3.59e-8;//A/micron + I_g_on_n[0][10] = 3.59e-8; + I_g_on_n[0][20] = 3.59e-8; + I_g_on_n[0][30] = 3.59e-8; + I_g_on_n[0][40] = 3.59e-8; + I_g_on_n[0][50] = 3.59e-8; + I_g_on_n[0][60] = 3.59e-8; + I_g_on_n[0][70] = 3.59e-8; + I_g_on_n[0][80] = 3.59e-8; + I_g_on_n[0][90] = 3.59e-8; + I_g_on_n[0][100] = 3.59e-8; - //ITRS LSTP device type - vdd[1] = 1.1; - Lphy[1] = 0.028; - Lelec[1] = 0.0212; - t_ox[1] = 1.4e-3; - v_th[1] = 0.50245; - c_ox[1] = 2.01e-14; - mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 9.12e-2; - c_g_ideal[1] = 5.18e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 666.2e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.08; - I_off_n[1][0] = 1.01e-11; - I_off_n[1][10] = 1.65e-11; - I_off_n[1][20] = 2.62e-11; - I_off_n[1][30] = 4.06e-11; - I_off_n[1][40] = 6.12e-11; - I_off_n[1][50] = 9.02e-11; - I_off_n[1][60] = 1.3e-10; - I_off_n[1][70] = 1.83e-10; - I_off_n[1][80] = 2.51e-10; - I_off_n[1][90] = 3.29e-10; - I_off_n[1][100] = 4.1e-10; + //ITRS LSTP device type + vdd[1] = 1.1; + Lphy[1] = 0.028; + Lelec[1] = 0.0212; + t_ox[1] = 1.4e-3; + v_th[1] = 0.50245; + c_ox[1] = 2.01e-14; + mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 9.12e-2; + c_g_ideal[1] = 5.18e-16; + c_fringe[1] = 0.08e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 666.2e-6; + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2.23; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1 / 2.08; + I_off_n[1][0] = 1.01e-11; + I_off_n[1][10] = 1.65e-11; + I_off_n[1][20] = 2.62e-11; + I_off_n[1][30] = 4.06e-11; + I_off_n[1][40] = 6.12e-11; + I_off_n[1][50] = 9.02e-11; + I_off_n[1][60] = 1.3e-10; + I_off_n[1][70] = 1.83e-10; + I_off_n[1][80] = 2.51e-10; + I_off_n[1][90] = 3.29e-10; + I_off_n[1][100] = 4.1e-10; - I_g_on_n[1][0] = 9.47e-12;//A/micron - I_g_on_n[1][10] = 9.47e-12; - I_g_on_n[1][20] = 9.47e-12; - I_g_on_n[1][30] = 9.47e-12; - I_g_on_n[1][40] = 9.47e-12; - I_g_on_n[1][50] = 9.47e-12; - I_g_on_n[1][60] = 9.47e-12; - I_g_on_n[1][70] = 9.47e-12; - I_g_on_n[1][80] = 9.47e-12; - I_g_on_n[1][90] = 9.47e-12; - I_g_on_n[1][100] = 9.47e-12; + I_g_on_n[1][0] = 9.47e-12;//A/micron + I_g_on_n[1][10] = 9.47e-12; + I_g_on_n[1][20] = 9.47e-12; + I_g_on_n[1][30] = 9.47e-12; + I_g_on_n[1][40] = 9.47e-12; + I_g_on_n[1][50] = 9.47e-12; + I_g_on_n[1][60] = 9.47e-12; + I_g_on_n[1][70] = 9.47e-12; + I_g_on_n[1][80] = 9.47e-12; + I_g_on_n[1][90] = 9.47e-12; + I_g_on_n[1][100] = 9.47e-12; - //ITRS LOP device type - vdd[2] = 0.7; - Lphy[2] = 0.022; - Lelec[2] = 0.016; - t_ox[2] = 0.9e-3; - v_th[2] = 0.22599; - c_ox[2] = 2.82e-14;//F/micron2 - mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 5.71e-2; - c_g_ideal[2] = 6.2e-16; - c_fringe[2] = 0.073e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 748.9e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.76; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.92; - I_off_n[2][0] = 4.03e-9; - I_off_n[2][10] = 5.02e-9; - I_off_n[2][20] = 6.18e-9; - I_off_n[2][30] = 7.51e-9; - I_off_n[2][40] = 9.04e-9; - I_off_n[2][50] = 1.08e-8; - I_off_n[2][60] = 1.27e-8; - I_off_n[2][70] = 1.47e-8; - I_off_n[2][80] = 1.66e-8; - I_off_n[2][90] = 1.84e-8; - I_off_n[2][100] = 2.03e-8; + //ITRS LOP device type + vdd[2] = 0.7; + Lphy[2] = 0.022; + Lelec[2] = 0.016; + t_ox[2] = 0.9e-3; + v_th[2] = 0.22599; + c_ox[2] = 2.82e-14;//F/micron2 + mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 5.71e-2; + c_g_ideal[2] = 6.2e-16; + c_fringe[2] = 0.073e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 748.9e-6; + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.76; + n_to_p_eff_curr_drv_ratio[2] = 2.28; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1 / 1.92; + I_off_n[2][0] = 4.03e-9; + I_off_n[2][10] = 5.02e-9; + I_off_n[2][20] = 6.18e-9; + I_off_n[2][30] = 7.51e-9; + I_off_n[2][40] = 9.04e-9; + I_off_n[2][50] = 1.08e-8; + I_off_n[2][60] = 1.27e-8; + I_off_n[2][70] = 1.47e-8; + I_off_n[2][80] = 1.66e-8; + I_off_n[2][90] = 1.84e-8; + I_off_n[2][100] = 2.03e-8; - I_g_on_n[2][0] = 3.24e-8;//A/micron - I_g_on_n[2][10] = 4.01e-8; - I_g_on_n[2][20] = 4.90e-8; - I_g_on_n[2][30] = 5.92e-8; - I_g_on_n[2][40] = 7.08e-8; - I_g_on_n[2][50] = 8.38e-8; - I_g_on_n[2][60] = 9.82e-8; - I_g_on_n[2][70] = 1.14e-7; - I_g_on_n[2][80] = 1.29e-7; - I_g_on_n[2][90] = 1.43e-7; - I_g_on_n[2][100] = 1.54e-7; + I_g_on_n[2][0] = 3.24e-8;//A/micron + I_g_on_n[2][10] = 4.01e-8; + I_g_on_n[2][20] = 4.90e-8; + I_g_on_n[2][30] = 5.92e-8; + I_g_on_n[2][40] = 7.08e-8; + I_g_on_n[2][50] = 8.38e-8; + I_g_on_n[2][60] = 9.82e-8; + I_g_on_n[2][70] = 1.14e-7; + I_g_on_n[2][80] = 1.29e-7; + I_g_on_n[2][90] = 1.43e-7; + I_g_on_n[2][100] = 1.54e-7; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.078; - Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44559; - width_dram_access_transistor = 0.079; - curr_I_on_dram_cell = 36e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.1; + Lphy[3] = 0.078; + Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + curr_v_th_dram_access_transistor = 0.44559; + width_dram_access_transistor = 0.079; + curr_I_on_dram_cell = 36e-6;//A + curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2.1e-3; - v_th[3] = 0.44559; - c_ox[3] = 1.41e-14; - mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.181; - c_g_ideal[3] = 1.10e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 456e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.54e-11; - I_off_n[3][10] = 3.94e-11; - I_off_n[3][20] = 5.95e-11; - I_off_n[3][30] = 8.79e-11; - I_off_n[3][40] = 1.27e-10; - I_off_n[3][50] = 1.79e-10; - I_off_n[3][60] = 2.47e-10; - I_off_n[3][70] = 3.31e-10; - I_off_n[3][80] = 4.26e-10; - I_off_n[3][90] = 5.27e-10; - I_off_n[3][100] = 6.46e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.045; - Lelec[3] = 0.0298; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.045; - curr_I_on_dram_cell = 20e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.045*0.045; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; + //LP-DRAM wordline transistor parameters + curr_vpp = 1.5; + t_ox[3] = 2.1e-3; + v_th[3] = 0.44559; + c_ox[3] = 1.41e-14; + mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.181; + c_g_ideal[3] = 1.10e-15; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 456e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 2.54e-11; + I_off_n[3][10] = 3.94e-11; + I_off_n[3][20] = 5.95e-11; + I_off_n[3][30] = 8.79e-11; + I_off_n[3][40] = 1.27e-10; + I_off_n[3][50] = 1.79e-10; + I_off_n[3][60] = 2.47e-10; + I_off_n[3][70] = 3.31e-10; + I_off_n[3][80] = 4.26e-10; + I_off_n[3][90] = 5.27e-10; + I_off_n[3][100] = 6.46e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.1; + Lphy[3] = 0.045; + Lelec[3] = 0.0298; + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.045; + curr_I_on_dram_cell = 20e-6;//A + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.045 * 0.045; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.7; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.98e-15; - mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.147; - c_g_ideal[3] = 3.59e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 999.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.31e-14; - I_off_n[3][10] = 2.68e-14; - I_off_n[3][20] = 5.25e-14; - I_off_n[3][30] = 9.88e-14; - I_off_n[3][40] = 1.79e-13; - I_off_n[3][50] = 3.15e-13; - I_off_n[3][60] = 5.36e-13; - I_off_n[3][70] = 8.86e-13; - I_off_n[3][80] = 1.42e-12; - I_off_n[3][90] = 2.20e-12; - I_off_n[3][100] = 3.29e-12; - } + //COMM-DRAM wordline transistor parameters + curr_vpp = 2.7; + t_ox[3] = 4e-3; + v_th[3] = 1.0; + c_ox[3] = 7.98e-15; + mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.147; + c_g_ideal[3] = 3.59e-16; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 999.4e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.69; + n_to_p_eff_curr_drv_ratio[3] = 1.95; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.31e-14; + I_off_n[3][10] = 2.68e-14; + I_off_n[3][20] = 5.25e-14; + I_off_n[3][30] = 9.88e-14; + I_off_n[3][40] = 1.79e-13; + I_off_n[3][50] = 3.15e-13; + I_off_n[3][60] = 5.36e-13; + I_off_n[3][70] = 8.86e-13; + I_off_n[3][80] = 1.42e-12; + I_off_n[3][90] = 2.20e-12; + I_off_n[3][100] = 3.29e-12; + } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7; - curr_core_tx_density = 1.25; - curr_sckt_co_eff = 1.1387; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7; + curr_core_tx_density = 1.25; + curr_sckt_co_eff = 1.1387; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + } - if (tech == 32) - { - SENSE_AMP_D = .03e-9; // s - SENSE_AMP_P = 2.16e-15; // J - //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm - //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for - //HP and LSTP. - vdd[0] = 0.9; - Lphy[0] = 0.013; - Lelec[0] = 0.01013; - t_ox[0] = 0.5e-3; - v_th[0] = 0.21835; - c_ox[0] = 4.11e-14; - mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 5.09E-2; - c_g_ideal[0] = 5.34e-16; - c_fringe[0] = 0.04e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2211.7e-6; - I_on_p[0] = I_on_n[0] / 2; - nmos_effective_resistance_multiplier = 1.49; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.706; - //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), - //whichever comes first - I_off_n[0][0] = 1.52e-7; - I_off_n[0][10] = 1.55e-7; - I_off_n[0][20] = 1.59e-7; - I_off_n[0][30] = 1.68e-7; - I_off_n[0][40] = 1.90e-7; - I_off_n[0][50] = 2.69e-7; - I_off_n[0][60] = 5.32e-7; - I_off_n[0][70] = 1.02e-6; - I_off_n[0][80] = 1.62e-6; - I_off_n[0][90] = 2.73e-6; - I_off_n[0][100] = 6.1e-6; + if (tech == 32) { + SENSE_AMP_D = .03e-9; // s + SENSE_AMP_P = 2.16e-15; // J + //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm + //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for + //HP and LSTP. + vdd[0] = 0.9; + Lphy[0] = 0.013; + Lelec[0] = 0.01013; + t_ox[0] = 0.5e-3; + v_th[0] = 0.21835; + c_ox[0] = 4.11e-14; + mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[0] = 5.09E-2; + c_g_ideal[0] = 5.34e-16; + c_fringe[0] = 0.04e-15; + c_junc[0] = 1e-15; + I_on_n[0] = 2211.7e-6; + I_on_p[0] = I_on_n[0] / 2; + nmos_effective_resistance_multiplier = 1.49; + n_to_p_eff_curr_drv_ratio[0] = 2.41; + gmp_to_gmn_multiplier[0] = 1.38; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1 / 3.706; + //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), + //whichever comes first + I_off_n[0][0] = 1.52e-7; + I_off_n[0][10] = 1.55e-7; + I_off_n[0][20] = 1.59e-7; + I_off_n[0][30] = 1.68e-7; + I_off_n[0][40] = 1.90e-7; + I_off_n[0][50] = 2.69e-7; + I_off_n[0][60] = 5.32e-7; + I_off_n[0][70] = 1.02e-6; + I_off_n[0][80] = 1.62e-6; + I_off_n[0][90] = 2.73e-6; + I_off_n[0][100] = 6.1e-6; - I_g_on_n[0][0] = 6.55e-8;//A/micron - I_g_on_n[0][10] = 6.55e-8; - I_g_on_n[0][20] = 6.55e-8; - I_g_on_n[0][30] = 6.55e-8; - I_g_on_n[0][40] = 6.55e-8; - I_g_on_n[0][50] = 6.55e-8; - I_g_on_n[0][60] = 6.55e-8; - I_g_on_n[0][70] = 6.55e-8; - I_g_on_n[0][80] = 6.55e-8; - I_g_on_n[0][90] = 6.55e-8; - I_g_on_n[0][100] = 6.55e-8; + I_g_on_n[0][0] = 6.55e-8;//A/micron + I_g_on_n[0][10] = 6.55e-8; + I_g_on_n[0][20] = 6.55e-8; + I_g_on_n[0][30] = 6.55e-8; + I_g_on_n[0][40] = 6.55e-8; + I_g_on_n[0][50] = 6.55e-8; + I_g_on_n[0][60] = 6.55e-8; + I_g_on_n[0][70] = 6.55e-8; + I_g_on_n[0][80] = 6.55e-8; + I_g_on_n[0][90] = 6.55e-8; + I_g_on_n[0][100] = 6.55e-8; -// 32 DG -// I_g_on_n[0][0] = 2.71e-9;//A/micron -// I_g_on_n[0][10] = 2.71e-9; -// I_g_on_n[0][20] = 2.71e-9; -// I_g_on_n[0][30] = 2.71e-9; -// I_g_on_n[0][40] = 2.71e-9; -// I_g_on_n[0][50] = 2.71e-9; -// I_g_on_n[0][60] = 2.71e-9; -// I_g_on_n[0][70] = 2.71e-9; -// I_g_on_n[0][80] = 2.71e-9; -// I_g_on_n[0][90] = 2.71e-9; -// I_g_on_n[0][100] = 2.71e-9; + //LSTP device type + vdd[1] = 1; + Lphy[1] = 0.020; + Lelec[1] = 0.0173; + t_ox[1] = 1.2e-3; + v_th[1] = 0.513; + c_ox[1] = 2.29e-14; + mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 8.64e-2; + c_g_ideal[1] = 4.58e-16; + c_fringe[1] = 0.053e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 683.6e-6; + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2.23; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1 / 1.93; + I_off_n[1][0] = 2.06e-11; + I_off_n[1][10] = 3.30e-11; + I_off_n[1][20] = 5.15e-11; + I_off_n[1][30] = 7.83e-11; + I_off_n[1][40] = 1.16e-10; + I_off_n[1][50] = 1.69e-10; + I_off_n[1][60] = 2.40e-10; + I_off_n[1][70] = 3.34e-10; + I_off_n[1][80] = 4.54e-10; + I_off_n[1][90] = 5.96e-10; + I_off_n[1][100] = 7.44e-10; - //LSTP device type - vdd[1] = 1; - Lphy[1] = 0.020; - Lelec[1] = 0.0173; - t_ox[1] = 1.2e-3; - v_th[1] = 0.513; - c_ox[1] = 2.29e-14; - mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 8.64e-2; - c_g_ideal[1] = 4.58e-16; - c_fringe[1] = 0.053e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 683.6e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/1.93; - I_off_n[1][0] = 2.06e-11; - I_off_n[1][10] = 3.30e-11; - I_off_n[1][20] = 5.15e-11; - I_off_n[1][30] = 7.83e-11; - I_off_n[1][40] = 1.16e-10; - I_off_n[1][50] = 1.69e-10; - I_off_n[1][60] = 2.40e-10; - I_off_n[1][70] = 3.34e-10; - I_off_n[1][80] = 4.54e-10; - I_off_n[1][90] = 5.96e-10; - I_off_n[1][100] = 7.44e-10; + I_g_on_n[1][0] = 3.73e-11;//A/micron + I_g_on_n[1][10] = 3.73e-11; + I_g_on_n[1][20] = 3.73e-11; + I_g_on_n[1][30] = 3.73e-11; + I_g_on_n[1][40] = 3.73e-11; + I_g_on_n[1][50] = 3.73e-11; + I_g_on_n[1][60] = 3.73e-11; + I_g_on_n[1][70] = 3.73e-11; + I_g_on_n[1][80] = 3.73e-11; + I_g_on_n[1][90] = 3.73e-11; + I_g_on_n[1][100] = 3.73e-11; - I_g_on_n[1][0] = 3.73e-11;//A/micron - I_g_on_n[1][10] = 3.73e-11; - I_g_on_n[1][20] = 3.73e-11; - I_g_on_n[1][30] = 3.73e-11; - I_g_on_n[1][40] = 3.73e-11; - I_g_on_n[1][50] = 3.73e-11; - I_g_on_n[1][60] = 3.73e-11; - I_g_on_n[1][70] = 3.73e-11; - I_g_on_n[1][80] = 3.73e-11; - I_g_on_n[1][90] = 3.73e-11; - I_g_on_n[1][100] = 3.73e-11; + //LOP device type + vdd[2] = 0.6; + Lphy[2] = 0.016; + Lelec[2] = 0.01232; + t_ox[2] = 0.9e-3; + v_th[2] = 0.24227; + c_ox[2] = 2.84e-14; + mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 4.64e-2; + c_g_ideal[2] = 4.54e-16; + c_fringe[2] = 0.057e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 827.8e-6; + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.73; + n_to_p_eff_curr_drv_ratio[2] = 2.28; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1 / 1.89; + I_off_n[2][0] = 5.94e-8; + I_off_n[2][10] = 7.23e-8; + I_off_n[2][20] = 8.7e-8; + I_off_n[2][30] = 1.04e-7; + I_off_n[2][40] = 1.22e-7; + I_off_n[2][50] = 1.43e-7; + I_off_n[2][60] = 1.65e-7; + I_off_n[2][70] = 1.90e-7; + I_off_n[2][80] = 2.15e-7; + I_off_n[2][90] = 2.39e-7; + I_off_n[2][100] = 2.63e-7; + I_g_on_n[2][0] = 2.93e-9;//A/micron + I_g_on_n[2][10] = 2.93e-9; + I_g_on_n[2][20] = 2.93e-9; + I_g_on_n[2][30] = 2.93e-9; + I_g_on_n[2][40] = 2.93e-9; + I_g_on_n[2][50] = 2.93e-9; + I_g_on_n[2][60] = 2.93e-9; + I_g_on_n[2][70] = 2.93e-9; + I_g_on_n[2][80] = 2.93e-9; + I_g_on_n[2][90] = 2.93e-9; + I_g_on_n[2][100] = 2.93e-9; - //LOP device type - vdd[2] = 0.6; - Lphy[2] = 0.016; - Lelec[2] = 0.01232; - t_ox[2] = 0.9e-3; - v_th[2] = 0.24227; - c_ox[2] = 2.84e-14; - mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 4.64e-2; - c_g_ideal[2] = 4.54e-16; - c_fringe[2] = 0.057e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 827.8e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.89; - I_off_n[2][0] = 5.94e-8; - I_off_n[2][10] = 7.23e-8; - I_off_n[2][20] = 8.7e-8; - I_off_n[2][30] = 1.04e-7; - I_off_n[2][40] = 1.22e-7; - I_off_n[2][50] = 1.43e-7; - I_off_n[2][60] = 1.65e-7; - I_off_n[2][70] = 1.90e-7; - I_off_n[2][80] = 2.15e-7; - I_off_n[2][90] = 2.39e-7; - I_off_n[2][100] = 2.63e-7; + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.0; + Lphy[3] = 0.056; + Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + curr_v_th_dram_access_transistor = 0.44129; + width_dram_access_transistor = 0.056; + curr_I_on_dram_cell = 36e-6; + curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; - I_g_on_n[2][0] = 2.93e-9;//A/micron - I_g_on_n[2][10] = 2.93e-9; - I_g_on_n[2][20] = 2.93e-9; - I_g_on_n[2][30] = 2.93e-9; - I_g_on_n[2][40] = 2.93e-9; - I_g_on_n[2][50] = 2.93e-9; - I_g_on_n[2][60] = 2.93e-9; - I_g_on_n[2][70] = 2.93e-9; - I_g_on_n[2][80] = 2.93e-9; - I_g_on_n[2][90] = 2.93e-9; - I_g_on_n[2][100] = 2.93e-9; + //LP-DRAM wordline transistor parameters + curr_vpp = 1.5; + t_ox[3] = 2e-3; + v_th[3] = 0.44467; + c_ox[3] = 1.48e-14; + mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.174; + c_g_ideal[3] = 7.45e-16; + c_fringe[3] = 0.053e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 1055.4e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 3.57e-11; + I_off_n[3][10] = 5.51e-11; + I_off_n[3][20] = 8.27e-11; + I_off_n[3][30] = 1.21e-10; + I_off_n[3][40] = 1.74e-10; + I_off_n[3][50] = 2.45e-10; + I_off_n[3][60] = 3.38e-10; + I_off_n[3][70] = 4.53e-10; + I_off_n[3][80] = 5.87e-10; + I_off_n[3][90] = 7.29e-10; + I_off_n[3][100] = 8.87e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.0; + Lphy[3] = 0.032; + Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.032; + curr_I_on_dram_cell = 20e-6; + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.032 * 0.032; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.056; - Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44129; - width_dram_access_transistor = 0.056; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; + //COMM-DRAM wordline transistor parameters + curr_vpp = 2.6; + t_ox[3] = 4e-3; + v_th[3] = 1.0; + c_ox[3] = 7.99e-15; + mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.129; + c_g_ideal[3] = 2.56e-16; + c_fringe[3] = 0.053e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 1024.5e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.69; + n_to_p_eff_curr_drv_ratio[3] = 1.95; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 3.63e-14; + I_off_n[3][10] = 7.18e-14; + I_off_n[3][20] = 1.36e-13; + I_off_n[3][30] = 2.49e-13; + I_off_n[3][40] = 4.41e-13; + I_off_n[3][50] = 7.55e-13; + I_off_n[3][60] = 1.26e-12; + I_off_n[3][70] = 2.03e-12; + I_off_n[3][80] = 3.19e-12; + I_off_n[3][90] = 4.87e-12; + I_off_n[3][100] = 7.16e-12; + } - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2e-3; - v_th[3] = 0.44467; - c_ox[3] = 1.48e-14; - mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.174; - c_g_ideal[3] = 7.45e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1055.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.57e-11; - I_off_n[3][10] = 5.51e-11; - I_off_n[3][20] = 8.27e-11; - I_off_n[3][30] = 1.21e-10; - I_off_n[3][40] = 1.74e-10; - I_off_n[3][50] = 2.45e-10; - I_off_n[3][60] = 3.38e-10; - I_off_n[3][70] = 4.53e-10; - I_off_n[3][80] = 5.87e-10; - I_off_n[3][90] = 7.29e-10; - I_off_n[3][100] = 8.87e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.032; - Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.032; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.032*0.032; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7; + curr_sckt_co_eff = 1.1111; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + } - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.6; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.99e-15; - mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.129; - c_g_ideal[3] = 2.56e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1024.5e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.63e-14; - I_off_n[3][10] = 7.18e-14; - I_off_n[3][20] = 1.36e-13; - I_off_n[3][30] = 2.49e-13; - I_off_n[3][40] = 4.41e-13; - I_off_n[3][50] = 7.55e-13; - I_off_n[3][60] = 1.26e-12; - I_off_n[3][70] = 2.03e-12; - I_off_n[3][80] = 3.19e-12; - I_off_n[3][90] = 4.87e-12; - I_off_n[3][100] = 7.16e-12; - } + if (tech == 22) { + SENSE_AMP_D = .03e-9; // s + SENSE_AMP_P = 2.16e-15; // J + //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm + //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP. + //22 nm HP + vdd[0] = 0.8; + Lphy[0] = 0.009;//Lphy is the physical gate-length. + Lelec[0] = 0.00468;//Lelec is the electrical gate-length. + t_ox[0] = 0.55e-3;//micron + v_th[0] = 0.1395;//V + c_ox[0] = 3.63e-14;//F/micron2 + mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 2.33e-2; //V/micron + c_g_ideal[0] = 3.27e-16;//F/micron + c_fringe[0] = 0.06e-15;//F/micron + c_junc[0] = 0;//F/micron2 + I_on_n[0] = 2626.4e-6;//A/micron + I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = 1.45; + n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in + //"Dynamic" tab of Device workspace. + gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1 / 3.274; + //From 22nm, leakage current are directly from ITRS report rather + //than MASTAR, since MASTAR has serious bugs there. + I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2; + I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2; + I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2; + I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2; + I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2; + I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2; + I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2; + I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2; + I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2; + I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2; + I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2; + //for 22nm DG HP + I_g_on_n[0][0] = 1.81e-9;//A/micron + I_g_on_n[0][10] = 1.81e-9; + I_g_on_n[0][20] = 1.81e-9; + I_g_on_n[0][30] = 1.81e-9; + I_g_on_n[0][40] = 1.81e-9; + I_g_on_n[0][50] = 1.81e-9; + I_g_on_n[0][60] = 1.81e-9; + I_g_on_n[0][70] = 1.81e-9; + I_g_on_n[0][80] = 1.81e-9; + I_g_on_n[0][90] = 1.81e-9; + I_g_on_n[0][100] = 1.81e-9; - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7; - curr_sckt_co_eff = 1.1111; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + //22 nm LSTP DG + vdd[1] = 0.8; + Lphy[1] = 0.014; + Lelec[1] = 0.008;//Lelec is the electrical gate-length. + t_ox[1] = 1.1e-3;//micron + v_th[1] = 0.40126;//V + c_ox[1] = 2.30e-14;//F/micron2 + mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[1] = 6.64e-2; //V/micron + c_g_ideal[1] = 3.22e-16;//F/micron + c_fringe[1] = 0.08e-15; + c_junc[1] = 0;//F/micron2 + I_on_n[1] = 727.6e-6;//A/micron + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron + long_channel_leakage_reduction[1] = 1 / 1.89; + I_off_n[1][0] = 2.43e-11; + I_off_n[1][10] = 4.85e-11; + I_off_n[1][20] = 9.68e-11; + I_off_n[1][30] = 1.94e-10; + I_off_n[1][40] = 3.87e-10; + I_off_n[1][50] = 7.73e-10; + I_off_n[1][60] = 3.55e-10; + I_off_n[1][70] = 3.09e-9; + I_off_n[1][80] = 6.19e-9; + I_off_n[1][90] = 1.24e-8; + I_off_n[1][100] = 2.48e-8; - if(tech == 22){ - SENSE_AMP_D = .03e-9; // s - SENSE_AMP_P = 2.16e-15; // J - //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm - //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP. - //22 nm HP - vdd[0] = 0.8; - Lphy[0] = 0.009;//Lphy is the physical gate-length. - Lelec[0] = 0.00468;//Lelec is the electrical gate-length. - t_ox[0] = 0.55e-3;//micron - v_th[0] = 0.1395;//V - c_ox[0] = 3.63e-14;//F/micron2 - mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 2.33e-2; //V/micron - c_g_ideal[0] = 3.27e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron - c_junc[0] = 0;//F/micron2 - I_on_n[0] = 2626.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.45; - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.274; - I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there. - I_off_n[0][10] = 1.55e-7/1.5*1.2; - I_off_n[0][20] = 1.59e-7/1.5*1.2; - I_off_n[0][30] = 1.68e-7/1.5*1.2; - I_off_n[0][40] = 1.90e-7/1.5*1.2; - I_off_n[0][50] = 2.69e-7/1.5*1.2; - I_off_n[0][60] = 5.32e-7/1.5*1.2; - I_off_n[0][70] = 1.02e-6/1.5*1.2; - I_off_n[0][80] = 1.62e-6/1.5*1.2; - I_off_n[0][90] = 2.73e-6/1.5*1.2; - I_off_n[0][100] = 6.1e-6/1.5*1.2; - //for 22nm DG HP - I_g_on_n[0][0] = 1.81e-9;//A/micron - I_g_on_n[0][10] = 1.81e-9; - I_g_on_n[0][20] = 1.81e-9; - I_g_on_n[0][30] = 1.81e-9; - I_g_on_n[0][40] = 1.81e-9; - I_g_on_n[0][50] = 1.81e-9; - I_g_on_n[0][60] = 1.81e-9; - I_g_on_n[0][70] = 1.81e-9; - I_g_on_n[0][80] = 1.81e-9; - I_g_on_n[0][90] = 1.81e-9; - I_g_on_n[0][100] = 1.81e-9; + I_g_on_n[1][0] = 4.51e-10;//A/micron + I_g_on_n[1][10] = 4.51e-10; + I_g_on_n[1][20] = 4.51e-10; + I_g_on_n[1][30] = 4.51e-10; + I_g_on_n[1][40] = 4.51e-10; + I_g_on_n[1][50] = 4.51e-10; + I_g_on_n[1][60] = 4.51e-10; + I_g_on_n[1][70] = 4.51e-10; + I_g_on_n[1][80] = 4.51e-10; + I_g_on_n[1][90] = 4.51e-10; + I_g_on_n[1][100] = 4.51e-10; - //22 nm LSTP DG - vdd[1] = 0.8; - Lphy[1] = 0.014; - Lelec[1] = 0.008;//Lelec is the electrical gate-length. - t_ox[1] = 1.1e-3;//micron - v_th[1] = 0.40126;//V - c_ox[1] = 2.30e-14;//F/micron2 - mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[1] = 6.64e-2; //V/micron - c_g_ideal[1] = 3.22e-16;//F/micron - c_fringe[1] = 0.08e-15; - c_junc[1] = 0;//F/micron2 - I_on_n[1] = 727.6e-6;//A/micron - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron - long_channel_leakage_reduction[1] = 1/1.89; - I_off_n[1][0] = 2.43e-11; - I_off_n[1][10] = 4.85e-11; - I_off_n[1][20] = 9.68e-11; - I_off_n[1][30] = 1.94e-10; - I_off_n[1][40] = 3.87e-10; - I_off_n[1][50] = 7.73e-10; - I_off_n[1][60] = 3.55e-10; - I_off_n[1][70] = 3.09e-9; - I_off_n[1][80] = 6.19e-9; - I_off_n[1][90] = 1.24e-8; - I_off_n[1][100]= 2.48e-8; + //22 nm LOP + vdd[2] = 0.6; + Lphy[2] = 0.011; + Lelec[2] = 0.00604;//Lelec is the electrical gate-length. + t_ox[2] = 0.8e-3;//micron + v_th[2] = 0.2315;//V + c_ox[2] = 2.87e-14;//F/micron2 + mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[2] = 1.81e-2; //V/micron + c_g_ideal[2] = 3.16e-16;//F/micron + c_fringe[2] = 0.08e-15; + c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab + I_on_n[2] = 916.1e-6;//A/micron + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.73; + n_to_p_eff_curr_drv_ratio[2] = 2; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron + long_channel_leakage_reduction[2] = 1 / 2.38; - I_g_on_n[1][0] = 4.51e-10;//A/micron - I_g_on_n[1][10] = 4.51e-10; - I_g_on_n[1][20] = 4.51e-10; - I_g_on_n[1][30] = 4.51e-10; - I_g_on_n[1][40] = 4.51e-10; - I_g_on_n[1][50] = 4.51e-10; - I_g_on_n[1][60] = 4.51e-10; - I_g_on_n[1][70] = 4.51e-10; - I_g_on_n[1][80] = 4.51e-10; - I_g_on_n[1][90] = 4.51e-10; - I_g_on_n[1][100] = 4.51e-10; + I_off_n[2][0] = 1.31e-8; + I_off_n[2][10] = 2.60e-8; + I_off_n[2][20] = 5.14e-8; + I_off_n[2][30] = 1.02e-7; + I_off_n[2][40] = 2.02e-7; + I_off_n[2][50] = 3.99e-7; + I_off_n[2][60] = 7.91e-7; + I_off_n[2][70] = 1.09e-6; + I_off_n[2][80] = 2.09e-6; + I_off_n[2][90] = 4.04e-6; + I_off_n[2][100] = 4.48e-6; - //22 nm LOP - vdd[2] = 0.6; - Lphy[2] = 0.011; - Lelec[2] = 0.00604;//Lelec is the electrical gate-length. - t_ox[2] = 0.8e-3;//micron - v_th[2] = 0.2315;//V - c_ox[2] = 2.87e-14;//F/micron2 - mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[2] = 1.81e-2; //V/micron - c_g_ideal[2] = 3.16e-16;//F/micron - c_fringe[2] = 0.08e-15; - c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab - I_on_n[2] = 916.1e-6;//A/micron - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron - long_channel_leakage_reduction[2] = 1/2.38; - - I_off_n[2][0] = 1.31e-8; - I_off_n[2][10] = 2.60e-8; - I_off_n[2][20] = 5.14e-8; - I_off_n[2][30] = 1.02e-7; - I_off_n[2][40] = 2.02e-7; - I_off_n[2][50] = 3.99e-7; - I_off_n[2][60] = 7.91e-7; - I_off_n[2][70] = 1.09e-6; - I_off_n[2][80] = 2.09e-6; - I_off_n[2][90] = 4.04e-6; - I_off_n[2][100]= 4.48e-6; - - I_g_on_n[2][0] = 2.74e-9;//A/micron - I_g_on_n[2][10] = 2.74e-9; - I_g_on_n[2][20] = 2.74e-9; - I_g_on_n[2][30] = 2.74e-9; - I_g_on_n[2][40] = 2.74e-9; - I_g_on_n[2][50] = 2.74e-9; - I_g_on_n[2][60] = 2.74e-9; - I_g_on_n[2][70] = 2.74e-9; - I_g_on_n[2][80] = 2.74e-9; - I_g_on_n[2][90] = 2.74e-9; - I_g_on_n[2][100] = 2.74e-9; + I_g_on_n[2][0] = 2.74e-9;//A/micron + I_g_on_n[2][10] = 2.74e-9; + I_g_on_n[2][20] = 2.74e-9; + I_g_on_n[2][30] = 2.74e-9; + I_g_on_n[2][40] = 2.74e-9; + I_g_on_n[2][50] = 2.74e-9; + I_g_on_n[2][60] = 2.74e-9; + I_g_on_n[2][70] = 2.74e-9; + I_g_on_n[2][80] = 2.74e-9; + I_g_on_n[2][90] = 2.74e-9; + I_g_on_n[2][100] = 2.74e-9; - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. + if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) { + //22 nm commodity DRAM cell access transistor technology parameters. //parameters curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In //2005 ITRS, the value was about twice the value in 2007 ITRS @@ -1486,12 +1423,12 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. + curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2. curr_asp_ratio_cell_dram = 0.667; curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus //kept constant. - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. + //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. curr_vpp = 2.3;//vpp. V t_ox[3] = 3.5e-3;//micron v_th[3] = 1.0;//V @@ -1522,130 +1459,80 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][90] = 1.18e-11; I_off_n[3][100] = 1.72e-11; - } - else - { - //some error handler + } else { + //some error handler + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7 / 0.7; + curr_sckt_co_eff = 1.1296; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + if (tech == 16) { + //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm + //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. + //16 nm HP + vdd[0] = 0.7; + Lphy[0] = 0.006;//Lphy is the physical gate-length. + Lelec[0] = 0.00315;//Lelec is the electrical gate-length. + t_ox[0] = 0.5e-3;//micron + v_th[0] = 0.1489;//V + c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR + mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet + c_g_ideal[0] = 2.30e-16;//F/micron + c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 + c_junc[0] = 0;//F/micron2 MASTAR result dynamic + I_on_n[0] = 2768.4e-6;//A/micron + I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. + n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in + //"Dynamic" tab of Device workspace. + gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1 / 2.655; + I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2 * 1.07; + //for 16nm DG HP + I_g_on_n[0][0] = 1.07e-9;//A/micron + I_g_on_n[0][10] = 1.07e-9; + I_g_on_n[0][20] = 1.07e-9; + I_g_on_n[0][30] = 1.07e-9; + I_g_on_n[0][40] = 1.07e-9; + I_g_on_n[0][50] = 1.07e-9; + I_g_on_n[0][60] = 1.07e-9; + I_g_on_n[0][70] = 1.07e-9; + I_g_on_n[0][80] = 1.07e-9; + I_g_on_n[0][90] = 1.07e-9; + I_g_on_n[0][100] = 1.07e-9; - if(tech == 16){ - //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm - //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. - //16 nm HP - vdd[0] = 0.7; - Lphy[0] = 0.006;//Lphy is the physical gate-length. - Lelec[0] = 0.00315;//Lelec is the electrical gate-length. - t_ox[0] = 0.5e-3;//micron - v_th[0] = 0.1489;//V - c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR - mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet - c_g_ideal[0] = 2.30e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 - c_junc[0] = 0;//F/micron2 MASTAR result dynamic - I_on_n[0] = 2768.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/2.655; - I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07; - I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07; - I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07; - I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07; - I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07; - I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07; - I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07; - I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07; - I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07; - I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07; - I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07; - //for 16nm DG HP - I_g_on_n[0][0] = 1.07e-9;//A/micron - I_g_on_n[0][10] = 1.07e-9; - I_g_on_n[0][20] = 1.07e-9; - I_g_on_n[0][30] = 1.07e-9; - I_g_on_n[0][40] = 1.07e-9; - I_g_on_n[0][50] = 1.07e-9; - I_g_on_n[0][60] = 1.07e-9; - I_g_on_n[0][70] = 1.07e-9; - I_g_on_n[0][80] = 1.07e-9; - I_g_on_n[0][90] = 1.07e-9; - I_g_on_n[0][100] = 1.07e-9; - -// //16 nm LSTP DG -// vdd[1] = 0.8; -// Lphy[1] = 0.014; -// Lelec[1] = 0.008;//Lelec is the electrical gate-length. -// t_ox[1] = 1.1e-3;//micron -// v_th[1] = 0.40126;//V -// c_ox[1] = 2.30e-14;//F/micron2 -// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs -// Vdsat[1] = 6.64e-2; //V/micron -// c_g_ideal[1] = 3.22e-16;//F/micron -// c_fringe[1] = 0.008e-15; -// c_junc[1] = 0;//F/micron2 -// I_on_n[1] = 727.6e-6;//A/micron -// I_on_p[1] = I_on_n[1] / 2; -// nmos_effective_resistance_multiplier = 1.99; -// n_to_p_eff_curr_drv_ratio[1] = 2; -// gmp_to_gmn_multiplier[1] = 0.99; -// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron -// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron -// I_off_n[1][0] = 2.43e-11; -// I_off_n[1][10] = 4.85e-11; -// I_off_n[1][20] = 9.68e-11; -// I_off_n[1][30] = 1.94e-10; -// I_off_n[1][40] = 3.87e-10; -// I_off_n[1][50] = 7.73e-10; -// I_off_n[1][60] = 3.55e-10; -// I_off_n[1][70] = 3.09e-9; -// I_off_n[1][80] = 6.19e-9; -// I_off_n[1][90] = 1.24e-8; -// I_off_n[1][100]= 2.48e-8; -// -// // for 22nm LSTP HP -// I_g_on_n[1][0] = 4.51e-10;//A/micron -// I_g_on_n[1][10] = 4.51e-10; -// I_g_on_n[1][20] = 4.51e-10; -// I_g_on_n[1][30] = 4.51e-10; -// I_g_on_n[1][40] = 4.51e-10; -// I_g_on_n[1][50] = 4.51e-10; -// I_g_on_n[1][60] = 4.51e-10; -// I_g_on_n[1][70] = 4.51e-10; -// I_g_on_n[1][80] = 4.51e-10; -// I_g_on_n[1][90] = 4.51e-10; -// I_g_on_n[1][100] = 4.51e-10; - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. + if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) { + //22 nm commodity DRAM cell access transistor technology parameters. //parameters curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In //2005 ITRS, the value was about twice the value in 2007 ITRS @@ -1659,12 +1546,12 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. + curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2. curr_asp_ratio_cell_dram = 0.667; curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus //kept constant. - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. + //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. curr_vpp = 2.3;//vpp. V t_ox[3] = 3.5e-3;//micron v_th[3] = 1.0;//V @@ -1695,930 +1582,340 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][90] = 1.18e-11; I_off_n[3][100] = 1.72e-11; - } - else - { - //some error handler + } else { + //some error handler + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7 / 0.7 / 0.7; + curr_sckt_co_eff = 1.1296; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; + + g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type]; + g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; + g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; + g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; + g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; + g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; + g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; + g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; + g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; + g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; + g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; + g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; + g_tp.peri_global.n_to_p_eff_curr_drv_ratio + += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; + g_tp.peri_global.long_channel_leakage_reduction + += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; + g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; + gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; + + g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; + g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; + g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; + g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; + g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; + g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; + g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; + g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; + g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; + g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; + g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; + g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; + g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; + g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + + g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; + g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; + g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; + g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; + g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; + g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; + g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; + g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; + g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; + g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; + g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; + g_tp.vpp += curr_alpha * curr_vpp; + g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; + g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; + g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; + g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; + g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; + g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; + g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; + g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; + g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; + g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; + g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + + g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; + g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; + g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; + g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; + g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; + g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; + g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; + g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; + g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; + g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; + g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; + g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; + g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; + g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + + g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; + g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; + g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; + area_cell_dram += curr_alpha * curr_area_cell_dram; + asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; + + g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; + g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; + g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; + area_cell_sram += curr_alpha * curr_area_cell_sram; + asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; + + g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng + g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; + g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; + area_cell_cam += curr_alpha * curr_area_cell_cam; + asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; + + //Sense amplifier latch Gm calculation + mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; + Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; + //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff; + g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; + g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; + g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; + g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; + } + + + //Currently we are not modeling the resistance/capacitance of poly anywhere. + //Continuous function (or date have been processed) does not need linear interpolation + g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process + g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process + g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; + g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; + g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um; + g_tp.cell_h_def = 50 * g_ip->F_sz_um; + g_tp.w_poly_contact = g_ip->F_sz_um; + g_tp.spacing_poly_to_contact = g_ip->F_sz_um; + g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; + g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; + + g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; + g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; + //was 10 micron for the 0.8 micron process + g_tp.w_iso = 12.5 * g_ip->F_sz_um; + // sense amplifier N-trans; was 3 micron for the 0.8 micron process + g_tp.w_sense_n = 3.75 * g_ip->F_sz_um; + // sense amplifier P-trans; was 6 micron for the 0.8 micron process + g_tp.w_sense_p = 7.5 * g_ip->F_sz_um; + // Sense enable transistor of the sense amplifier; was 4 micron for the + //0.8 micron process + g_tp.w_sense_en = 5 * g_ip->F_sz_um; + g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; + g_tp.w_nmos_sa_mux= 6 * g_tp.min_w_nmos_; + + if (ram_cell_tech_type == comm_dram) { + g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; + g_tp.h_dec = 8; // in the unit of memory cell height + } else { + g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; + g_tp.h_dec = 4; // in the unit of memory cell height + } + + g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; + g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; + g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; + + g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; + g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; + //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; + + g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; + + double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; + double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; + g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; + + g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); + g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; + g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); + g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; + g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng + g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; + + g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; + g_tp.sram.Vbitpre = vdd[ram_cell_tech_type]; + g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng + pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; + g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; + + + double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; + + for (iter = 0; iter <= 1; ++iter) { + // linear interpolation + if (iter == 0) { + tech = tech_lo; + if (tech_lo == tech_hi) { + curr_alpha = 1; + } else { + curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi); + } + } else { + tech = tech_hi; + if (tech_lo == tech_hi) { + break; + } else { + curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi); + } } + if (tech == 180) { + //Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron + aspect_ratio[0][0] = 2.0; + wire_width = wire_pitch[0][0] / 2; //micron + wire_thickness = aspect_ratio[0][0] * wire_width;//micron + wire_spacing = wire_pitch[0][0] - wire_width;//micron + barrier_thickness = 0.017;//micron + dishing_thickness = 0;//micron + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron + ild_thickness[0][0] = 0.75;//micron + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.709; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; //F/micron + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap);//F/micron. - g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type]; - g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; - g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; - g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; - g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; - g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; - g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; - g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; - g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; - g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; - g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; - g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; - g_tp.peri_global.n_to_p_eff_curr_drv_ratio - += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; - g_tp.peri_global.long_channel_leakage_reduction - += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; - g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.4; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.75;//micron + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.709; + vert_dielectric_constant[0][1] = 3.9; + fringe_cap = 0.115e-15; //F/micron + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); - g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.2; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 1.5; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.709; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); - g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; - g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; - g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; - g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; - g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; - g_tp.vpp += curr_alpha * curr_vpp; - g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; - g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; - g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.017; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.75; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 3.038; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); - g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; - g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; - g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; - area_cell_dram += curr_alpha * curr_area_cell_dram; - asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; - - g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; - g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; - g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; - area_cell_sram += curr_alpha * curr_area_cell_sram; - asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; - - g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng - g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; - g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; - area_cell_cam += curr_alpha * curr_area_cell_cam; - asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; - - //Sense amplifier latch Gm calculation - mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; - Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; - - //Empirical undifferetiated core/FU coefficient - g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff; - g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; - g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; - g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; - g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; - } - - - //Currently we are not modeling the resistance/capacitance of poly anywhere. - //Continuous function (or date have been processed) does not need linear interpolation - g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process - g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process - g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; - g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; - g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um; - g_tp.cell_h_def = 50 * g_ip->F_sz_um; - g_tp.w_poly_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; - g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; - - g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; - g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; - g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process - g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process - g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process - g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process - g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; - g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_; - - if (ram_cell_tech_type == comm_dram) - { - g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; - g_tp.h_dec = 8; // in the unit of memory cell height - } - else - { - g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; - g_tp.h_dec = 4; // in the unit of memory cell height - } - - g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; - g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; - g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; - - g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; - g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; - //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; - - g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; - - double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; - double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; - g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; - - g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); - g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; - g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); - g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; - g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng - g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; - - g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; - g_tp.sram.Vbitpre = vdd[ram_cell_tech_type]; - g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - - - double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; - - for (iter=0; iter<=1; ++iter) - { - // linear interpolation - if (iter == 0) - { - tech = tech_lo; - if (tech_lo == tech_hi) - { - curr_alpha = 1; - } - else - { - curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi); - } - } - else - { - tech = tech_hi; - if (tech_lo == tech_hi) - { - break; - } - else - { - curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi); - } - } - - if (tech == 180) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.0; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.017;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.75;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap);//F/micron. - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.75;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.2; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 1.5; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0]= 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.017; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.75; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.75; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.98; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.18; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); - wire_r_per_micron[1][3] = 12 / 0.18; - } - else if (tech == 90) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.4; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.01;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.48;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap);//F/micron. - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.48;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.7; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.96; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.008; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.48; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.48; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.1; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.09; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); - wire_r_per_micron[1][3] = 12 / 0.09; - } - else if (tech == 65) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 2.7; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.405; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.303; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.7; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.405; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.303; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.8; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.81; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.303; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.006; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.405; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.734; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.405; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.734; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.77; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.734; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.065; - wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); - wire_r_per_micron[1][3] = 12 / 0.065; - } - else if (tech == 45) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.315; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.958; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.315; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.958; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.63; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.958; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.004; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.315; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.46; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.315; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.46; - vert_dielectric_constant[1][1] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.55; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.46; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.045; - wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); - wire_r_per_micron[1][3] = 12 / 0.045; - } - else if (tech == 32) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.21; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.664; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.21; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.664; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.42; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.664; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.21; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.214; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - aspect_ratio[1][1] = 2.0; - wire_width = wire_pitch[1][1] / 2; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.21; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.214; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.385; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.214; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.032;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron - wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron - } - else if (tech == 22) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.15; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.414; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.15; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.414; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.3; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.414; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.15; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.104; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.15; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.104; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.75; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 3.038; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2627,184 +1924,636 @@ void init_tech_params(double technology, bool is_tag) wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 1.98; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 3.038; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.18; + wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); + wire_r_per_micron[1][3] = 12 / 0.18; + } else if (tech == 90) { + //Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron + aspect_ratio[0][0] = 2.4; + wire_width = wire_pitch[0][0] / 2; //micron + wire_thickness = aspect_ratio[0][0] * wire_width;//micron + wire_spacing = wire_pitch[0][0] - wire_width;//micron + barrier_thickness = 0.01;//micron + dishing_thickness = 0;//micron + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron + ild_thickness[0][0] = 0.48;//micron + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.709; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; //F/micron + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap);//F/micron. + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.4; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.48;//micron + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.709; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.7; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.96; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.709; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.008; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.48; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 3.038; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.48; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 3.038; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 1.1; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 3.038; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.09; + wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); + wire_r_per_micron[1][3] = 12 / 0.09; + } else if (tech == 65) { + //Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[0][0] = 2.7; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.405; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.303; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.7; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.405; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.303; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.8; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.81; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.303; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.006; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.405; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.734; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.405; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.734; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.77; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.734; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.065; + wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); + wire_r_per_micron[1][3] = 12 / 0.065; + } else if (tech == 45) { + //Aggressive projections. + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.315; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.958; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.315; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.958; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.63; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.958; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.004; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.315; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.46; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.315; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.46; + vert_dielectric_constant[1][1] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.55; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.46; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.045; + wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); + wire_r_per_micron[1][3] = 12 / 0.045; + } else if (tech == 32) { + //Aggressive projections. + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.21; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.664; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.21; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.664; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.42; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.664; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.003; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.21; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.214; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + aspect_ratio[1][1] = 2.0; + wire_width = wire_pitch[1][1] / 2; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.21; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.214; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.385; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.214; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.032;//micron + wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron + wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron + } else if (tech == 22) { + //Aggressive projections. + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.15; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.414; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.15; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.414; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.3; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.414; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.003; + dishing_thickness = 0; + alpha_scatter = 1.05; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.15; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.104; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.15; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.104; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.275; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.104; vert_dielectric_constant[1][2] = 3.9; wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); //Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.022;//micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); } - else if (tech == 16) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.108; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.202; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); + else if (tech == 16) { + //Aggressive projections. + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.108; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.202; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], + fringe_cap); - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - aspect_ratio[0][1] = 3.0; - wire_width = wire_pitch[0][1] / 2; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.108; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.202; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); + wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global + aspect_ratio[0][1] = 3.0; + wire_width = wire_pitch[0][1] / 2; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.108; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.202; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.216; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.202; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); + wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.216; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.202; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.002; + dishing_thickness = 0; + alpha_scatter = 1.05; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.108; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 1.998; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.002; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.108; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 1.998; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.108; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 1.998; - vert_dielectric_constant[1][1] = 3.9; + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.108; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 1.998; + vert_dielectric_constant[1][1] = 3.9; wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2813,109 +2562,101 @@ void init_tech_params(double technology, bool is_tag) wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.198; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 1.998; vert_dielectric_constant[1][2] = 3.9; wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); //Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.016;//micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); } - g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; + g_tp.wire_local.pitch += curr_alpha * + wire_pitch[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.R_per_um += curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.C_per_um += curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.aspect_ratio += curr_alpha * + aspect_ratio[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.ild_thickness += curr_alpha * + ild_thickness[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.miller_value += curr_alpha * + miller_value[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.horiz_dielectric_constant += curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.vert_dielectric_constant += curr_alpha * + vert_dielectric_constant[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; - g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.pitch += curr_alpha * + wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.R_per_um += curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.C_per_um += curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.aspect_ratio += curr_alpha * + aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.ild_thickness += curr_alpha * + ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.miller_value += curr_alpha * + miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha * + vert_dielectric_constant [g_ip->ic_proj_type] + [g_ip->wire_is_mat_type]; - g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.pitch += curr_alpha * + wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.R_per_um += curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.C_per_um += curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.aspect_ratio += curr_alpha * + aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.ild_thickness += curr_alpha * + ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.miller_value += curr_alpha * + miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha * + vert_dielectric_constant [g_ip->ic_proj_type] + [g_ip->wire_os_mat_type]; - g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2; + g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * + g_tp.wire_inside_mat.C_per_um / 2; - g_tp.sense_delay += curr_alpha *SENSE_AMP_D; - g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P; -// g_tp.horiz_dielectric_constant += horiz_dielectric_constant; -// g_tp.vert_dielectric_constant += vert_dielectric_constant; -// g_tp.aspect_ratio += aspect_ratio; -// g_tp.miller_value += miller_value; -// g_tp.ild_thickness += ild_thickness; + g_tp.sense_delay += curr_alpha * SENSE_AMP_D; + g_tp.sense_dy_power += curr_alpha * SENSE_AMP_P; - } - g_tp.fringe_cap = fringe_cap; + } + g_tp.fringe_cap = fringe_cap; - double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1); - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); - double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0); - double tf = rd * c_load; - g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); - double KLOAD = 1; - c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); - tf = rd * c_load; - g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); + double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1); + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); + double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0); + double tf = rd * c_load; + g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); + double KLOAD = 1; + c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); + tf = rd * c_load; + g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); } diff --git a/ext/mcpat/cacti/uca.cc b/ext/mcpat/cacti/uca.cc index 568cd9e44..703ad470f 100755 --- a/ext/mcpat/cacti/uca.cc +++ b/ext/mcpat/cacti/uca.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -37,390 +38,390 @@ #include "uca.h" UCA::UCA(const DynamicParameter & dyn_p) - :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) -{ - int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2)); - int num_banks_hor_dir = nbanks/num_banks_ver_dir; + : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) { + int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks) + / 2 : (_log2(nbanks) - _log2(nbanks) / 2)); + int num_banks_hor_dir = nbanks / num_banks_ver_dir; - if (dp.use_inp_params) - { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else - { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - } + if (dp.use_inp_params) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + } - num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); - num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); - num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); - num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; - num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; + num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) * + (RWP + ERP + EWP); + num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); + num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); + num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; + num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; - if (!dp.fully_assoc && !dp.pure_cam) - { + if (!dp.fully_assoc && !dp.pure_cam) { - if (g_ip->fast_access && dp.is_tag == false) - { - num_do_b_bank *= g_ip->data_assoc; - } + if (g_ip->fast_access && dp.is_tag == false) { + num_do_b_bank *= g_ip->data_assoc; + } - htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); - htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - } + htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, + num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Add_htree, true); + htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, + num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, + num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); + } - else - { + else { - htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); - htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - } + htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, + num_banks_ver_dir * 2, num_banks_hor_dir * 2, + Add_htree, true); + htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, + num_banks_ver_dir * 2, num_banks_hor_dir * 2, + Data_in_htree, true); + htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, + num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); + htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, + num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, + num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, + true); + } - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; - area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; + area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; // cout<<"area cell"<delay + bank.htree_in_add->delay; - double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; - delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + - bank.mat.sa_mux_lev_1_predec->delay + - bank.mat.sa_mux_lev_1_dec->delay; - delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + - bank.mat.sa_mux_lev_2_predec->delay + - bank.mat.sa_mux_lev_2_dec->delay; - double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; + double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; + delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + + bank.mat.sa_mux_lev_1_predec->delay + + bank.mat.sa_mux_lev_1_dec->delay; + delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + + bank.mat.sa_mux_lev_2_predec->delay + + bank.mat.sa_mux_lev_2_dec->delay; + double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; - delay_before_subarray_output_driver = - MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path - delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path - MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path - delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path - delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + - bank.htree_out_data->delay + htree_out_data->delay; - access_time = bank.mat.delay_comparator; + delay_before_subarray_output_driver = + MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path + delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path + MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path + delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path + delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + + bank.htree_out_data->delay + htree_out_data->delay; + access_time = bank.mat.delay_comparator; - double ram_delay_inside_mat; - if (dp.fully_assoc) - { - //delay of FA contains both CAM tag and RAM data - { //delay of CAM - ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; - access_time = htree_in_add->delay + bank.htree_in_add->delay; - //delay of fully-associative data array - access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; - } - } - else - { - access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path - } - - if (dp.is_main_mem) - { - double t_rcd = max_delay_before_row_decoder + delay_inside_mat; - double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + - delay_from_subarray_out_drv_to_out; - access_time = t_rcd + cas_latency; - } - - double temp; - - if (!dp.fully_assoc) - { - temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit - if (dp.is_dram) - { - temp += bank.mat.delay_writeback; // temp stores random cycle time + double ram_delay_inside_mat; + if (dp.fully_assoc) { + //delay of FA contains both CAM tag and RAM data + { //delay of CAM + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + access_time = htree_in_add->delay + bank.htree_in_add->delay; + //delay of fully-associative data array + access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; + } + } else { + access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path } + if (dp.is_main_mem) { + double t_rcd = max_delay_before_row_decoder + delay_inside_mat; + double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + + delay_from_subarray_out_drv_to_out; + access_time = t_rcd + cas_latency; + } - temp = MAX(temp, bank.mat.r_predec->delay); - temp = MAX(temp, bank.mat.b_mux_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); - } - else - { - ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; - temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore - + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; + double temp; - temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. - temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); - } + if (!dp.fully_assoc) { + temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit + if (dp.is_dram) { + temp += bank.mat.delay_writeback; // temp stores random cycle time + } - // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav - if (g_ip->rpters_in_htree == false) - { - temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); - } - cycle_time = temp; - double delay_req_network = max_delay_before_row_decoder; - double delay_rep_network = delay_from_subarray_out_drv_to_out; - multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); + temp = MAX(temp, bank.mat.r_predec->delay); + temp = MAX(temp, bank.mat.b_mux_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } else { + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore + + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; - if (dp.is_main_mem) - { - multisubbank_interleave_cycle_time = htree_in_add->delay; - precharge_delay = htree_in_add->delay + - bank.htree_in_add->delay + bank.mat.delay_writeback + - bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; - cycle_time = access_time + precharge_delay; - } - else - { - precharge_delay = 0; - } + temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. + temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } - double dram_array_availability = 0; - if (dp.is_dram) - { - dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; - } + // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav + if (g_ip->rpters_in_htree == false) { + temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); + } + cycle_time = temp; - return outrisetime; + double delay_req_network = max_delay_before_row_decoder; + double delay_rep_network = delay_from_subarray_out_drv_to_out; + multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); + + if (dp.is_main_mem) { + multisubbank_interleave_cycle_time = htree_in_add->delay; + precharge_delay = htree_in_add->delay + + bank.htree_in_add->delay + bank.mat.delay_writeback + + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; + cycle_time = access_time + precharge_delay; + } else { + precharge_delay = 0; + } + + double dram_array_availability = 0; + if (dp.is_dram) { + dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; + } + + return outrisetime; } // note: currently, power numbers are for a bank of an array -void UCA::compute_power_energy() -{ - bank.compute_power_energy(); - power = bank.power; +void UCA::compute_power_energy() { + bank.compute_power_energy(); + power = bank.power; - power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; - power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; - if (dp.fully_assoc || dp.pure_cam) - power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic; + power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; + power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; + if (dp.fully_assoc || dp.pure_cam) + power_routing_to_bank.searchOp.dynamic = + htree_in_search->power.searchOp.dynamic + + htree_out_search->power.searchOp.dynamic; - power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage + - htree_in_data->power.readOp.leakage + - htree_out_data->power.readOp.leakage; + power_routing_to_bank.readOp.leakage += + htree_in_add->power.readOp.leakage + + htree_in_data->power.readOp.leakage + + htree_out_data->power.readOp.leakage; - power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage + - htree_in_data->power.readOp.gate_leakage + - htree_out_data->power.readOp.gate_leakage; - if (dp.fully_assoc || dp.pure_cam) - { + power_routing_to_bank.readOp.gate_leakage += + htree_in_add->power.readOp.gate_leakage + + htree_in_data->power.readOp.gate_leakage + + htree_out_data->power.readOp.gate_leakage; + if (dp.fully_assoc || dp.pure_cam) { power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; - } + } - power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; - power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; - power.readOp.leakage += power_routing_to_bank.readOp.leakage; - power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; + power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; + power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; + power.readOp.leakage += power_routing_to_bank.readOp.leakage; + power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; - // calculate total write energy per access - power.writeOp.dynamic = power.readOp.dynamic - - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir - + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - - power_routing_to_bank.readOp.dynamic - + power_routing_to_bank.writeOp.dynamic - + bank.htree_in_data->power.readOp.dynamic - - bank.htree_out_data->power.readOp.dynamic; + // calculate total write energy per access + power.writeOp.dynamic = power.readOp.dynamic + - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + - power_routing_to_bank.readOp.dynamic + + power_routing_to_bank.writeOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + - bank.htree_out_data->power.readOp.dynamic; - if (dp.is_dram == false) - { - power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; - } + if (dp.is_dram == false) { + power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + } - dyn_read_energy_from_closed_page = power.readOp.dynamic; - dyn_read_energy_from_open_page = power.readOp.dynamic - - (bank.mat.r_predec->power.readOp.dynamic + - bank.mat.power_row_decoders.readOp.dynamic + - bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + - bank.mat.power_sa.readOp.dynamic + - bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; + dyn_read_energy_from_closed_page = power.readOp.dynamic; + dyn_read_energy_from_open_page = power.readOp.dynamic - + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic + + bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; - dyn_read_energy_remaining_words_in_burst = - (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * - ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + - bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + - bank.htree_out_data->power.readOp.dynamic + - power_routing_to_bank.readOp.dynamic); - dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; - dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; + dyn_read_energy_remaining_words_in_burst = + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * + ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + power_routing_to_bank.readOp.dynamic); + dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; + dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; - activate_energy = htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + - (bank.mat.r_predec->power.readOp.dynamic + - bank.mat.power_row_decoders.readOp.dynamic + - bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; - read_energy = (htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + - (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + - bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + - bank.htree_out_data->power.readOp.dynamic + - htree_in_data->power.readOp.dynamic) * g_ip->burst_len; - write_energy = (htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + - htree_in_data->power.readOp.dynamic + - bank.htree_in_data->power.readOp.dynamic + - (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; - precharge_energy = (bank.mat.power_bitline.readOp.dynamic + - bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; + activate_energy = htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; + read_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + htree_in_data->power.readOp.dynamic) * g_ip->burst_len; + write_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + + htree_in_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; + precharge_energy = (bank.mat.power_bitline.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; - leak_power_subbank_closed_page = - (bank.mat.r_predec->power.readOp.leakage + - bank.mat.b_mux_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + - bank.mat.power_row_decoders.readOp.leakage + - bank.mat.power_bit_mux_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + - bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + leak_power_subbank_closed_page = + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; - leak_power_subbank_closed_page += - (bank.mat.r_predec->power.readOp.gate_leakage + - bank.mat.b_mux_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + - bank.mat.power_row_decoders.readOp.gate_leakage + - bank.mat.power_bit_mux_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ - //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + leak_power_subbank_closed_page += + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ + //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; - leak_power_subbank_open_page = - (bank.mat.r_predec->power.readOp.leakage + - bank.mat.b_mux_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + - bank.mat.power_row_decoders.readOp.leakage + - bank.mat.power_bit_mux_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + - bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + leak_power_subbank_open_page = + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; - leak_power_subbank_open_page += - (bank.mat.r_predec->power.readOp.gate_leakage + - bank.mat.b_mux_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + - bank.mat.power_row_decoders.readOp.gate_leakage + - bank.mat.power_bit_mux_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; - //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + leak_power_subbank_open_page += + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; + //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; - leak_power_request_and_reply_networks = - power_routing_to_bank.readOp.leakage + - bank.htree_in_add->power.readOp.leakage + - bank.htree_in_data->power.readOp.leakage + - bank.htree_out_data->power.readOp.leakage; + leak_power_request_and_reply_networks = + power_routing_to_bank.readOp.leakage + + bank.htree_in_add->power.readOp.leakage + + bank.htree_in_data->power.readOp.leakage + + bank.htree_out_data->power.readOp.leakage; - leak_power_request_and_reply_networks += - power_routing_to_bank.readOp.gate_leakage + - bank.htree_in_add->power.readOp.gate_leakage + - bank.htree_in_data->power.readOp.gate_leakage + - bank.htree_out_data->power.readOp.gate_leakage; + leak_power_request_and_reply_networks += + power_routing_to_bank.readOp.gate_leakage + + bank.htree_in_add->power.readOp.gate_leakage + + bank.htree_in_data->power.readOp.gate_leakage + + bank.htree_out_data->power.readOp.gate_leakage; - if (dp.fully_assoc || dp.pure_cam) - { + if (dp.fully_assoc || dp.pure_cam) { leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; - } - - - if (dp.is_dram) - { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power - refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + - bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; - refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; - refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; - refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; - refresh_power /= dp.dram_refresh_period; - } - - - if (dp.is_tag == false) - { - power.readOp.dynamic = dyn_read_energy_from_closed_page; - power.writeOp.dynamic = dyn_read_energy_from_closed_page - - dyn_read_energy_remaining_words_in_burst - - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir - + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - + (power_routing_to_bank.writeOp.dynamic - - power_routing_to_bank.readOp.dynamic - - bank.htree_out_data->power.readOp.dynamic + - bank.htree_in_data->power.readOp.dynamic) * - (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME - - if (dp.is_dram == false) - { - power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; } - } - // if DRAM, add refresh power to total leakage - if (dp.is_dram) - { - power.readOp.leakage += refresh_power; - } - // TODO: below should be avoided. - /*if (dp.is_main_mem) - { - power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; - }*/ + // if DRAM, add contribution of power spent in row predecoder drivers, + // blocks and decoders to refresh power + if (dp.is_dram) { + refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; + refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + refresh_power /= dp.dram_refresh_period; + } - assert(power.readOp.dynamic > 0); - assert(power.writeOp.dynamic > 0); - assert(power.readOp.leakage > 0); + + if (dp.is_tag == false) { + power.readOp.dynamic = dyn_read_energy_from_closed_page; + power.writeOp.dynamic = dyn_read_energy_from_closed_page + - dyn_read_energy_remaining_words_in_burst + - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + + (power_routing_to_bank.writeOp.dynamic - + power_routing_to_bank.readOp.dynamic - + bank.htree_out_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic) * + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME + + if (dp.is_dram == false) { + power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + } + } + + // if DRAM, add refresh power to total leakage + if (dp.is_dram) { + power.readOp.leakage += refresh_power; + } + + // TODO: below should be avoided. + /*if (dp.is_main_mem) + { + power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; + }*/ + + assert(power.readOp.dynamic > 0); + assert(power.writeOp.dynamic > 0); + assert(power.readOp.leakage > 0); } diff --git a/ext/mcpat/cacti/uca.h b/ext/mcpat/cacti/uca.h index fdab14fc7..402035f9a 100755 --- a/ext/mcpat/cacti/uca.h +++ b/ext/mcpat/cacti/uca.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -40,9 +41,8 @@ #include "htree2.h" #include "parameter.h" -class UCA : public Component -{ - public: +class UCA : public Component { +public: UCA(const DynamicParameter & dyn_p); ~UCA(); double compute_delays(double inrisetime); // returns outrisetime @@ -66,7 +66,10 @@ class UCA : public Component int num_do_b_bank; int num_si_b_bank; int num_so_b_bank; - int RWP, ERP, EWP,SCHP; + int RWP; + int ERP; + int EWP; + int SCHP; double area_all_dataramcells; double dyn_read_energy_from_closed_page; diff --git a/ext/mcpat/cacti/wire.cc b/ext/mcpat/cacti/wire.cc index 742000c85..b7d9e34ce 100644 --- a/ext/mcpat/cacti/wire.cc +++ b/ext/mcpat/cacti/wire.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -41,173 +42,173 @@ Wire::Wire( enum Wire_placement wp, double resistivity, TechnologyParameter::DeviceType *dt - ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s), - resistivity(resistivity), deviceType(dt) -{ - wire_placement = wp; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - in_rise_time = 0; - out_rise_time = 0; - if (initialized != 1) { - cout << "Wire not initialized. Initializing it with default values\n"; - Wire winit; - } - calculate_wire_stats(); - // change everything back to seconds, microns, and Joules - repeater_spacing *= 1e6; - wire_length *= 1e6; - wire_width *= 1e6; - wire_spacing *= 1e6; - assert(wire_length > 0); - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); + ): wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), + s_scale(s_s), + resistivity(resistivity), deviceType(dt) { + wire_placement = wp; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; + if (initialized != 1) { + cout << "Wire not initialized. Initializing it with default values\n"; + Wire winit; + } + calculate_wire_stats(); + // change everything back to seconds, microns, and Joules + repeater_spacing *= 1e6; + wire_length *= 1e6; + wire_width *= 1e6; + wire_spacing *= 1e6; + assert(wire_length > 0); + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); } - // the following values are for peripheral global technology - // specified in the input config file - Component Wire::global; - Component Wire::global_5; - Component Wire::global_10; - Component Wire::global_20; - Component Wire::global_30; - Component Wire::low_swing; +// the following values are for peripheral global technology +// specified in the input config file +Component Wire::global; +Component Wire::global_5; +Component Wire::global_10; +Component Wire::global_20; +Component Wire::global_30; +Component Wire::low_swing; - int Wire::initialized; - double Wire::wire_width_init; - double Wire::wire_spacing_init; +int Wire::initialized; +double Wire::wire_width_init; +double Wire::wire_spacing_init; -Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt) -{ - w_scale = w_s; - s_scale = s_s; - deviceType = dt; - wire_placement = wp; - resistivity = resis; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; - in_rise_time = 0; - out_rise_time = 0; +Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, + TechnologyParameter::DeviceType *dt) { + w_scale = w_s; + s_scale = s_s; + deviceType = dt; + wire_placement = wp; + resistivity = resis; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; - switch (wire_placement) - { - case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break; - case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break; - default: wire_width = g_tp.wire_local.pitch; break; - } + switch (wire_placement) { + case outside_mat: + wire_width = g_tp.wire_outside_mat.pitch; + break; + case inside_mat : + wire_width = g_tp.wire_inside_mat.pitch; + break; + default: + wire_width = g_tp.wire_local.pitch; + break; + } - wire_spacing = wire_width; + wire_spacing = wire_width; - wire_width *= (w_scale * 1e-6/2) /* (m) */; - wire_spacing *= (s_scale * 1e-6/2) /* (m) */; + wire_width *= (w_scale * 1e-6 / 2) /* (m) */; + wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */; - initialized = 1; - init_wire(); - wire_width_init = wire_width; - wire_spacing_init = wire_spacing; + initialized = 1; + init_wire(); + wire_width_init = wire_width; + wire_spacing_init = wire_spacing; - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); } -Wire::~Wire() -{ +Wire::~Wire() { } void -Wire::calculate_wire_stats() -{ +Wire::calculate_wire_stats() { - if (wire_placement == outside_mat) { - wire_width = g_tp.wire_outside_mat.pitch; - } - else if (wire_placement == inside_mat) { - wire_width = g_tp.wire_inside_mat.pitch; - } - else { - wire_width = g_tp.wire_local.pitch; - } + if (wire_placement == outside_mat) { + wire_width = g_tp.wire_outside_mat.pitch; + } else if (wire_placement == inside_mat) { + wire_width = g_tp.wire_inside_mat.pitch; + } else { + wire_width = g_tp.wire_local.pitch; + } - wire_spacing = wire_width; + wire_spacing = wire_width; - wire_width *= (w_scale * 1e-6/2) /* (m) */; - wire_spacing *= (s_scale * 1e-6/2) /* (m) */; + wire_width *= (w_scale * 1e-6 / 2) /* (m) */; + wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */; - if (wt != Low_swing) { + if (wt != Low_swing) { - // delay_optimal_wire(); + // delay_optimal_wire(); - if (wt == Global) { - delay = global.delay * wire_length; - power.readOp.dynamic = global.power.readOp.dynamic * wire_length; - power.readOp.leakage = global.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length; - repeater_spacing = global.area.w; - repeater_size = global.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_5) { - delay = global_5.delay * wire_length; - power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_5.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_5.area.w; - repeater_size = global_5.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_10) { - delay = global_10.delay * wire_length; - power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_10.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_10.area.w; - repeater_size = global_10.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_20) { - delay = global_20.delay * wire_length; - power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_20.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_20.area.w; - repeater_size = global_20.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_30) { - delay = global_30.delay * wire_length; - power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_30.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_30.area.w; - repeater_size = global_30.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - out_rise_time = delay*repeater_spacing/deviceType->Vth; - } - else if (wt == Low_swing) { - low_swing_model (); - repeater_spacing = wire_length; - repeater_size = 1; - } - else { - assert(0); - } + if (wt == Global) { + delay = global.delay * wire_length; + power.readOp.dynamic = global.power.readOp.dynamic * wire_length; + power.readOp.leakage = global.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length; + repeater_spacing = global.area.w; + repeater_size = global.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_5) { + delay = global_5.delay * wire_length; + power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_5.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_5.area.w; + repeater_size = global_5.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_10) { + delay = global_10.delay * wire_length; + power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_10.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_10.area.w; + repeater_size = global_10.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_20) { + delay = global_20.delay * wire_length; + power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_20.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_20.area.w; + repeater_size = global_20.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_30) { + delay = global_30.delay * wire_length; + power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_30.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_30.area.w; + repeater_size = global_30.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } + out_rise_time = delay * repeater_spacing / deviceType->Vth; + } else if (wt == Low_swing) { + low_swing_model (); + repeater_spacing = wire_length; + repeater_size = 1; + } else { + assert(0); + } } @@ -218,51 +219,55 @@ Wire::calculate_wire_stats() * inverters connected in series (refer: CACTI 1 Technical report, * section 6.1.3) */ - double -Wire::signal_fall_time () -{ +double +Wire::signal_fall_time () { - /* rise time of inverter 1's output */ - double rt; - /* fall time of inverter 2's output */ - double ft; - double timeconst; + /* rise time of inverter 1's output */ + double rt; + /* fall time of inverter 2's output */ + double ft; + double timeconst; - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(min_w_pmos, PCH, 1); - rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(g_tp.min_w_nmos_, NCH, 1); - ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; - return ft; + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL) / + (deviceType->Vdd - deviceType->Vth); + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth; + return ft; } -double Wire::signal_rise_time () -{ +double Wire::signal_rise_time () { - /* rise time of inverter 1's output */ - double ft; - /* fall time of inverter 2's output */ - double rt; - double timeconst; + /* rise time of inverter 1's output */ + double ft; + /* fall time of inverter 2's output */ + double rt; + double timeconst; - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(g_tp.min_w_nmos_, NCH, 1); - rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(min_w_pmos, PCH, 1); - ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); - return ft; //sec + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth; + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL) / + (deviceType->Vdd - deviceType->Vth); + return ft; //sec } @@ -281,111 +286,110 @@ double Wire::signal_rise_time () * */ -double Wire::wire_cap (double len /* in m */, bool call_from_outside) -{ - //TODO: this should be consistent with the wire_res in technology file - double sidewall, adj, tot_cap; - double wire_height; - double epsilon0 = 8.8542e-12; - double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness; +double Wire::wire_cap (double len /* in m */, bool call_from_outside) { + //TODO: this should be consistent with the wire_res in technology file + double sidewall, adj, tot_cap; + double wire_height; + double epsilon0 = 8.8542e-12; + double aspect_ratio; + double horiz_dielectric_constant; + double vert_dielectric_constant; + double miller_value; + double ild_thickness; - switch (wire_placement) - { - case outside_mat: - { - aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; - miller_value = g_tp.wire_outside_mat.miller_value; - ild_thickness = g_tp.wire_outside_mat.ild_thickness; - break; - } - case inside_mat : - { - aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; - miller_value = g_tp.wire_inside_mat.miller_value; - ild_thickness = g_tp.wire_inside_mat.ild_thickness; - break; - } - default: - { - aspect_ratio = g_tp.wire_local.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; - miller_value = g_tp.wire_local.miller_value; - ild_thickness = g_tp.wire_local.ild_thickness; - break; - } - } + switch (wire_placement) { + case outside_mat: { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_outside_mat.miller_value; + ild_thickness = g_tp.wire_outside_mat.ild_thickness; + break; + } + case inside_mat : { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_inside_mat.miller_value; + ild_thickness = g_tp.wire_inside_mat.ild_thickness; + break; + } + default: { + aspect_ratio = g_tp.wire_local.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; + miller_value = g_tp.wire_local.miller_value; + ild_thickness = g_tp.wire_local.ild_thickness; + break; + } + } - if (call_from_outside) - { - wire_width *= 1e-6; - wire_spacing *= 1e-6; - } - wire_height = wire_width/w_scale*aspect_ratio; - /* - * assuming height does not change. wire_width = width_original*w_scale - * So wire_height does not change as wire width increases - */ + if (call_from_outside) { + wire_width *= 1e-6; + wire_spacing *= 1e-6; + } + wire_height = wire_width / w_scale * aspect_ratio; + /* + * assuming height does not change. wire_width = width_original*w_scale + * So wire_height does not change as wire width increases + */ // capacitance between wires in the same level // sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) // * epsilon0; - sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) - * epsilon0; + sidewall = miller_value * horiz_dielectric_constant * + (wire_height / wire_spacing) + * epsilon0; - // capacitance between wires in adjacent levels - //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; - //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; + // capacitance between wires in adjacent levels + //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; + //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; - adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; - //Change ild_thickness from micron to M + adj = miller_value * vert_dielectric_constant * wire_width / + (ild_thickness * 1e-6) * epsilon0; + //Change ild_thickness from micron to M - //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m - tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m + //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m + tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m - if (call_from_outside) - { - wire_width *= 1e6; - wire_spacing *= 1e6; - } - return (tot_cap*len); // (F) + if (call_from_outside) { + wire_width *= 1e6; + wire_spacing *= 1e6; + } + return (tot_cap*len); // (F) } - double -Wire::wire_res (double len /*(in m)*/) -{ +double +Wire::wire_res (double len /*(in m)*/) { - double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0; - //TODO: this should be consistent with the wire_res in technology file - //The whole computation should be consistent with the wire_res in technology.cc too! + double aspect_ratio; + double alpha_scatter = 1.05; + double dishing_thickness = 0; + double barrier_thickness = 0; + //TODO: this should be consistent with the wire_res in technology file + //The whole computation should be consistent with the wire_res in technology.cc too! - switch (wire_placement) - { - case outside_mat: - { - aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; - break; - } - case inside_mat : - { - aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; - break; - } - default: - { - aspect_ratio = g_tp.wire_local.aspect_ratio; - break; - } - } - return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)* - (wire_width-2*barrier_thickness))); + switch (wire_placement) { + case outside_mat: { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + break; + } + case inside_mat : { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + break; + } + default: { + aspect_ratio = g_tp.wire_local.aspect_ratio; + break; + } + } + return (alpha_scatter * resistivity * 1e-6 * len / + ((aspect_ratio*wire_width / w_scale - dishing_thickness - + barrier_thickness)* + (wire_width - 2*barrier_thickness))); } /* @@ -395,438 +399,456 @@ Wire::wire_res (double len /*(in m)*/) * low swing nmos delay, and the wire delay * (ref: Technical report 6) */ - void -Wire::low_swing_model() -{ - double len = wire_length; - double beta = pmos_to_nmos_sz_ratio(); +void +Wire::low_swing_model() { + double len = wire_length; + double beta = pmos_to_nmos_sz_ratio(); - double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time; + double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time; - /* Final nmos low swing driver size calculation: - * Try to size the driver such that the delay - * is less than 8FO4. - * If the driver size is greater than - * the max allowable size, assume max size for the driver. - * In either case, recalculate the delay using - * the final driver size assuming slow input with - * finite rise time instead of ideal step input - * - * (ref: Technical report 6) - */ - double cwire = wire_cap(len); /* load capacitance */ - double rwire = wire_res(len); + /* Final nmos low swing driver size calculation: + * Try to size the driver such that the delay + * is less than 8FO4. + * If the driver size is greater than + * the max allowable size, assume max size for the driver. + * In either case, recalculate the delay using + * the final driver size assuming slow input with + * finite rise time instead of ideal step input + * + * (ref: Technical report 6) + */ + double cwire = wire_cap(len); /* load capacitance */ + double rwire = wire_res(len); #define RES_ADJ (8.6) // Increase in resistance due to low driving vol. - double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ; - double nsize = R_to_w(driver_res, NCH); + double driver_res = (-8 * g_tp.FO4 / (log(0.5) * cwire)) / RES_ADJ; + double nsize = R_to_w(driver_res, NCH); - nsize = MIN(nsize, g_tp.max_w_nmos_); - nsize = MAX(nsize, g_tp.min_w_nmos_); + nsize = MIN(nsize, g_tp.max_w_nmos_); + nsize = MAX(nsize, g_tp.min_w_nmos_); - if(rwire*cwire > 8*g_tp.FO4) - { - nsize = g_tp.max_w_nmos_; - } + if (rwire*cwire > 8*g_tp.FO4) { + nsize = g_tp.max_w_nmos_; + } - // size the inverter appropriately to minimize the transmitter delay - // Note - In order to minimize leakage, we are not adding a set of inverters to - // bring down delay. Instead, we are sizing the single gate - // based on the logical effort. - double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0) - + gate_C(2*min_w_pmos, 0))); - double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff; - double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0)); - inv_size = MAX(inv_size, 1); + // size the inverter appropriately to minimize the transmitter delay + // Note - In order to minimize leakage, we are not adding a set of inverters to + // bring down delay. Instead, we are sizing the single gate + // based on the logical effort. + double st_eff = sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) / + (gate_C(2 * g_tp.min_w_nmos_, 0) + + gate_C(2 * min_w_pmos, 0))); + double req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff; + double inv_size = req_cin / (gate_C(min_w_pmos, 0) + + gate_C(g_tp.min_w_nmos_, 0)); + inv_size = MAX(inv_size, 1); - /* nand gate delay */ - double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1)); - double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(inv_size*g_tp.min_w_nmos_, 0) + - gate_C(inv_size*min_w_pmos, 0); + /* nand gate delay */ + double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1)); + double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(inv_size * g_tp.min_w_nmos_, 0) + + gate_C(inv_size * min_w_pmos, 0); - double timeconst = res_eq * cap_eq; + double timeconst = res_eq * cap_eq; - delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - deviceType->Vth/deviceType->Vdd, RISE); - double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd; + delay = horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE); + double temp_power = cap_eq * deviceType->Vdd * deviceType->Vdd; - inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ + inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ - /* Inverter delay: - * The load capacitance of this inv depends on - * the gate capacitance of the final stage nmos - * transistor which in turn depends on nsize - */ - res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1); - cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(nsize, 0); - timeconst = res_eq * cap_eq; + /* Inverter delay: + * The load capacitance of this inv depends on + * the gate capacitance of the final stage nmos + * transistor which in turn depends on nsize + */ + res_eq = tr_R_on(inv_size * min_w_pmos, PCH, 1); + cap_eq = drain_C_(inv_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(nsize, 0); + timeconst = res_eq * cap_eq; - delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - deviceType->Vth/deviceType->Vdd, FALL); - temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd; + delay += horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL); + temp_power += cap_eq * deviceType->Vdd * deviceType->Vdd; - transmitter.delay = delay; - transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/ - transmitter.power.readOp.leakage = deviceType->Vdd * - (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + - 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + transmitter.delay = delay; + /* since it is a diff. model*/ + transmitter.power.readOp.dynamic = temp_power * 2; + transmitter.power.readOp.leakage = deviceType->Vdd * + (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); - transmitter.power.readOp.gate_leakage = deviceType->Vdd * - (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + - 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + transmitter.power.readOp.gate_leakage = deviceType->Vdd * + (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); - inputrise = delay / deviceType->Vth; + inputrise = delay / deviceType->Vth; - /* nmos delay + wire delay */ - cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 + - nsense * sense_amp_input_cap(); //+receiver cap - /* - * NOTE: nmos is used as both pull up and pull down transistor - * in the transmitter. This is because for low voltage swing, drive - * resistance of nmos is less than pmos - * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency) - */ - timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire + - drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) + - rwire*cwire/2 + - (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) * - nsense * sense_amp_input_cap(); + /* nmos delay + wire delay */ + cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2 + + nsense * sense_amp_input_cap(); //+receiver cap + /* + * NOTE: nmos is used as both pull up and pull down transistor + * in the transmitter. This is because for low voltage swing, drive + * resistance of nmos is less than pmos + * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency) + */ + timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * (cwire + + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2) + + rwire * cwire / 2 + + (tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) * + nsense * sense_amp_input_cap(); - /* - * since we are pre-equalizing and overdriving the low - * swing wires, the net time constant is less - * than the actual value - */ - delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0); + /* + * since we are pre-equalizing and overdriving the low + * swing wires, the net time constant is less + * than the actual value + */ + delay += horowitz(inputrise, timeconst, deviceType->Vth / + deviceType->Vdd, .25, 0); #define VOL_SWING .1 - temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */ - temp_power *= 2; /* differential wire */ + temp_power += cap_eq * VOL_SWING * .400; /* .4v is the over drive voltage */ + temp_power *= 2; /* differential wire */ - l_wire.delay = delay - transmitter.delay; - l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic; - l_wire.power.readOp.leakage = deviceType->Vdd* - (4* cmos_Isub_leakage(nsize, 0, 1, nmos)); + l_wire.delay = delay - transmitter.delay; + l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic; + l_wire.power.readOp.leakage = deviceType->Vdd * + (4 * cmos_Isub_leakage(nsize, 0, 1, nmos)); - l_wire.power.readOp.gate_leakage = deviceType->Vdd* - (4* cmos_Ig_leakage(nsize, 0, 1, nmos)); + l_wire.power.readOp.gate_leakage = deviceType->Vdd * + (4 * cmos_Ig_leakage(nsize, 0, 1, nmos)); - //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth; + //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, + // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth; - delay += g_tp.sense_delay; + delay += g_tp.sense_delay; - sense_amp.delay = g_tp.sense_delay; - out_rise_time = g_tp.sense_delay/(deviceType->Vth); - sense_amp.power.readOp.dynamic = g_tp.sense_dy_power; - sense_amp.power.readOp.leakage = 0; //FIXME - sense_amp.power.readOp.gate_leakage = 0; + sense_amp.delay = g_tp.sense_delay; + out_rise_time = g_tp.sense_delay / (deviceType->Vth); + sense_amp.power.readOp.dynamic = g_tp.sense_dy_power; + sense_amp.power.readOp.leakage = 0; //FIXME + sense_amp.power.readOp.gate_leakage = 0; - power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic; - power.readOp.leakage = transmitter.power.readOp.leakage + - l_wire.power.readOp.leakage + - sense_amp.power.readOp.leakage; - power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage + - l_wire.power.readOp.gate_leakage + - sense_amp.power.readOp.gate_leakage; + power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic; + power.readOp.leakage = transmitter.power.readOp.leakage + + l_wire.power.readOp.leakage + + sense_amp.power.readOp.leakage; + power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage + + l_wire.power.readOp.gate_leakage + + sense_amp.power.readOp.gate_leakage; } - double -Wire::sense_amp_input_cap() -{ - return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + - drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def); +double +Wire::sense_amp_input_cap() { + return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + + drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def); } -void Wire::delay_optimal_wire () -{ - double len = wire_length; - //double min_wire_width = wire_width; //m - double beta = pmos_to_nmos_sz_ratio(); - double switching = 0; // switching energy - double short_ckt = 0; // short-circuit energy - double tc = 0; // time constant - // input cap of min sized driver - double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); +void Wire::delay_optimal_wire () { + double len = wire_length; + //double min_wire_width = wire_width; //m + double beta = pmos_to_nmos_sz_ratio(); + double switching = 0; // switching energy + double short_ckt = 0; // short-circuit energy + double tc = 0; // time constant + // input cap of min sized driver + double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); - // output parasitic capacitance of - // the min. sized driver - double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); - // drive resistance - double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(min_w_pmos, PCH, 1))/2; - double wr = wire_res(len); //ohm + // output parasitic capacitance of + // the min. sized driver + double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + // drive resistance + double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(min_w_pmos, PCH, 1)) / 2; + double wr = wire_res(len); //ohm - // wire cap /m - double wc = wire_cap(len); + // wire cap /m + double wc = wire_cap(len); - // size the repeater such that the delay of the wire is minimum - double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel + // size the repeater such that the delay of the wire is minimum + // len will cancel + double repeater_scaling = sqrt(out_res * wc / (wr * input_cap)); - // calc the optimum spacing between the repeaters (m) + // calc the optimum spacing between the repeaters (m) - repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap)/ - ((wr/len)*(wc/len))); - repeater_size = repeater_scaling; + repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap) / + ((wr / len) * (wc / len))); + repeater_size = repeater_scaling; - switching = (repeater_scaling * (input_cap + out_cap) + - repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; + switching = (repeater_scaling * (input_cap + out_cap) + + repeater_spacing * (wc / len)) * deviceType->Vdd * + deviceType->Vdd; - tc = out_res * (input_cap + out_cap) + - out_res * wc/len * repeater_spacing/repeater_scaling + - wr/len * repeater_spacing * input_cap * repeater_scaling + - 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; + tc = out_res * (input_cap + out_cap) + + out_res * wc / len * repeater_spacing / repeater_scaling + + wr / len * repeater_spacing * input_cap * repeater_scaling + + 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing; - delay = 0.693 * tc * len/repeater_spacing; + delay = 0.693 * tc * len / repeater_spacing; #define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */ - short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_scaling * tc; + short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_scaling * tc; - area.set_area((len/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, - g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def)); - power.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); - power.readOp.leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); - power.readOp.gate_leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); + area.set_area((len / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, + g_tp.min_w_nmos_ * repeater_scaling, + g_tp.cell_h_def)); + power.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt)); + power.readOp.leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Isub_leakage(g_tp.min_w_nmos_ * + repeater_scaling, beta * + g_tp.min_w_nmos_ * + repeater_scaling, 1, inv)); + power.readOp.gate_leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Ig_leakage(g_tp.min_w_nmos_ * + repeater_scaling, beta * + g_tp.min_w_nmos_ * + repeater_scaling, 1, inv)); } // calculate power/delay values for wires with suboptimal repeater sizing/spacing void -Wire::init_wire(){ - wire_length = 1; - delay_optimal_wire(); +Wire::init_wire() { + wire_length = 1; + delay_optimal_wire(); double sp, si; - powerDef pow; - si = repeater_size; - sp = repeater_spacing; - sp *= 1e6; // in microns + powerDef pow; + si = repeater_size; + sp = repeater_spacing; + sp *= 1e6; // in microns - double i, j, del; - repeated_wire.push_back(Component()); - for (j=sp; j < 4*sp; j+=100) { - for (i = si; i > 1; i--) { - pow = wire_model(j*1e-6, i, &del); - if (j == sp && i == si) { - global.delay = del; - global.power = pow; - global.area.h = si; - global.area.w = sp*1e-6; // m - } + double i, j, del; + repeated_wire.push_back(Component()); + for (j = sp; j < 4*sp; j += 100) { + for (i = si; i > 1; i--) { + pow = wire_model(j * 1e-6, i, &del); + if (j == sp && i == si) { + global.delay = del; + global.power = pow; + global.area.h = si; + global.area.w = sp * 1e-6; // m + } // cout << "Repeater size - "<< i << // " Repeater spacing - " << j << // " Delay - " << del << // " PowerD - " << pow.readOp.dynamic << // " PowerL - " << pow.readOp.leakage <delay; - low_swing.power = l_wire->power; - delete l_wire; -} - - - -void Wire::update_fullswing() -{ - - list::iterator citer; - double del[4]; - del[3] = this->global.delay + this->global.delay*.3; - del[2] = global.delay + global.delay*.2; - del[1] = global.delay + global.delay*.1; - del[0] = global.delay + global.delay*.05; - double threshold; - double ncost; - double cost; - int i = 4; - while (i>0) { - threshold = del[i-1]; - cost = BIGNUM; - for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++) - { - if (citer->delay > threshold) { - citer = repeated_wire.erase(citer); - citer --; - } - else { - ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic + - citer->power.readOp.leakage/global.power.readOp.leakage; - if(ncost < cost) - { - cost = ncost; - if (i == 4) { - global_30.delay = citer->delay; - global_30.power = citer->power; - global_30.area = citer->area; - } - else if (i==3) { - global_20.delay = citer->delay; - global_20.power = citer->power; - global_20.area = citer->area; - } - else if(i==2) { - global_10.delay = citer->delay; - global_10.power = citer->power; - global_10.area = citer->area; - } - else if(i==1) { - global_5.delay = citer->delay; - global_5.power = citer->power; - global_5.area = citer->area; - } } - } } - i--; - } + repeated_wire.pop_back(); + update_fullswing(); + Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1); + low_swing.delay = l_wire->delay; + low_swing.power = l_wire->power; + delete l_wire; } -powerDef Wire::wire_model (double space, double size, double *delay) -{ - powerDef ptemp; - double len = 1; - //double min_wire_width = wire_width; //m - double beta = pmos_to_nmos_sz_ratio(); - // switching energy - double switching = 0; - // short-circuit energy - double short_ckt = 0; - // time constant - double tc = 0; - // input cap of min sized driver - double input_cap = gate_C (g_tp.min_w_nmos_ + - min_w_pmos, 0); +void Wire::update_fullswing() { - // output parasitic capacitance of - // the min. sized driver - double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); - // drive resistance - double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(min_w_pmos, PCH, 1))/2; - double wr = wire_res(len); //ohm + list::iterator citer; + double del[4]; + del[3] = this->global.delay + this->global.delay * .3; + del[2] = global.delay + global.delay * .2; + del[1] = global.delay + global.delay * .1; + del[0] = global.delay + global.delay * .05; + double threshold; + double ncost; + double cost; + int i = 4; + while (i > 0) { + threshold = del[i-1]; + cost = BIGNUM; + for (citer = repeated_wire.begin(); citer != repeated_wire.end(); + citer++) { + if (citer->delay > threshold) { + citer = repeated_wire.erase(citer); + citer --; + } else { + ncost = citer->power.readOp.dynamic / + global.power.readOp.dynamic + + citer->power.readOp.leakage / global.power.readOp.leakage; + if (ncost < cost) { + cost = ncost; + if (i == 4) { + global_30.delay = citer->delay; + global_30.power = citer->power; + global_30.area = citer->area; + } else if (i == 3) { + global_20.delay = citer->delay; + global_20.power = citer->power; + global_20.area = citer->area; + } else if (i == 2) { + global_10.delay = citer->delay; + global_10.power = citer->power; + global_10.area = citer->area; + } else if (i == 1) { + global_5.delay = citer->delay; + global_5.power = citer->power; + global_5.area = citer->area; + } + } + } + } + i--; + } +} - // wire cap /m - double wc = wire_cap(len); - repeater_spacing = space; - repeater_size = size; - switching = (repeater_size * (input_cap + out_cap) + - repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; +powerDef Wire::wire_model (double space, double size, double *delay) { + powerDef ptemp; + double len = 1; + //double min_wire_width = wire_width; //m + double beta = pmos_to_nmos_sz_ratio(); + // switching energy + double switching = 0; + // short-circuit energy + double short_ckt = 0; + // time constant + double tc = 0; + // input cap of min sized driver + double input_cap = gate_C (g_tp.min_w_nmos_ + + min_w_pmos, 0); - tc = out_res * (input_cap + out_cap) + - out_res * wc/len * repeater_spacing/repeater_size + - wr/len * repeater_spacing * out_cap * repeater_size + - 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; + // output parasitic capacitance of + // the min. sized driver + double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + // drive resistance + double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(min_w_pmos, PCH, 1)) / 2; + double wr = wire_res(len); //ohm - *delay = 0.693 * tc * len/repeater_spacing; + // wire cap /m + double wc = wire_cap(len); + + repeater_spacing = space; + repeater_size = size; + + switching = (repeater_size * (input_cap + out_cap) + + repeater_spacing * (wc / len)) * deviceType->Vdd * + deviceType->Vdd; + + tc = out_res * (input_cap + out_cap) + + out_res * wc / len * repeater_spacing / repeater_size + + wr / len * repeater_spacing * out_cap * repeater_size + + 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing; + + *delay = 0.693 * tc * len / repeater_spacing; #define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */ - short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_size * tc; + short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_size * tc; - ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); - ptemp.readOp.leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); + ptemp.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt)); + ptemp.readOp.leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Isub_leakage(g_tp.min_w_nmos_ * + repeater_size, beta * + g_tp.min_w_nmos_ * + repeater_size, 1, inv)); - ptemp.readOp.gate_leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); + ptemp.readOp.gate_leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Ig_leakage(g_tp.min_w_nmos_ * + repeater_size, beta * + g_tp.min_w_nmos_ * + repeater_size, 1, inv)); - return ptemp; + return ptemp; } void -Wire::print_wire() -{ +Wire::print_wire() { - cout << "\nWire Properties:\n\n"; - cout << " Delay Optimal\n\tRepeater size - "<< global.area.h << - " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)" - " \n\tDelay - " << global.delay*1e6 << " (ns/mm)" - " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)" - " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)" - " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n"; - cout << "\tWire width - " < + +#include "xmlParser.h" + +// Macro definitions to do string comparson to specific parameter/stat. +// Note: These macros assume node_name and value variables of type XMLCSTR +// to exist already. +#define STRCMP(var, str) else if (strcmp(var, str) == 0) + +#define ASSIGN_INT_IF(str, lhs) STRCMP(node_name, str) \ +lhs = atoi(value) + +#define ASSIGN_FP_IF(str, lhs) STRCMP(node_name, str) \ +lhs = atof(value) + +#define ASSIGN_STR_IF(str, lhs) STRCMP(node_name, str) \ +lhs = string(value) + +#define ASSIGN_ENUM_IF(str, lhs, etype) STRCMP(node_name, str) \ +lhs = (etype)atoi(value) + + +// Constants shared across many system components +#define BITS_PER_BYTE 8.0 +#define MIN_BUFFER_SIZE 64 +// CAM structures do not have any associativity +#define CAM_ASSOC 0 + +#endif // __COMMON_H__ diff --git a/ext/mcpat/core.cc b/ext/mcpat/core.cc index ba9106061..b25c23cac 100644 --- a/ext/mcpat/core.cc +++ b/ext/mcpat/core.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -33,491 +34,570 @@ #include #include #include +#include #include -#include "XML_Parse.h" #include "basic_circuit.h" +#include "basic_components.h" +#include "common.h" #include "const.h" #include "core.h" #include "io.h" #include "parameter.h" -//#include "globalvar.h" -InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IB (0), - BTB (0), - ID_inst (0), - ID_operand (0), - ID_misc (0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false, is_default = true; +int RegFU::RFWIN_ACCESS_MULTIPLIER = 16; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; - //Assuming all L1 caches are virtually idxed physically tagged. - //cache +// The five bits are: busy, Issued, Finished, speculative, valid +int SchedulerU::ROB_STATUS_BITS = 5; - size = (int)XML->sys.core[ithCore].icache.icache_config[0]; - line = (int)XML->sys.core[ithCore].icache.icache_config[1]; - assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; - banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - // interface_ip.obj_func_dyn_energy = 0; - // interface_ip.obj_func_dyn_power = 0; - // interface_ip.obj_func_leak_power = 0; - // interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty); - scktRatio = g_tp.sckt_co_eff; - chip_PR_overhead = g_tp.chip_layout_overhead; - macro_PR_overhead = g_tp.macro_layout_overhead; - icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area); - area.set_area(area.get_area()+ icache.caches->local_result.area); - //output_data_csv(icache.caches.local_result); +InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL), + BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int idx, tag, data, size, line, assoc, banks; + bool is_default = true; + clockRate = core_params.clockRate; + name = "Instruction Fetch Unit"; + // Check if there is an icache child: + int i; + icache = NULL; + for( i = 0; i < xml_data->nChildNode("component"); i++ ) { + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); - /* - *iCache controllers - *miss buffer Each MSHR contains enough state - *to handle one or more accesses of any type to a single memory line. - *Due to the generality of the MSHR mechanism, - *the amount of state involved is non-trivial: - *including the address, pointers to the cache entry and destination register, - *written data, and various other pieces of state. - */ - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area); - area.set_area(area.get_area()+ icache.missb->local_result.area); - //output_data_csv(icache.missb.local_result); + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = icache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area); - area.set_area(area.get_area()+ icache.ifb->local_result.area); - //output_data_csv(icache.ifb.local_result); + STRCMP(type, "CacheUnit") { + XMLCSTR name = childXML->getAttribute("name"); + if (strcmp(name, "Instruction Cache") == 0 || + strcmp(name, "icache") == 0) { + icache = new CacheUnit(childXML, &interface_ip); + children.push_back(icache); + } + } + } - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area); - area.set_area(area.get_area()+ icache.prefetchb->local_result.area); - //output_data_csv(icache.prefetchb.local_result); + set_params_stats(); - //Instruction buffer - data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - interface_ip.pure_cam = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64? - XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions. - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - IB->area.set_area(IB->area.get_area()+ IB->local_result.area); - area.set_area(area.get_area()+ IB->local_result.area); - //output_data_csv(IB.IB.local_result); + //Instruction buffer + data = core_params.instruction_length * core_params.peak_issueW; + line = int(ceil(data / BITS_PER_BYTE)); + size = core_params.num_hthreads * core_params.instruction_buffer_size * + line; + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; + } - // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; - // inst_decoder.init_decoder(is_default, &interface_ip); - // inst_decoder.full_decoder_power(); + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.instruction_buffer_assoc; + interface_ip.nbanks = core_params.instruction_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0; + interface_ip.tag_w = core_params.instruction_buffer_tag_width; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + core_params.number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; - if (coredynp.predictionW>0) - { - /* - * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged - * It is only a cache without all the buffers in the cache controller since it is more like a - * look up table than a cache with cache controller. When access miss, no load from other places - * such as main memory (not actively fill the misses), it is passively updated under two circumstances: - * 1) when BPT@ID stage finds out current is a taken branch while BTB missed - * 2) When BPT@ID stage predicts differently than BTB - * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) - * 4) when EXEU find out wrong target has been provided from BTB. - * - */ - size = XML->sys.core[ithCore].BTB.BTB_config[0]; - line = XML->sys.core[ithCore].BTB.BTB_config[1]; - assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; - banks = XML->sys.core[ithCore].BTB.BTB_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); -// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:size; - interface_ip.line_sz = debug?64:line; - interface_ip.assoc = debug?8:assoc; - interface_ip.nbanks = debug?1:banks; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty); - BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area); - area.set_area(area.get_area()+ BTB->local_result.area); - ///cout<<"area="<area.set_area(IB->area.get_area() + IB->local_result.area); + area.set_area(area.get_area() + IB->local_result.area); - BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp); - area.set_area(area.get_area()+ BPT->area.get_area()); - } - - ID_inst = new inst_decoder(is_default, &interface_ip, - coredynp.opcode_length, 1/*Decoder should not know how many by itself*/, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_operand = new inst_decoder(is_default, &interface_ip, - coredynp.arch_ireg_width, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_misc = new inst_decoder(is_default, &interface_ip, - 8/* Prefix field etc upto 14B*/, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer. - //So the dynamic power should be multiplied by a few times. - area.set_area(area.get_area()+ (ID_inst->area.get_area() - +ID_operand->area.get_area() - +ID_misc->area.get_area())*coredynp.decodeW); - -} - - -BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - globalBPT(0), - localBPT(0), - L1_localBPT(0), - L2_localBPT(0), - chooser(0), - RAS(0), - exist(exist_) -{ + if (core_params.predictionW > 0) { /* - * Branch Predictor, accessed during ID stage. - * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264, - * including global predictor, local two level predictor, and Chooser. - * The Branch predictor also includes a RAS (return address stack) for function calls - * Branch predictors are tagged by thread ID and modeled as 1-way associative $ - * However RAS return address stacks are duplicated for each thread. - * TODO:Data Width need to be computed more precisely * + * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged + * It is only a cache without all the buffers in the cache controller since it is more like a + * look up table than a cache with cache controller. When access miss, no load from other places + * such as main memory (not actively fill the misses), it is passively updated under two circumstances: + * 1) when BPT@ID stage finds out current is a taken branch while BTB missed + * 2) When BPT@ID stage predicts differently than BTB + * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) + * 4) when EXEU find out wrong target has been provided from BTB. + * */ - if (!exist) return; - int tag, data; + size = inst_fetch_params.btb_size; + line = inst_fetch_params.btb_block_size; + assoc = inst_fetch_params.btb_assoc; + banks = inst_fetch_params.btb_num_banks; + idx = int(ceil(log2(size / line / assoc))); + tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads))) + + EXTRA_TAG_BITS; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.assoc = 1; - interface_ip.pure_cam = false; - if (coredynp.multithreaded) - { + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = assoc; + interface_ip.nbanks = banks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate; + interface_ip.latency = inst_fetch_params.btb_latency / clockRate; - tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; + BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + BTB->local_result.area); - interface_ip.is_cache = true; - interface_ip.pure_ram = false; + BPT = new BranchPredictor(xml_data, &interface_ip, + core_params, core_stats); + area.set_area(area.get_area() + BPT->area.get_area()); + } + + ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder", + is_default, &interface_ip, + core_params.opcode_width, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + + ID_operand = new InstructionDecoder(xml_data, + "Instruction Operand Decoder", + is_default, &interface_ip, + core_params.arch_ireg_width, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + + ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder", + is_default, &interface_ip, + core_params.micro_opcode_length, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + area.set_area(area.get_area()+ (ID_inst->area.get_area() + + ID_operand->area.get_area() + + ID_misc->area.get_area()) + * core_params.decodeW); +} + +void +InstFetchU::set_params_stats() { + int num_children = xml_data->nChildNode("component"); + int i; + memset(&inst_fetch_params,0,sizeof(InstFetchParameters)); + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "BranchTargetBuffer") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("size", inst_fetch_params.btb_size); + ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size); + ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc); + ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks); + ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency); + ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput); + ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports); + + else { + warnUnrecognizedParam(node_name); } - else - { - interface_ip.is_cache = false; - interface_ip.pure_ram = true; + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("read_accesses", + inst_fetch_stats.btb_read_accesses); + ASSIGN_FP_IF("write_accesses", + inst_fetch_stats.btb_write_accesses); + else { + warnUnrecognizedStat(node_name); + } + } } - //Global predictor - data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area); - area.set_area(area.get_area()+ globalBPT->local_result.area); + } - //Local BPT (Level 1) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area); - area.set_area(area.get_area()+ L1_localBPT->local_result.area); + // Parameter sanity check + if (inst_fetch_params.btb_size <= 0) { + errorNonPositiveParam("size"); + } - //Local BPT (Level 2) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area); - area.set_area(area.get_area()+ L2_localBPT->local_result.area); + if (inst_fetch_params.btb_block_size <= 0) { + errorNonPositiveParam("block_size"); + } - //Chooser - data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty); - chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area); - area.set_area(area.get_area()+ chooser->local_result.area); + if (inst_fetch_params.btb_assoc <= 0) { + errorNonPositiveParam("assoc"); + } - //RAS return address stacks are Duplicated for each thread. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - data = int(ceil(coredynp.pc_width/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); - RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); - area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); + if (inst_fetch_params.btb_num_banks <= 0) { + errorNonPositiveParam("num_banks"); + } +} + +BranchPredictor::BranchPredictor(XMLNode* _xml_data, + InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_) + : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL), + L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int size; + + clockRate = core_params.clockRate; + name = "Branch Predictor"; + + // Common interface parameters for the branch predictor structures + interface_ip.pure_cam = false; + + if (core_params.multithreaded) { + tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS); + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + } else { + interface_ip.specific_tag = 0; + interface_ip.tag_w = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + } + + // Parse params and stats from XML + set_params_stats(); + + // Common interface parameters for the branch predictor structures + interface_ip.assoc = branch_pred_params.assoc; + interface_ip.nbanks = branch_pred_params.nbanks; + + //Global predictor + data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE)); + size = data * branch_pred_params.global_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + globalBPT->local_result.area); + + //Local BPT (Level 1) + data = int(ceil(branch_pred_params.local_l1_predictor_size / + BITS_PER_BYTE)); + size = data * branch_pred_params.local_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + L1_localBPT = new ArrayST(xml_data, &interface_ip, + "Local Predictor, Level 1", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + L1_localBPT->area.set_area(L1_localBPT->area.get_area() + + L1_localBPT->local_result.area); + area.set_area(area.get_area()+ L1_localBPT->local_result.area); + + //Local BPT (Level 2) + data = int(ceil(branch_pred_params.local_l2_predictor_size / + BITS_PER_BYTE)); + size = data * branch_pred_params.local_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + L2_localBPT = new ArrayST(xml_data, &interface_ip, + "Local Predictor, Level 2", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + L2_localBPT->local_result.area); + + //Chooser + data = int(ceil(branch_pred_params.chooser_predictor_bits / + BITS_PER_BYTE)); + size = data * branch_pred_params.chooser_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + chooser->local_result.area); + + //RAS return address stacks are Duplicated for each thread. + data = int(ceil(core_params.pc_width / BITS_PER_BYTE)); + size = data * core_params.RAS_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate, + core_params.opt_local, core_params.core_ty); + RAS->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + RAS->local_result.area * + core_params.num_hthreads); } -SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - int_inst_window(0), - fp_inst_window(0), - ROB(0), - instruction_selection(0), - exist(exist_) - { - if (!exist) return; - int tag, data; - bool is_default=true; - string tmp_name; +void +BranchPredictor::set_params_stats() { + int num_children = xml_data->nChildNode("component"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if ((coredynp.core_ty==Inorder && coredynp.multithreaded)) - { - //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors - tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design - data = XML->sys.core[ithCore].instruction_length; - //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel® 64 and IA-32 Architectures - //Software Developer’s Manual - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - //output_data_csv(iRS.RS.local_result); - Iw_height =int_inst_window->local_result.cache_ht; + if (!type) + warnMissingComponentType(child->getAttribute("id")); - /* - * selection logic - * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up - * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who - * at the issue stage. - */ + STRCMP(type, "BranchPredictor") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads, - &interface_ip, Core_device, coredynp.core_ty); + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("assoc", branch_pred_params.assoc); + ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks); + ASSIGN_INT_IF("local_l1_predictor_size", + branch_pred_params.local_l1_predictor_size); + ASSIGN_INT_IF("local_l2_predictor_size", + branch_pred_params.local_l2_predictor_size); + ASSIGN_INT_IF("local_predictor_entries", + branch_pred_params.local_predictor_entries); + ASSIGN_INT_IF("global_predictor_entries", + branch_pred_params.global_predictor_entries); + ASSIGN_INT_IF("global_predictor_bits", + branch_pred_params.global_predictor_bits); + ASSIGN_INT_IF("chooser_predictor_entries", + branch_pred_params.chooser_predictor_entries); + ASSIGN_INT_IF("chooser_predictor_bits", + branch_pred_params.chooser_predictor_bits); + + else { + warnUnrecognizedParam(node_name); + } + } + // The core reads in the number of branches and the number of + // function calls and these values are passed through the + // core_stats variable, so we don't need to read them in here + } + } +} + +SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), int_inst_window(NULL), + fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL), + fp_instruction_selection(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int size; + int line; + bool is_default = true; + string tmp_name; + + clockRate = core_params.clockRate; + name = "Instruction Scheduler"; + if ((core_params.core_ty == Inorder && core_params.multithreaded)) { + //Instruction issue queue, in-order multi-issue or multithreaded + //processor also has this structure. Unified window for Inorder + //processors + //This tag width is the normal thread state bits based on + //Niagara Design + tag = int(log2(core_params.num_hthreads) * core_params.perThreadState); + data = core_params.instruction_length; + line = int(ceil(data / BITS_PER_BYTE)); + size = core_params.instruction_window_size * line; + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; } - if (coredynp.core_ty==OOO) - { + //NOTE: x86 inst can be very lengthy, up to 15B. + //Source: Intel® 64 and IA-32 Architectures + //Software Developer’s Manual + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_issueW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.peak_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + int_inst_window = new ArrayST(xml_data, &interface_ip, + "InstFetchQueue", Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + int_inst_window->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + int_inst_window->local_result.area * + core_params.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; + + /* + * selection logic + * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up + * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who + * at the issue stage. + */ + + int_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.instruction_window_size, + core_params.peak_issueW * + core_params.num_hthreads, + &interface_ip, + "Int Instruction Selection Logic", + core_stats.inst_window_wakeup_accesses, + clockRate, Core_device, core_params.core_ty); + + if (core_params.fp_instruction_window_size > 0) { + fp_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.fp_instruction_window_size, + core_params.fp_issueW * + core_params.num_hthreads, + &interface_ip, + "FP Instruction Selection Logic", + core_stats.fp_inst_window_wakeup_accesses, + clockRate, Core_device, + core_params.core_ty); + } + } + + if (core_params.core_ty == OOO) { /* * CAM based instruction window * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored @@ -525,3611 +605,3405 @@ SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* in * It is written once and read twice(two operands) before an instruction can be issued. * X86 instruction can be very long up to 15B. add instruction length in XML */ - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. - //This is modeled using two equivalent readouts with half of the data width - tmp_name = "InstIssueQueue"; + if (core_params.scheu_ty == PhysicalRegFile) { + tag = core_params.phy_ireg_width; + data = int((ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_ireg_width - + core_params.arch_ireg_width)) / + (double)NUM_SOURCE_OPERANDS) / + BITS_PER_BYTE)); + tmp_name = "Integer Instruction Window"; + } else { + tag = core_params.phy_ireg_width; + data = int(ceil(((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_ireg_width - + core_params.arch_ireg_width) + + 2 * core_params.int_data_width) / + (double)NUM_SOURCE_OPERANDS) / + BITS_PER_BYTE)); + tmp_name = "Integer Reservation Station"; } - else - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+ - 2*coredynp.int_data_width)/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. - //This is modeled using two equivalent readouts with half of the data width - tmp_name = "IntReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 2*1.0/clockRate; - interface_ip.latency = 2*1.0/clockRate; + size = data * core_params.instruction_window_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - Iw_height =int_inst_window->local_result.cache_ht; - //FU inst window - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0)); - tmp_name = "FPIssueQueue"; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_issueW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.peak_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; + interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate; + int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + int_inst_window->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + int_inst_window->local_result.area * + core_params.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; + + //FU inst window + if (core_params.scheu_ty == PhysicalRegFile) { + tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width; + data = int(ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_freg_width - + core_params.arch_freg_width)) / BITS_PER_BYTE)); + tmp_name = "FP Instruction Window"; + } else { + tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; + data = int(ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_freg_width - + core_params.arch_freg_width) + + NUM_SOURCE_OPERANDS * core_params.fp_data_width) / + BITS_PER_BYTE)); + tmp_name = "FP Reservation Station"; } - else - { - tag = 2*coredynp.phy_ireg_width; - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+ - 2*coredynp.fp_data_width)/8.0)); - tmp_name = "FPReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + + size = data * core_params.fp_instruction_window_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_issueW; - interface_ip.num_wr_ports = coredynp.fp_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - fp_Iw_height =fp_inst_window->local_result.cache_ht; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.fp_issueW; + interface_ip.num_wr_ports = core_params.fp_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.fp_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fp_inst_window = + new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + fp_inst_window->output_data.area *= core_params.num_fp_pipelines; + area.set_area(area.get_area() + fp_inst_window->local_result.area + *core_params.num_fp_pipelines); + fp_Iw_height = fp_inst_window->local_result.cache_ht; - if (XML->sys.core[ithCore].ROB_size >0) - { - /* - * if ROB_size = 0, then the target processor does not support hardware-based - * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which - * means branch must be resolved before instruction issued into instruction window, since - * there is no change to flush miss-predict branch path after instructions are issued in this situation. - * - * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. - * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. - * However, this approach is abandoned due to its high power and poor scalablility. - * McPAT uses current implementation of ROB as circular buffer. - * ROB is written once when instruction is issued and read once when the instruction is committed. * - */ - int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); - //5 bits are: busy, Issued, Finished, speculative, valid - if(coredynp.scheu_ty==PhysicalRegFile) - { - //PC is to id the instruction for recover exception. - //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0)); - data = int(ceil((robExtra+coredynp.pc_width + - coredynp.phy_ireg_width)/8.0)); - } - else - { - //in RS based OOO, ROB also contains value of destination reg -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - data = int(ceil((robExtra + coredynp.pc_width + - coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - } - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_commitW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - ROB_height =ROB->local_result.cache_ht; - } + if (core_params.ROB_size > 0) { + /* + * if ROB_size = 0, then the target processor does not support hardware-based + * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which + * means branch must be resolved before instruction issued into instruction window, since + * there is no change to flush miss-predict branch path after instructions are issued in this situation. + * + * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. + * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. + * However, this approach is abandoned due to its high power and poor scalablility. + * McPAT uses current implementation of ROB as circular buffer. + * ROB is written once when instruction is issued and read once when the instruction is committed. * + */ + int robExtra = int(ceil(ROB_STATUS_BITS + + log2(core_params.num_hthreads))); + + if (core_params.scheu_ty == PhysicalRegFile) { + //PC is to id the instruction for recover exception. + //inst is used to map the renamed dest. registers. so that + //commit stage can know which reg/RRAT to update + data = int(ceil((robExtra + core_params.pc_width + + core_params.phy_ireg_width) / BITS_PER_BYTE)); + } else { + //in RS based OOO, ROB also contains value of destination reg + data = int(ceil((robExtra + core_params.pc_width + + core_params.phy_ireg_width + + core_params.fp_data_width) / BITS_PER_BYTE)); + } + + interface_ip.cache_sz = data * core_params.ROB_size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.ROB_assoc; + interface_ip.nbanks = core_params.ROB_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.ROB_tag_width > 0; + interface_ip.tag_w = core_params.ROB_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_commitW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ROB->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + ROB->local_result.area * + core_params.num_pipelines); + ROB_height = ROB->local_result.cache_ht; + } + + int_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.instruction_window_size, + core_params.peak_issueW, &interface_ip, + "Int Instruction Selection Logic", + core_stats.inst_window_wakeup_accesses, + clockRate, Core_device, core_params.core_ty); + + if (core_params.fp_instruction_window_size > 0) { + fp_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.fp_instruction_window_size, + core_params.fp_issueW, &interface_ip, + "FP Instruction Selection Logic", + core_stats.fp_inst_window_wakeup_accesses, + clockRate, Core_device, + core_params.core_ty); + } - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); } } -LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - LSQ(0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false; - int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16; +LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int line; + int size; + int ldst_opcode = core_params.opcode_width; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; + clockRate = core_params.clockRate; + name = "Load/Store Unit"; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - //Dcache - size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; - line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; - assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; - banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area); - area.set_area(area.get_area()+ dcache.caches->local_result.area); - //output_data_csv(dcache.caches.local_result); + // Check if there is a dcache child: + int i; + dcache = NULL; + for( i = 0; i < xml_data->nChildNode("component"); i++ ) { + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); - //dCache controllers - //miss buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area); - area.set_area(area.get_area()+ dcache.missb->local_result.area); - //output_data_csv(dcache.missb.local_result); + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area); - area.set_area(area.get_area()+ dcache.ifb->local_result.area); - //output_data_csv(dcache.ifb.local_result); + STRCMP(type, "CacheUnit") { + XMLCSTR name = childXML->getAttribute("name"); + if (strcmp(name, "Data Cache") == 0 || + strcmp(name, "dcache") == 0) { + dcache = new CacheUnit(childXML, &interface_ip); + children.push_back(dcache); + } + } + } - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area); - area.set_area(area.get_area()+ dcache.prefetchb->local_result.area); - //output_data_csv(dcache.prefetchb.local_result); + /* + * LSU--in-order processors do not have separate load queue: unified lsq + * partitioned among threads + * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ + */ + tag = ldst_opcode + virtual_address_width + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + line = int(ceil(data_path_width / BITS_PER_BYTE)); + size = core_params.store_buffer_size * line * core_params.num_hthreads; - //WBB + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.store_buffer_assoc; + interface_ip.nbanks = core_params.store_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + LSQ->local_result.area); + area.set_area(area.get_area()*cdb_overhead); + lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); - if (cache_p==Write_back) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area); - area.set_area(area.get_area()+ dcache.wbb->local_result.area); - //output_data_csv(dcache.wbb.local_result); - } + if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { + tag = ldst_opcode + virtual_address_width + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + line = int(ceil(data_path_width / BITS_PER_BYTE)); + size = core_params.load_buffer_size * line * core_params.num_hthreads; - /* - * LSU--in-order processors do not have separate load queue: unified lsq - * partitioned among threads - * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - */ - tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS; - data = XML->sys.machine_bits; - interface_ip.is_cache = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty); - LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area); - area.set_area(area.get_area()+ LSQ->local_result.area); - area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(LSQ.LSQ.local_result); - lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(LoadQ.LoadQ.local_result); - lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - } + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.load_buffer_assoc; + interface_ip.nbanks = core_params.load_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device, + clockRate, core_params.opt_local, + core_params.core_ty); + LoadQ->area.set_area(LoadQ->area.get_area() + + LoadQ->local_result.area); + area.set_area(area.get_area()*cdb_overhead); + lsq_height = (LSQ->local_result.cache_ht + + LoadQ->local_result.cache_ht) * sqrt(cdb_overhead); + } } -MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - itlb(0), - dtlb(0), - exist(exist_) -{ - if (!exist) return; - int tag, data; - bool debug= false; +MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int line; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.specific_tag = 1; - //Itlb TLBs are partioned among threads according to Nigara and Nehalem - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); - itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area); - area.set_area(area.get_area()+ itlb->local_result.area); - //output_data_csv(itlb.tlb.local_result); + clockRate = core_params.clockRate; + name = "Memory Management Unit"; - //dtlb - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); - dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area); - area.set_area(area.get_area()+ dtlb->local_result.area); - //output_data_csv(dtlb.tlb.local_result); + set_params_stats(); + + // These are shared between ITLB and DTLB + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + //Itlb TLBs are partioned among threads according to Nigara and Nehalem + tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + data = physical_address_width - int(floor(log2(virtual_memory_page_size))); + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; + interface_ip.line_sz = line; + interface_ip.assoc = mem_man_params.itlb_assoc; + interface_ip.nbanks = mem_man_params.itlb_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.number_instruction_fetch_ports; + interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; + interface_ip.latency = mem_man_params.itlb_latency / clockRate; + itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + itlb->local_result.area); + + //dtlb + tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + data = physical_address_width - int(floor(log2(virtual_memory_page_size))); + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line; + interface_ip.line_sz = line; + interface_ip.assoc = mem_man_params.dtlb_assoc; + interface_ip.nbanks = mem_man_params.dtlb_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate; + interface_ip.latency = mem_man_params.dtlb_latency / clockRate; + dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + dtlb->local_result.area); } -RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IRF (0), - FRF (0), - RFWIN (0), - exist(exist_) - { - /* - * processors have separate architectural register files for each thread. - * therefore, the bypass buses need to travel across all the register files. - */ - if (!exist) return; - int data; +void +MemManU::set_params_stats() { + memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); + memset(&mem_man_stats, 0, sizeof(MemoryManagementStats)); + int num_children = xml_data->nChildNode("component"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - //**********************************IRF*************************************** - data = coredynp.int_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. - interface_ip.num_rd_ports = 2*coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); - area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(IRF.RF.local_result); + if (!type) + warnMissingComponentType(child->getAttribute("id")); - //**********************************FRF*************************************** - data = coredynp.fp_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + STRCMP(type, "InstructionTLB") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("number_entries", + mem_man_params.itlb_number_entries); + ASSIGN_FP_IF("latency", mem_man_params.itlb_latency); + ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput); + ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc); + ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks); + + else { + warnUnrecognizedParam(node_name); + } + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("total_accesses", + mem_man_stats.itlb_total_accesses); + ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses); + ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts); + + else { + warnUnrecognizedStat(node_name); + } + } + } STRCMP(type, "DataTLB") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("number_entries", + mem_man_params.dtlb_number_entries); + ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency); + ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput); + ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); + ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks); + + else { + warnUnrecognizedParam(node_name); + } + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("read_accesses", + mem_man_stats.dtlb_read_accesses); + ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses); + ASSIGN_FP_IF("write_accesses", + mem_man_stats.dtlb_write_accesses); + ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses); + ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts); + + else { + warnUnrecognizedStat(node_name); + } + } + } + } +} + +RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + /* + * processors have separate architectural register files for each thread. + * therefore, the bypass buses need to travel across all the register files. + */ + if (!exist) return; + int data; + int line; + + clockRate = core_params.clockRate; + name = "Register File Unit"; + + //**********************************IRF************************************ + data = core_params.int_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.num_IRF_entry * line; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.phy_Regs_IRF_assoc; + interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; + interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports; + interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + IRF->output_data.area *= core_params.num_hthreads * + core_params.num_pipelines * cdb_overhead; + area.set_area(area.get_area() + IRF->local_result.area * + core_params.num_hthreads * core_params.num_pipelines * + cdb_overhead); + + //**********************************FRF************************************ + data = core_params.fp_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.num_FRF_entry * line; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.phy_Regs_FRF_assoc; + interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0; + interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports; + interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + FRF->output_data.area *= core_params.num_hthreads * + core_params.num_fp_pipelines * cdb_overhead; + area.set_area(area.get_area() + FRF->local_result.area * + core_params.num_hthreads * core_params.num_fp_pipelines * + cdb_overhead); + int_regfile_height = IRF->local_result.cache_ht * + core_params.num_hthreads * sqrt(cdb_overhead); + fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads * + sqrt(cdb_overhead); + //since a EXU is associated with each pipeline, the cdb should not have + //longer length. + + if (core_params.regWindowing) { + //*********************************REG_WIN***************************** + //ECC, and usually 2 regs are transfered together during window + //shifting.Niagara Mega cell + data = core_params.int_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.register_window_size * + IRF->l_ip.cache_sz * core_params.num_hthreads; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.register_window_assoc; + interface_ip.nbanks = core_params.register_window_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.register_window_tag_width > 0; + interface_ip.tag_w = core_params.register_window_tag_width; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. - interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.register_window_rw_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); - area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(FRF.RF.local_result); - int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); - fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); - //since a EXU is associated with each pipeline, the cdb should not have longer length. - if (coredynp.regWindowing) - { - //*********************************REG_WIN************************************ - data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 4.0/clockRate; - interface_ip.latency = 4.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty); - RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - //output_data_csv(RFWIN.RF.local_result); + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = + core_params.register_window_throughput / clockRate; + interface_ip.latency = + core_params.register_window_latency / clockRate; + RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device, + clockRate, core_params.opt_local, + core_params.core_ty); + RFWIN->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + RFWIN->local_result.area * + core_params.num_pipelines); + } +} + +EXECU::EXECU(XMLNode* _xml_data, + InputParameter* interface_ip_, double lsq_height_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), + exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL), + int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL), + fpTagBypass(NULL), interface_ip(*interface_ip_), + lsq_height(lsq_height_), core_params(_core_params), + core_stats(_core_stats), exist(exist_) { + if (!exist) return; + double fu_height = 0.0; + clockRate = core_params.clockRate; + name = "Execution Unit"; + rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); + if (core_params.core_ty == OOO || + (core_params.core_ty == Inorder && core_params.multithreaded)) { + scheu = new SchedulerU(xml_data, &interface_ip, core_params, + core_stats); + area.set_area(area.get_area() + scheu->area.get_area() ); + } + exeu = new FunctionalUnit(xml_data, &interface_ip, core_params, + core_stats, ALU); + area.set_area(area.get_area() + exeu->area.get_area() + + rfu->area.get_area()); + fu_height = exeu->FU_height; + if (core_params.num_fpus > 0) { + fp_u = new FunctionalUnit(xml_data, &interface_ip, + core_params, core_stats, FPU); + area.set_area(area.get_area() + fp_u->area.get_area()); + } + if (core_params.num_muls > 0) { + mul = new FunctionalUnit(xml_data, &interface_ip, + core_params, core_stats, MUL); + area.set_area(area.get_area() + mul->area.get_area()); + fu_height += mul->FU_height; + } + /* + * broadcast logic, including int-broadcast; int_tag-broadcast; + * fp-broadcast; fp_tag-broadcast + * integer by pass has two paths and fp has 3 paths. + * on the same bus there are multiple tri-state drivers and muxes that go + * to different components on the same bus + */ + interface_ip.wt = core_params.execu_broadcast_wt; + interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; + interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type; + interface_ip.throughput = core_params.broadcast_numerator / clockRate; + interface_ip.latency = core_params.broadcast_numerator / clockRate; + double scheu_Iw_height = 0.0; + double scheu_ROB_height = 0.0; + double scheu_fp_Iw_height = 0.0; + if (scheu) { + scheu_Iw_height = scheu->Iw_height; + scheu_ROB_height = scheu->ROB_height; + scheu_fp_Iw_height = scheu->fp_Iw_height; + } + + // Common bypass logic parameters + double base_w = core_params.execu_bypass_base_width; + double base_h = core_params.execu_bypass_base_height; + int level = core_params.execu_bypass_start_wiring_level; + double route_over_perc = core_params.execu_bypass_route_over_perc; + Wire_type wire_type = core_params.execu_bypass_wire_type; + int data_w; + double len; + + if (core_params.core_ty == Inorder) { + data_w = int(ceil(data_path_width / 32.0)*32); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, false, + route_over_perc, core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, false, + route_over_perc, core_params.opt_local, + core_params.core_ty, wire_type); + + if (core_params.num_muls > 0) { + data_w = int(ceil(data_path_width / 32.0)*32*1.5); + len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu_Iw_height; + intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); } + if (core_params.num_fpus > 0) { + data_w = int(ceil(data_path_width / 32.0)*32*1.5); + len = rfu->fp_regfile_height + fp_u->FU_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); - } + data_w = core_params.perThreadState; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_Iw_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } else {//OOO + if (core_params.scheu_ty == PhysicalRegFile) { + /* For physical register based OOO, + * data broadcast interconnects cover across functional units, lsq, + * inst windows and register files, + * while tag broadcast interconnects also cover across ROB + */ + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); -EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - lsq_height(lsq_height_), - coredynp(dyn_p_), - rfu(0), - scheu(0), - fp_u(0), - exeu(0), - mul(0), - int_bypass(0), - intTagBypass(0), - int_mul_bypass(0), - intTag_mul_Bypass(0), - fp_bypass(0), - fpTagBypass(0), - exist(exist_) -{ - if (!exist) return; - double fu_height = 0.0; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - rfu = new RegFU(XML, ithCore, &interface_ip,coredynp); - scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp); - exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU); - area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() ); - fu_height = exeu->FU_height; - if (coredynp.num_fpus >0) - { - fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU); - area.set_area(area.get_area()+ fp_u->area.get_area()); - } - if (coredynp.num_muls >0) - { - mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL); - area.set_area(area.get_area()+ mul->area.get_area()); - fu_height += mul->FU_height; - } - /* - * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast - * integer by pass has two paths and fp has 3 paths. - * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus - */ - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used - interface_ip.wire_os_mat_type = 2; - interface_ip.throughput = 10.0/clockRate; //Do not care - interface_ip.latency = 10.0/clockRate; - } + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); - if (coredynp.core_ty==Inorder) - { - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); + if (core_params.num_muls > 0) { + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + intTag_mul_Bypass = new Interconnect(xml_data, + "Mul Bypass Tag", + Core_device, base_w, + base_h, data_w, len, + &interface_ip, level, + clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - {//OOO - if (coredynp.scheu_ty==PhysicalRegFile) - { - /* For physical register based OOO, - * data broadcast interconnects cover across functional units, lsq, inst windows and register files, - * while tag broadcast interconnects also cover across ROB - */ - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); + if (core_params.num_fpus > 0) { + data_w = int(ceil(core_params.fp_data_width)); + len = rfu->fp_regfile_height + fp_u->FU_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } + data_w = core_params.phy_freg_width; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } else { + /* + * In RS based processor both data and tag are broadcast together, + * covering functional units, lsq, nst windows, register files, and ROBs + */ + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - { - /* - * In RS based processor both data and tag are broadcast together, - * covering functional units, lsq, nst windows, register files, and ROBs - */ - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + if (core_params.num_muls > 0) { + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + intTag_mul_Bypass = new Interconnect(xml_data, + "Mul Bypass Tag", + Core_device, base_w, + base_h, data_w, len, + &interface_ip, level, + clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } + if (core_params.num_fpus > 0) { + data_w = int(ceil(core_params.fp_data_width)); + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); - } - area.set_area(area.get_area()+ bypass.area.get_area()); + data_w = core_params.phy_freg_width; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } + } + if (int_bypass) { + children.push_back(int_bypass); + } + if (intTagBypass) { + children.push_back(intTagBypass); + } + if (int_mul_bypass) { + children.push_back(int_mul_bypass); + } + if (intTag_mul_Bypass) { + children.push_back(intTag_mul_Bypass); + } + if (fp_bypass) { + children.push_back(fp_bypass); + } + if (fpTagBypass) { + children.push_back(fpTagBypass); + } + + area.set_area(area.get_area() + int_bypass->area.get_area() + + intTagBypass->area.get_area()); + if (core_params.num_muls > 0) { + area.set_area(area.get_area() + int_mul_bypass->area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + if (core_params.num_fpus > 0) { + area.set_area(area.get_area() + fp_bypass->area.get_area() + + fpTagBypass->area.get_area()); + } } -RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - iFRAT(0), - fFRAT(0), - iRRAT(0), - fRRAT(0), - ifreeL(0), - ffreeL(0), - idcl(0), - fdcl(0), - RAHT(0), - exist(exist_) - { - /* - * Although renaming logic maybe be used in in-order processors, - * McPAT assumes no renaming logic is used since the performance gain is very limited and - * the only major inorder processor with renaming logic is Itainium - * that is a VLIW processor and different from current McPAT's model. - * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT; - * i,f prefix mean int and fp - * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires. - * FRAT will be read twice and written once per instruction; - * RRAT will be write once per instruction when committing and reads out all when context switch - * checkpointing is implicit - * Renaming logic is duplicated for each different hardware threads - * - * No Dual-RAT is needed in RS-based OOO processors, - * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, - * to make sure all the renamings associated with the ROB to be released are updated at the same time. - * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, - * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, - * - * Both RAM and CAM have same DCL - */ - if (!exist) return; - int tag, data, out_w; -// interface_ip.wire_is_mat_type = 0; -// interface_ip.wire_os_mat_type = 0; -// interface_ip.wt = Global_30; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if (coredynp.core_ty==OOO) - { +RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL), + fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL), + RAHT(NULL), interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int out_w; + int size; + + // Assumption: + // We make an implicit design assumption based on the specific structure + // that is being modeled. + // 1. RAM-based RATs are direct mapped. However, if the associated + // scheduler is a reservation station style, the RATs are fully + // associative. + // 2. Non-CAM based RATs and free lists do not have tags. + // 3. Free lists are direct mapped. + + const int RAM_BASED_RAT_ASSOC = 1; + const int RS_RAT_ASSOC = 0; + const int NON_CAM_BASED_TAG_WIDTH = 0; + const int FREELIST_ASSOC = 1; + + clockRate = core_params.clockRate; + name = "Rename Unit"; + if (core_params.core_ty == OOO) { //integer pipeline - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions - data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); -// data = int(ceil(coredynp.phy_ireg_width/8.0)); - out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); + if (core_params.scheu_ty == PhysicalRegFile) { + if (core_params.rm_ty == RAMbased) { + //FRAT with global checkpointing (GCs) please see paper tech + //report for detailed explaintions -// //RAHT According to Intel, combine GC with FRAT is very costly. -// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry); -// out_w = data; -// interface_ip.is_cache = false; -// interface_ip.pure_cam = false; -// interface_ip.pure_ram = true; -// interface_ip.line_sz = data; -// interface_ip.cache_sz = data*coredynp.globalCheckpoint; -// interface_ip.assoc = 1; -// interface_ip.nbanks = 1; -// interface_ip.out_w = out_w*8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = 1.0/clockRate; -// interface_ip.latency = 1.0/clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = 1;//the extra one port is for GCs -// interface_ip.num_rd_ports = 2*coredynp.decodeW; -// interface_ip.num_wr_ports = coredynp.decodeW; -// interface_ip.num_se_rd_ports = 0; -// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT"); -// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); -// area.set_area(area.get_area()+ iFRAT->area.get_area()); + data = int(ceil(core_params.phy_ireg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); - //FRAT floating point - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + size = data * core_params.archi_Regs_IRF_size; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width; - data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT for FP - tag = coredynp.arch_freg_width; - data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - - //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iRRAT->area.get_area()); + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); - //RRAT for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + //FRAT floating point + data = int(ceil(core_params.phy_freg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fRRAT->area.get_area()); + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); - //Freelist of renaming unit always RAM based - //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist - // 2)When instruction commits the Phyregisters/ROB needed to be recycled. - //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + } else if ((core_params.rm_ty == CAMbased)) { + //IRAT + tag = core_params.arch_ireg_width; + //the address of CAM needed to be sent out + data = int(ceil((core_params.arch_ireg_width + 1 * + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width; - //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); - //freelist for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ffreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + //FRAT for FP + tag = core_params.arch_freg_width; + //the address of CAM needed to be sent out + data = int(ceil((core_params.arch_freg_width + 1 * + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ffreeL->area.get_area()); + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + } - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); + //RRAT is always RAM based, does not have GCs, and is used only for + //record latest non-speculative mapping + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_IRF_size * + NUM_SOURCE_OPERANDS; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased){ - /* - * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, - * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time. - * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than - * CAM based RAT so that it is more scalable as number of ROB/physical regs increases. - */ - tag = coredynp.phy_ireg_width; - data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= coredynp.commitW;//TODO - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->local_result.adjust_area(); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.retire_rat_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; + interface_ip.num_rd_ports = core_params.commitW; + interface_ip.num_wr_ports = core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + iRRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iRRAT->area.get_area()); - //FP - tag = coredynp.phy_freg_width; - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->local_result.adjust_area(); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + //RRAT for FP + data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size * + NUM_SOURCE_OPERANDS; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width; - data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO - interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.retire_rat_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + fRRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fRRAT->area.get_area()); - //FRAT - tag = coredynp.arch_freg_width; - data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + //Freelist of renaming unit always RAM based + //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist + // 2)When instruction commits the Phyregisters/ROB needed to be recycled. + //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.num_ifreelist_entries; - } - //No RRAT for RS based OOO - //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = + core_params.decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ifreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ifreeL->area.get_area()); + + //freelist for FP + data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.num_ffreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = + core_params.fp_decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ffreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ffreeL->area.get_area()); + + } else if (core_params.scheu_ty == ReservationStation) { + if (core_params.rm_ty == RAMbased) { + tag = core_params.phy_ireg_width; + data = int(ceil(core_params.phy_ireg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RS_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); + interface_ip.num_search_ports = core_params.commitW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->local_result.adjust_area(); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); + //FP + tag = core_params.phy_freg_width; + data = int(ceil(core_params.phy_freg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RS_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.fp_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->local_result.adjust_area(); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((core_params.rm_ty == CAMbased)) { + //FRAT + //the address of CAM needed to be sent out + tag = core_params.arch_ireg_width; + data = int(ceil (core_params.arch_ireg_width + + 1 * core_params.globalCheckpoint / + BITS_PER_BYTE)); + out_w = int(ceil (core_params.arch_ireg_width / + BITS_PER_BYTE)); + size = data * core_params.phy_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT + tag = core_params.arch_freg_width; + //the address of CAM needed to be sent out + data = int(ceil(core_params.arch_freg_width + + 1 * core_params.globalCheckpoint / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } + //No RRAT for RS based OOO + //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.num_ifreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = + core_params.decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ifreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ifreeL->area.get_area()); } + } + idcl = + new dep_resource_conflict_check(xml_data, + "Instruction Dependency Check?", + &interface_ip, core_params, + core_params.phy_ireg_width, + clockRate); + fdcl = + new dep_resource_conflict_check(xml_data, + "FP Dependency Check?", &interface_ip, + core_params, + core_params.phy_freg_width, clockRate); } - if (coredynp.core_ty==Inorder&& coredynp.issueW>1) - { - /* Dependency check logic will only present when decode(issue) width>1. - * Multiple issue in order processor can do without renaming, but dcl is a must. - */ - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); + +Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) + : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL), + exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL), + ithCore(_ithCore), interface_ip(*interface_ip_) { + + ostringstream os; + os << ithCore; + name = "Core " + os.str(); + + int i = 0; + XMLNode* childXML; + for (i = 0; i < xml_data->nChildNode("component"); i++) { + childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR comp_name = childXML->getAttribute("id"); + if (!comp_name) + continue; + + STRCMP(comp_name, "system.L20") { + l2cache = new CacheUnit(childXML, &interface_ip); + children.push_back(l2cache); + } + } + } + + set_core_param(); + clockRate = core_params.clockRate; + + ifu = new InstFetchU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(ifu); + lsu = new LoadStoreU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(lsu); + mmu = new MemManU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(mmu); + exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, + core_params, core_stats); + children.push_back(exu); + undiffCore = new UndiffCore(xml_data, &interface_ip, core_params); + children.push_back(undiffCore); + if (core_params.core_ty == OOO) { + rnu = new RENAMINGU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(rnu); + } + corepipe = new Pipeline(xml_data, &interface_ip, core_params); + children.push_back(corepipe); + + double pipeline_area_per_unit; + if (core_params.core_ty == OOO) { + pipeline_area_per_unit = (corepipe->area.get_area() * + core_params.num_pipelines) / 5.0; + if (rnu->exist) { + rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); + } + } else { + pipeline_area_per_unit = (corepipe->area.get_area() * + core_params.num_pipelines) / 4.0; + } + + // Move all of this to computeArea + //area.set_area(area.get_area()+ corepipe->area.get_area()); + if (ifu->exist) { + ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + ifu->area.get_area()); + } + if (lsu->exist) { + lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + lsu->area.get_area()); + } + if (exu->exist) { + exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + exu->area.get_area()); + } + if (mmu->exist) { + mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + mmu->area.get_area()); + } + + if (core_params.core_ty == OOO) { + if (rnu->exist) { + + area.set_area(area.get_area() + rnu->area.get_area()); + } + } + + if (undiffCore->exist) { + area.set_area(area.get_area() + undiffCore->area.get_area()); + } + + if (l2cache) { + area.set_area(area.get_area() + l2cache->area.get_area()); } } -Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - ifu (0), - lsu (0), - mmu (0), - exu (0), - rnu (0), - corepipe (0), - undiffCore (0), - l2cache (0) -{ - /* - * initialize, compute and optimize individual components. - */ - double pipeline_area_per_unit; - if (XML->sys.Private_L2) - { - l2cache = new SharedCache(XML,ithCore, &interface_ip); +void BranchPredictor::computeEnergy() { + if (!exist) return; - } -// interface_ip.wire_is_mat_type = 2; -// interface_ip.wire_os_mat_type = 2; -// interface_ip.wt =Global_30; - set_core_param(); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp); - lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp); - mmu = new MemManU (XML, ithCore, &interface_ip,coredynp); - exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp); - undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp); - if (coredynp.core_ty==OOO) - { - rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp); - } - corepipe = new Pipeline(&interface_ip,coredynp); + // ASSUMPTION: All instructions access the branch predictors at Fetch and + // only branch instrucions update the predictors regardless + // of the correctness of the prediction. + double tdp_read_accesses = + core_params.predictionW * core_stats.BR_duty_cycle; + globalBPT->tdp_stats.reset(); + globalBPT->tdp_stats.readAc.access = tdp_read_accesses; + globalBPT->tdp_stats.writeAc.access = 0; + globalBPT->rtp_stats.reset(); + globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; + globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + globalBPT->power_t.reset(); + globalBPT->power_t.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->tdp_stats.readAc.access + + globalBPT->local_result.power.writeOp.dynamic * + globalBPT->tdp_stats.writeAc.access; + globalBPT->power_t = globalBPT->power_t + + globalBPT->local_result.power * pppm_lkg; + globalBPT->rt_power.reset(); + globalBPT->rt_power.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->rtp_stats.readAc.access + + globalBPT->local_result.power.writeOp.dynamic * + globalBPT->rtp_stats.writeAc.access; - if (coredynp.core_ty==OOO) - { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0; - if (rnu->exist) - { - rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); - } - } - else { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0; - } + L1_localBPT->tdp_stats.reset(); + L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses; + L1_localBPT->tdp_stats.writeAc.access = 0; + L1_localBPT->rtp_stats.reset(); + L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions; + L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + L1_localBPT->power_t.reset(); + L1_localBPT->power_t.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->tdp_stats.readAc.access + + L1_localBPT->local_result.power.writeOp.dynamic * + L1_localBPT->tdp_stats.writeAc.access; + L1_localBPT->power_t = L1_localBPT->power_t + + L1_localBPT->local_result.power * pppm_lkg; + L1_localBPT->rt_power.reset(); + L1_localBPT->rt_power.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->rtp_stats.readAc.access + + L1_localBPT->local_result.power.writeOp.dynamic * + L1_localBPT->rtp_stats.writeAc.access; - //area.set_area(area.get_area()+ corepipe->area.get_area()); - if (ifu->exist) - { - ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + ifu->area.get_area()); - } - if (lsu->exist) - { - lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + lsu->area.get_area()); - } - if (exu->exist) - { - exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+exu->area.get_area()); - } - if (mmu->exist) - { - mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+mmu->area.get_area()); - } + L2_localBPT->tdp_stats.reset(); + L2_localBPT->tdp_stats.readAc.access = tdp_read_accesses; + L2_localBPT->tdp_stats.writeAc.access = 0; + L2_localBPT->rtp_stats.reset(); + L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; + L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + L2_localBPT->power_t.reset(); + L2_localBPT->power_t.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->tdp_stats.readAc.access + + L2_localBPT->local_result.power.writeOp.dynamic * + L2_localBPT->tdp_stats.writeAc.access; + L2_localBPT->power_t = L2_localBPT->power_t + + L2_localBPT->local_result.power * pppm_lkg; + L2_localBPT->rt_power.reset(); + L2_localBPT->rt_power.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->rtp_stats.readAc.access + + L2_localBPT->local_result.power.writeOp.dynamic * + L2_localBPT->rtp_stats.writeAc.access; - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { + chooser->tdp_stats.reset(); + chooser->tdp_stats.readAc.access = tdp_read_accesses; + chooser->tdp_stats.writeAc.access = 0; + chooser->rtp_stats.reset(); + chooser->rtp_stats.readAc.access = core_stats.total_instructions; + chooser->rtp_stats.writeAc.access = core_stats.branch_instructions; + chooser->power_t.reset(); + chooser->power_t.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->tdp_stats.readAc.access + + chooser->local_result.power.writeOp.dynamic * + chooser->tdp_stats.writeAc.access; + chooser->power_t = + chooser->power_t + chooser->local_result.power * pppm_lkg; + chooser->rt_power.reset(); + chooser->rt_power.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->rtp_stats.readAc.access + + chooser->local_result.power.writeOp.dynamic * + chooser->rtp_stats.writeAc.access; - area.set_area(area.get_area() + rnu->area.get_area()); - } - } + RAS->tdp_stats.reset(); + RAS->tdp_stats.readAc.access = tdp_read_accesses; + RAS->tdp_stats.writeAc.access = 0; + RAS->rtp_stats.reset(); + RAS->rtp_stats.readAc.access = core_stats.function_calls; + RAS->rtp_stats.writeAc.access = core_stats.function_calls; + RAS->power_t.reset(); + RAS->power_t.readOp.dynamic += + RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access + + RAS->local_result.power.writeOp.dynamic * + RAS->tdp_stats.writeAc.access; + RAS->power_t = RAS->power_t + RAS->local_result.power * + core_params.pppm_lkg_multhread; + RAS->rt_power.reset(); + RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic * + RAS->rtp_stats.readAc.access + + RAS->local_result.power.writeOp.dynamic * + RAS->rtp_stats.writeAc.access; - if (undiffCore->exist) - { - area.set_area(area.get_area() + undiffCore->area.get_area()); - } - - if (XML->sys.Private_L2) - { - area.set_area(area.get_area() + l2cache->area.get_area()); - - } -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = corepipe.tot_stage_vector; -// clockNetwork.optimize_wire(); -} - - -void BranchPredictor::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double r_access; - double w_access; - if (is_tdp) - { - r_access = coredynp.predictionW*coredynp.BR_duty_cycle; - w_access = 0*coredynp.BR_duty_cycle; - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->tdp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->tdp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->tdp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->tdp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = r_access; - RAS->stats_t.writeAc.access = w_access; - RAS->tdp_stats = RAS->stats_t; + output_data.reset(); + if (globalBPT) { + globalBPT->output_data.peak_dynamic_power = + globalBPT->power_t.readOp.dynamic * clockRate; + globalBPT->output_data.runtime_dynamic_energy = + globalBPT->rt_power.readOp.dynamic; + output_data += globalBPT->output_data; } - else - { - //The resolution of BPT accesses is coarse, but this is - //because most simulators cannot track finer grained details - r_access = XML->sys.core[ithCore].branch_instructions; - w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0 - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->rtp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->rtp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->rtp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->rtp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; - RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; - RAS->rtp_stats = RAS->stats_t; - } - - globalBPT->power_t.reset(); - L1_localBPT->power_t.reset(); - L2_localBPT->power_t.reset(); - chooser->power_t.reset(); - RAS->power_t.reset(); - - globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access + - globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic; - L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access + - L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic; - - L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access + - L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic; - - chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access + - chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic; - RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access + - RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic; - - if (is_tdp) - { - globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - - power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power; + if (L1_localBPT) { + L1_localBPT->output_data.peak_dynamic_power = + L1_localBPT->power_t.readOp.dynamic * clockRate; + L1_localBPT->output_data.runtime_dynamic_energy = + L1_localBPT->rt_power.readOp.dynamic; + output_data += L1_localBPT->output_data; } - else - { - globalBPT->rt_power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power; + if (L2_localBPT) { + L2_localBPT->output_data.peak_dynamic_power = + L2_localBPT->power_t.readOp.dynamic * clockRate; + L2_localBPT->output_data.runtime_dynamic_energy = + L2_localBPT->rt_power.readOp.dynamic; + output_data += L2_localBPT->output_data; + } + if (chooser) { + chooser->output_data.peak_dynamic_power = + chooser->power_t.readOp.dynamic * clockRate; + chooser->output_data.runtime_dynamic_energy = + chooser->rt_power.readOp.dynamic; + output_data += chooser->output_data; + } + if (RAS) { + RAS->output_data.peak_dynamic_power = + RAS->power_t.readOp.dynamic * clockRate; + RAS->output_data.subthreshold_leakage_power = + RAS->power_t.readOp.leakage * core_params.num_hthreads; + RAS->output_data.gate_leakage_power = + RAS->power_t.readOp.gate_leakage * core_params.num_hthreads; + RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic; + output_data += RAS->output_data; } } -void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - if (is_tdp) - { - cout << indent_str<< "Global Predictor:" << endl; - cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <displayData(indent + 4, plevel); + L1_localBPT->displayData(indent + 4, plevel); + L2_localBPT->displayData(indent + 4, plevel); + chooser->displayData(indent + 4, plevel); + RAS->displayData(indent + 4, plevel); } -void InstFetchU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle; - icache.caches->stats_t.readAc.miss = 0; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->tdp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports; - icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports; - icache.missb->tdp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports; - icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; - icache.ifb->tdp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports; - icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; - icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; - IB->tdp_stats = IB->stats_t; - - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses; - BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses; - } - - ID_inst->stats_t.readAc.access = coredynp.decodeW; - ID_operand->stats_t.readAc.access = coredynp.decodeW; - ID_misc->stats_t.readAc.access = coredynp.decodeW; - ID_inst->tdp_stats = ID_inst->stats_t; - ID_operand->tdp_stats = ID_operand->stats_t; - ID_misc->tdp_stats = ID_misc->stats_t; - - - } - else - { - //init stats for Runtime Dynamic (RTP) - icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses; - icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->rtp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->rtp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->rtp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; - IB->rtp_stats = IB->stats_t; - - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions; - BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions; - BTB->rtp_stats = BTB->stats_t; - } - - ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_inst->rtp_stats = ID_inst->stats_t; - ID_operand->rtp_stats = ID_operand->stats_t; - ID_misc->rtp_stats = ID_misc->stats_t; +void InstFetchU::computeEnergy() { + if (!exist) return; + if (BPT) { + BPT->computeEnergy(); } - icache.power_t.reset(); + IB->tdp_stats.reset(); + IB->tdp_stats.readAc.access = core_params.peak_issueW; + IB->tdp_stats.writeAc.access = core_params.peak_issueW; + IB->rtp_stats.reset(); + IB->rtp_stats.readAc.access = core_stats.total_instructions; + IB->rtp_stats.writeAc.access = core_stats.total_instructions; IB->power_t.reset(); -// ID_inst->power_t.reset(); -// ID_operand->power_t.reset(); -// ID_misc->power_t.reset(); - if (coredynp.predictionW>0) - { + IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * + IB->tdp_stats.readAc.access + + IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access; + IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; + IB->rt_power.reset(); + IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * + IB->rtp_stats.readAc.access + + IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access; + + if (core_params.predictionW > 0) { + BTB->tdp_stats.reset(); + BTB->tdp_stats.readAc.access = core_params.predictionW; + BTB->tdp_stats.writeAc.access = 0; + BTB->rtp_stats.reset(); + BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses; + BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses; BTB->power_t.reset(); + BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * + BTB->tdp_stats.readAc.access + + BTB->local_result.power.writeOp.dynamic * + BTB->tdp_stats.writeAc.access; + BTB->rt_power.reset(); + BTB->rt_power.readOp.dynamic += + BTB->local_result.power.readOp.dynamic * + BTB->rtp_stats.readAc.access + + BTB->local_result.power.writeOp.dynamic * + BTB->rtp_stats.writeAc.access; } - icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+ - //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache - icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic + - icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic + - icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic; - icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic + - icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic; + ID_inst->tdp_stats.reset(); + ID_inst->tdp_stats.readAc.access = core_params.decodeW; + ID_inst->power_t.reset(); + ID_inst->power_t = ID_misc->power; + ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * + ID_inst->tdp_stats.readAc.access; + ID_inst->rtp_stats.reset(); + ID_inst->rtp_stats.readAc.access = core_stats.total_instructions; + ID_inst->rt_power.reset(); + ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic * + ID_inst->rtp_stats.readAc.access; - IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access + - IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic; + ID_operand->tdp_stats.reset(); + ID_operand->tdp_stats.readAc.access = core_params.decodeW; + ID_operand->power_t.reset(); + ID_operand->power_t = ID_misc->power; + ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic * + ID_operand->tdp_stats.readAc.access; + ID_operand->rtp_stats.reset(); + ID_operand->rtp_stats.readAc.access = core_stats.total_instructions; + ID_operand->rt_power.reset(); + ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic * + ID_operand->rtp_stats.readAc.access; - if (coredynp.predictionW>0) - { - BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access + - BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic; + ID_misc->tdp_stats.reset(); + ID_misc->tdp_stats.readAc.access = core_params.decodeW; + ID_misc->power_t.reset(); + ID_misc->power_t = ID_misc->power; + ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic * + ID_misc->tdp_stats.readAc.access; + ID_misc->rtp_stats.reset(); + ID_misc->rtp_stats.readAc.access = core_stats.total_instructions; + ID_misc->rt_power.reset(); + ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic * + ID_misc->rtp_stats.readAc.access; - BPT->computeEnergy(is_tdp); - } + power.reset(); + rt_power.reset(); + McPATComponent::computeEnergy(); - if (is_tdp) - { -// icache.power = icache.power_t + -// (icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - icache.power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->power = IB->power_t + IB->local_result.power*pppm_lkg; - power = power + icache.power + IB->power; - if (coredynp.predictionW>0) - { - BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg; - power = power + BTB->power + BPT->power; - } - - ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; - ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; - ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; - - ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; - ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; - ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; - - power = power + (ID_inst->power + - ID_operand->power + - ID_misc->power); + output_data.reset(); + if (icache) { + output_data += icache->output_data; } - else - { -// icache.rt_power = icache.power_t + -// (icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - - icache.rt_power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg; - rt_power = rt_power + icache.rt_power + IB->rt_power; - if (coredynp.predictionW>0) - { - BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg; - rt_power = rt_power + BTB->rt_power + BPT->rt_power; - } - - ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access; - ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access; - ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; - - rt_power = rt_power + (ID_inst->rt_power + - ID_operand->rt_power + - ID_misc->rt_power); + if (IB) { + IB->output_data.peak_dynamic_power = + IB->power_t.readOp.dynamic * clockRate; + IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic; + output_data += IB->output_data; + } + if (BTB) { + BTB->output_data.peak_dynamic_power = + BTB->power_t.readOp.dynamic * clockRate; + BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic; + output_data += BTB->output_data; + } + if (BPT) { + output_data += BPT->output_data; + } + if (ID_inst) { + ID_inst->output_data.peak_dynamic_power = + ID_inst->power_t.readOp.dynamic * clockRate; + ID_inst->output_data.runtime_dynamic_energy = + ID_inst->rt_power.readOp.dynamic; + output_data += ID_inst->output_data; + } + if (ID_operand) { + ID_operand->output_data.peak_dynamic_power = + ID_operand->power_t.readOp.dynamic * clockRate; + ID_operand->output_data.runtime_dynamic_energy = + ID_operand->rt_power.readOp.dynamic; + output_data += ID_operand->output_data; + } + if (ID_misc) { + ID_misc->output_data.peak_dynamic_power = + ID_misc->power_t.readOp.dynamic * clockRate; + ID_misc->output_data.runtime_dynamic_energy = + ID_misc->rt_power.readOp.dynamic; + output_data += ID_misc->output_data; } } -void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; +void InstFetchU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + McPATComponent::displayData(indent, plevel); - if (is_tdp) - { - - cout << indent_str<< "Instruction Cache:" << endl; - cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <0) - { - cout << indent_str<< "Branch Target Buffer:" << endl; - cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <exist) - { - cout << indent_str<< "Branch Predictor:" << endl; - cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3) - { - BPT->displayEnergy(indent+4, plevel, is_tdp); - } - } - } - cout << indent_str<< "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;; - ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports; - ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports; - ffreeL->tdp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - //Unified free list for both int and fp - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports; - ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - } - - } - else - {//init stats for Runtime Dynamic (RTP) - if (coredynp.core_ty==OOO){ - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - - iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes; - ifreeL->rtp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes; - ffreeL->rtp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs. - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - //Unified free list for both int and fp since the ROB act as physcial registers - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + - XML->sys.core[ithCore].fp_rename_reads; - ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes + - XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group - //are terminated early - ifreeL->rtp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads; - fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions; - fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - } - - } - /* Compute engine */ - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - ifreeL->power_t.reset(); - ffreeL->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic - +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic - +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic - +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic); - - } - else if (coredynp.scheu_ty==ReservationStation) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic - +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic - +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - ifreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - } - - } - else - { - if (coredynp.issueW>1) - { - idcl->power_t.reset(); - fdcl->power_t.reset(); - set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - idcl->power_t = idcl->power * pppm_t; - set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - fdcl->power_t = fdcl->power * pppm_t; - } - - } - - //assign value to tpd and rtp - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; - power = power + (iFRAT->power + fFRAT->power) - + (iRRAT->power + fRRAT->power) - + (ifreeL->power + ffreeL->power); - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - power = power + (iFRAT->power + fFRAT->power) - + ifreeL->power; - } - } - else - { - power = power + idcl->power_t + fdcl->power_t; - } - - } - else - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + (iRRAT->rt_power + fRRAT->rt_power) - + (ifreeL->rt_power + ffreeL->rt_power); - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + ifreeL->rt_power; - } - } - else - { - rt_power = rt_power + idcl->power_t + fdcl->power_t; - } - - } -} - -void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - - if (coredynp.core_ty==OOO) - { - cout << indent_str<< "Int Front End RAT:" << endl; - cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; - ROB_duty_cycle = 1; - //init stats - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines; - fp_inst_window->tdp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->tdp_stats = ROB->stats_t; - - /* - * When inst commits, ROB must be read. - * Because for Physcial register based cores, physical register tag in ROB - * need to be read out and write into RRAT/CAM based RAT. - * For RS based cores, register content that stored in ROB must be - * read out and stored in architectural registers. - * - * if no-register is involved, the ROB read out operation when instruction commits can be ignored. - * assuming 20% insts. belong this type. - * TODO: ROB duty_cycle need to be revisited - */ - } - - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - } - - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes; - int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses; - int_inst_window->rtp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads; - fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes; - fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; - fp_inst_window->rtp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - - ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; - ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; - /* ROB need to be updated in RS based OOO when new values are produced, - * this update may happen before the commit stage when ROB entry is released - * 1. ROB write at instruction inserted in - * 2. ROB write as results produced (for RS based OOO only) - * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF - * For Physical reg based OOO, no data stored in ROB, but register tags need to be - * read out and used to set the RRAT and to recycle the register tag to free list buffer - */ - ROB->rtp_stats = ROB->stats_t; - } - - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions); - int_inst_window->rtp_stats = int_inst_window->stats_t; - } - } - - //computation engine - if (coredynp.core_ty==OOO) - { - int_inst_window->power_t.reset(); - fp_inst_window->power_t.reset(); - - /* each instruction needs to write to scheduler, read out when all resources and source operands are ready - * two search ops with one for each source operand - * - */ - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access - + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic; - - fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access - + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access - + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access - + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power_t.reset(); - ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access + - ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic; - } - - - - - } - else if (coredynp.multithreaded) - { - int_inst_window->power_t.reset(); - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access - + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - } - - //assign values - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power + fp_inst_window->power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg; - power = power + ROB->power; - } - - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power; - } - - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg; - rt_power = rt_power + ROB->rt_power; - } - - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power; + if (core_params.predictionW > 0) { + BTB->displayData(indent + 4, plevel); + if (BPT->exist) { + BPT->displayData(indent + 4, plevel); } } -// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); -// cout<<"Scheduler power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage<displayData(indent + 4, plevel); + ID_inst->displayData(indent + 4, plevel); + ID_operand->displayData(indent + 4, plevel); + ID_misc->displayData(indent + 4, plevel); } -void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; +void RENAMINGU::computeEnergy() { + if (!exist) return; + idcl->tdp_stats.reset(); + idcl->rtp_stats.reset(); + idcl->power_t.reset(); + idcl->rt_power.reset(); + if (core_params.core_ty == OOO) { + idcl->tdp_stats.readAc.access = core_params.decodeW; + idcl->rtp_stats.readAc.access = 3 * core_params.decodeW * + core_params.decodeW * core_stats.rename_reads; + } else if (core_params.issueW > 1) { + idcl->tdp_stats.readAc.access = core_params.decodeW; + idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions; + } + idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access * + idcl->power.readOp.dynamic; + idcl->power_t.readOp.leakage = idcl->power.readOp.leakage * + core_params.num_hthreads; + idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage * + core_params.num_hthreads; + idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access * + idcl->power.readOp.dynamic; - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].ROB_size >0) - { - cout << indent_str<<"ROB:" << endl; - cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <tdp_stats.reset(); + fdcl->rtp_stats.reset(); + fdcl->power_t.reset(); + fdcl->rt_power.reset(); + if (core_params.core_ty == OOO) { + fdcl->tdp_stats.readAc.access = core_params.decodeW; + fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW * + core_params.fp_issueW * core_stats.fp_rename_writes; + } else if (core_params.issueW > 1) { + fdcl->tdp_stats.readAc.access = core_params.decodeW; + fdcl->rtp_stats.readAc.access = core_stats.fp_instructions; + } + fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access * + fdcl->power.readOp.dynamic; + fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage * + core_params.num_hthreads; + fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage * + core_params.num_hthreads; + fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access * + fdcl->power.readOp.dynamic; + + if (iRRAT) { + iRRAT->tdp_stats.reset(); + iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->rtp_stats.reset(); + iRRAT->rtp_stats.readAc.access = core_stats.rename_writes; + iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes; + iRRAT->power_t.reset(); + iRRAT->power_t.readOp.dynamic += + iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic + + iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; + iRRAT->rt_power.reset(); + iRRAT->rt_power.readOp.dynamic += + iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic + + iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; + iRRAT->power_t.readOp.leakage = + iRRAT->power.readOp.leakage * core_params.num_hthreads; + iRRAT->power_t.readOp.gate_leakage = + iRRAT->power.readOp.gate_leakage * core_params.num_hthreads; + } + + if (ifreeL) { + ifreeL->tdp_stats.reset(); + ifreeL->tdp_stats.readAc.access = core_params.decodeW; + ifreeL->tdp_stats.writeAc.access = core_params.decodeW; + ifreeL->rtp_stats.reset(); + if (core_params.scheu_ty == PhysicalRegFile) { + ifreeL->rtp_stats.readAc.access = core_stats.rename_reads; + ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes; + } else if (core_params.scheu_ty == ReservationStation) { + ifreeL->rtp_stats.readAc.access = + core_stats.rename_reads + core_stats.fp_rename_reads; + ifreeL->rtp_stats.writeAc.access = + 2 * (core_stats.rename_writes + core_stats.fp_rename_writes); } - else - { - if (coredynp.core_ty==OOO) - { - cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - if (XML->sys.core[ithCore].ROB_size >0) - { - cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl; - } - } - else if (coredynp.multithreaded) - { - cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - } + ifreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic + + ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; + ifreeL->rt_power.reset(); + ifreeL->rt_power.readOp.dynamic += + ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic + + ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; + ifreeL->power_t.readOp.leakage = + ifreeL->power.readOp.leakage * core_params.num_hthreads; + ifreeL->power_t.readOp.gate_leakage = + ifreeL->power.readOp.gate_leakage * core_params.num_hthreads; + } + + if (fRRAT) { + fRRAT->tdp_stats.reset(); + fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->rtp_stats.reset(); + fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes; + fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; + fRRAT->power_t.reset(); + fRRAT->power_t.readOp.dynamic += + fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic + + fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; + fRRAT->rt_power.reset(); + fRRAT->rt_power.readOp.dynamic += + fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic + + fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; + fRRAT->power_t.readOp.leakage = + fRRAT->power.readOp.leakage * core_params.num_hthreads; + fRRAT->power_t.readOp.gate_leakage = + fRRAT->power.readOp.gate_leakage * core_params.num_hthreads; + } + + if (ffreeL) { + ffreeL->tdp_stats.reset(); + ffreeL->tdp_stats.readAc.access = core_params.decodeW; + ffreeL->tdp_stats.writeAc.access = core_params.decodeW; + ffreeL->rtp_stats.reset(); + ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads; + ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes; + ffreeL->power_t.reset(); + ffreeL->power_t.readOp.dynamic += + ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic + + ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; + ffreeL->rt_power.reset(); + ffreeL->rt_power.readOp.dynamic += + ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic + + ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; + ffreeL->power_t.readOp.leakage = + ffreeL->power.readOp.leakage * core_params.num_hthreads; + ffreeL->power_t.readOp.gate_leakage = + ffreeL->power.readOp.gate_leakage * core_params.num_hthreads; + } + + if (iFRAT) { + tdp_stats.reset(); + if (core_params.rm_ty == RAMbased) { + iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports; + } else if ((core_params.rm_ty == CAMbased)) { + iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; } + rtp_stats.reset(); + iFRAT->rtp_stats.readAc.access = core_stats.rename_reads; + iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes; + if (core_params.scheu_ty == ReservationStation && + core_params.rm_ty == RAMbased) { + iFRAT->rtp_stats.searchAc.access = + core_stats.committed_int_instructions; + } + iFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access + * (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->tdp_stats.writeAc.access + * iFRAT->local_result.power.writeOp.dynamic + + iFRAT->tdp_stats.searchAc.access + * iFRAT->local_result.power.searchOp.dynamic; + iFRAT->power_t.readOp.leakage = + iFRAT->power.readOp.leakage * core_params.num_hthreads; + iFRAT->power_t.readOp.gate_leakage = + iFRAT->power.readOp.gate_leakage * core_params.num_hthreads; + iFRAT->rt_power.reset(); + iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access + * (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->rtp_stats.writeAc.access + * iFRAT->local_result.power.writeOp.dynamic + + iFRAT->rtp_stats.searchAc.access + * iFRAT->local_result.power.searchOp.dynamic; + } -} - -void LoadStoreU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.readAc.miss = 0; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.writeAc.miss = 0; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->tdp_stats = dcache.caches->stats_t; - - dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports; - dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports; - dcache.missb->tdp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.ifb->tdp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports; - dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; - if (cache_p==Write_back) - { - dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->tdp_stats = dcache.wbb->stats_t; - } - - LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LSQ->tdp_stats = LSQ->stats_t; - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LoadQ->tdp_stats = LoadQ->stats_t; - } + if (fFRAT) { + tdp_stats.reset(); + fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports; + if ((core_params.rm_ty == CAMbased)) { + fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports; + } else if (core_params.rm_ty == RAMbased) { + fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports; + if (core_params.scheu_ty == ReservationStation) { + fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports; } - else - { - //init stats for Runtime Dynamic (RTP) - dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses; - dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses; - dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->rtp_stats = dcache.caches->stats_t; - - if (cache_p==Write_back) - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - - dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->rtp_stats = dcache.wbb->stats_t; - } - else - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - } - - LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered - LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2; - LSQ->rtp_stats = LSQ->stats_t; - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->rtp_stats = LoadQ->stats_t; - } - - } - - dcache.power_t.reset(); - LSQ->power_t.reset(); - dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic); - - if (cache_p==Write_back) - {//write miss will generate a write later - dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic; + } + rtp_stats.reset(); + fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads; + fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; + if (core_params.scheu_ty == ReservationStation && + core_params.rm_ty == RAMbased) { + fFRAT->rtp_stats.searchAc.access = + core_stats.committed_fp_instructions; + } + fFRAT->power_t.reset(); + fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access + * (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->tdp_stats.writeAc.access + * fFRAT->local_result.power.writeOp.dynamic + + fFRAT->tdp_stats.searchAc.access + * fFRAT->local_result.power.searchOp.dynamic; + fFRAT->power_t.readOp.leakage = + fFRAT->power.readOp.leakage * core_params.num_hthreads; + fFRAT->power_t.readOp.gate_leakage = + fFRAT->power.readOp.gate_leakage * core_params.num_hthreads; + fFRAT->rt_power.reset(); + fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access + * (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->rtp_stats.writeAc.access + * fFRAT->local_result.power.writeOp.dynamic + + fFRAT->rtp_stats.searchAc.access + * fFRAT->local_result.power.searchOp.dynamic; } - dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic + - dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic + - dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic; - dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic + - dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic; - if (cache_p==Write_back) - { - dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic - + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic; + output_data.reset(); + if (iFRAT) { + iFRAT->output_data.peak_dynamic_power = + iFRAT->power_t.readOp.dynamic * clockRate; + iFRAT->output_data.subthreshold_leakage_power = + iFRAT->power_t.readOp.leakage; + iFRAT->output_data.gate_leakage_power = + iFRAT->power_t.readOp.gate_leakage; + iFRAT->output_data.runtime_dynamic_energy = + iFRAT->rt_power.readOp.dynamic; + output_data += iFRAT->output_data; + } + if (fFRAT) { + fFRAT->output_data.peak_dynamic_power = + fFRAT->power_t.readOp.dynamic * clockRate; + fFRAT->output_data.subthreshold_leakage_power = + fFRAT->power_t.readOp.leakage; + fFRAT->output_data.gate_leakage_power = + fFRAT->power_t.readOp.gate_leakage; + fFRAT->output_data.runtime_dynamic_energy = + fFRAT->rt_power.readOp.dynamic; + output_data += fFRAT->output_data; + } + if (iRRAT) { + iRRAT->output_data.peak_dynamic_power = + iRRAT->power_t.readOp.dynamic * clockRate; + iRRAT->output_data.subthreshold_leakage_power = + iRRAT->power_t.readOp.leakage; + iRRAT->output_data.gate_leakage_power = + iRRAT->power_t.readOp.gate_leakage; + iRRAT->output_data.runtime_dynamic_energy = + iRRAT->rt_power.readOp.dynamic; + output_data += iRRAT->output_data; + } + if (fRRAT) { + fRRAT->output_data.peak_dynamic_power = + fRRAT->power_t.readOp.dynamic * clockRate; + fRRAT->output_data.subthreshold_leakage_power = + fRRAT->power_t.readOp.leakage; + fRRAT->output_data.gate_leakage_power = + fRRAT->power_t.readOp.gate_leakage; + fRRAT->output_data.runtime_dynamic_energy = + fRRAT->rt_power.readOp.dynamic; + output_data += fRRAT->output_data; + } + if (ifreeL) { + ifreeL->output_data.peak_dynamic_power = + ifreeL->power_t.readOp.dynamic * clockRate; + ifreeL->output_data.subthreshold_leakage_power = + ifreeL->power_t.readOp.leakage; + ifreeL->output_data.gate_leakage_power = + ifreeL->power_t.readOp.gate_leakage; + ifreeL->output_data.runtime_dynamic_energy = + ifreeL->rt_power.readOp.dynamic; + output_data += ifreeL->output_data; + } + if (ffreeL) { + ffreeL->output_data.peak_dynamic_power = + ffreeL->power_t.readOp.dynamic * clockRate; + ffreeL->output_data.subthreshold_leakage_power = + ffreeL->power_t.readOp.leakage; + ffreeL->output_data.gate_leakage_power = + ffreeL->power_t.readOp.gate_leakage; + ffreeL->output_data.runtime_dynamic_energy = + ffreeL->rt_power.readOp.dynamic; + output_data += ffreeL->output_data; + } + if (idcl) { + idcl->output_data.peak_dynamic_power = + idcl->power_t.readOp.dynamic * clockRate; + idcl->output_data.subthreshold_leakage_power = + idcl->power_t.readOp.leakage; + idcl->output_data.gate_leakage_power = + idcl->power_t.readOp.gate_leakage; + idcl->output_data.runtime_dynamic_energy = + idcl->rt_power.readOp.dynamic; + output_data += idcl->output_data; + } + if (fdcl) { + fdcl->output_data.peak_dynamic_power = + fdcl->power_t.readOp.dynamic * clockRate; + fdcl->output_data.subthreshold_leakage_power = + fdcl->power_t.readOp.leakage; + fdcl->output_data.gate_leakage_power = + fdcl->power_t.readOp.gate_leakage; + fdcl->output_data.runtime_dynamic_energy = + fdcl->rt_power.readOp.dynamic; + output_data += fdcl->output_data; + } + if (RAHT) { + output_data += RAHT->output_data; + } +} + +void RENAMINGU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + + McPATComponent::displayData(indent, plevel); + + if (core_params.core_ty == OOO) { + iFRAT->displayData(indent + 4, plevel); + fFRAT->displayData(indent + 4, plevel); + ifreeL->displayData(indent + 4, plevel); + + if (core_params.scheu_ty == PhysicalRegFile) { + iRRAT->displayData(indent + 4, plevel); + fRRAT->displayData(indent + 4, plevel); + ffreeL->displayData(indent + 4, plevel); + } + } + idcl->displayData(indent + 4, plevel); + fdcl->displayData(indent + 4, plevel); +} + +void SchedulerU::computeEnergy() { + if (!exist) return; + + double ROB_duty_cycle; + ROB_duty_cycle = 1; + + if (int_instruction_selection) { + int_instruction_selection->computeEnergy(); } - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { + if (fp_instruction_selection) { + fp_instruction_selection->computeEnergy(); + } + + if (int_inst_window) { + int_inst_window->tdp_stats.reset(); + int_inst_window->rtp_stats.reset(); + int_inst_window->power_t.reset(); + int_inst_window->rt_power.reset(); + if (core_params.core_ty == OOO) { + int_inst_window->tdp_stats.readAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.writeAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.searchAc.access = + core_params.issueW * core_params.num_pipelines; + + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->tdp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->tdp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->tdp_stats.writeAc.access; + + int_inst_window->rtp_stats.readAc.access = + core_stats.inst_window_reads; + int_inst_window->rtp_stats.writeAc.access = + core_stats.inst_window_writes; + int_inst_window->rtp_stats.searchAc.access = + core_stats.inst_window_wakeup_accesses; + + int_inst_window->rt_power.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->rtp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->rtp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->rtp_stats.writeAc.access; + } else if (core_params.multithreaded) { + int_inst_window->tdp_stats.readAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.writeAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.searchAc.access = + core_params.issueW * core_params.num_pipelines; + + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->tdp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->tdp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->tdp_stats.writeAc.access; + + int_inst_window->rtp_stats.readAc.access = + core_stats.int_instructions + core_stats.fp_instructions; + int_inst_window->rtp_stats.writeAc.access = + core_stats.int_instructions + core_stats.fp_instructions; + int_inst_window->rtp_stats.searchAc.access = + 2 * (core_stats.int_instructions + core_stats.fp_instructions); + + int_inst_window->rt_power.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->rtp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->rtp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->rtp_stats.writeAc.access; + } + } + + if (fp_inst_window) { + fp_inst_window->tdp_stats.reset(); + fp_inst_window->tdp_stats.readAc.access = + fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines; + fp_inst_window->tdp_stats.writeAc.access = + fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines; + fp_inst_window->tdp_stats.searchAc.access = + fp_inst_window->l_ip.num_search_ports * + core_params.num_fp_pipelines; + + fp_inst_window->rtp_stats.reset(); + fp_inst_window->rtp_stats.readAc.access = + core_stats.fp_inst_window_reads; + fp_inst_window->rtp_stats.writeAc.access = + core_stats.fp_inst_window_writes; + fp_inst_window->rtp_stats.searchAc.access = + core_stats.fp_inst_window_wakeup_accesses; + + fp_inst_window->power_t.reset(); + fp_inst_window->power_t.readOp.dynamic += + fp_inst_window->power.readOp.dynamic * + fp_inst_window->tdp_stats.readAc.access + + fp_inst_window->power.searchOp.dynamic * + fp_inst_window->tdp_stats.searchAc.access + + fp_inst_window->power.writeOp.dynamic * + fp_inst_window->tdp_stats.writeAc.access; + + fp_inst_window->rt_power.reset(); + fp_inst_window->rt_power.readOp.dynamic += + fp_inst_window->power.readOp.dynamic * + fp_inst_window->rtp_stats.readAc.access + + fp_inst_window->power.searchOp.dynamic * + fp_inst_window->rtp_stats.searchAc.access + + fp_inst_window->power.writeOp.dynamic * + fp_inst_window->rtp_stats.writeAc.access; + } + + if (ROB) { + ROB->tdp_stats.reset(); + ROB->tdp_stats.readAc.access = core_params.commitW * + core_params.num_pipelines * ROB_duty_cycle; + ROB->tdp_stats.writeAc.access = core_params.issueW * + core_params.num_pipelines * ROB_duty_cycle; + ROB->rtp_stats.reset(); + ROB->rtp_stats.readAc.access = core_stats.ROB_reads; + ROB->rtp_stats.writeAc.access = core_stats.ROB_writes; + ROB->power_t.reset(); + ROB->power_t.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * + ROB->tdp_stats.readAc.access + + ROB->local_result.power.writeOp.dynamic * + ROB->tdp_stats.writeAc.access; + ROB->rt_power.reset(); + ROB->rt_power.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * + ROB->rtp_stats.readAc.access + + ROB->local_result.power.writeOp.dynamic * + ROB->rtp_stats.writeAc.access; + } + + output_data.reset(); + if (int_inst_window) { + int_inst_window->output_data.subthreshold_leakage_power = + int_inst_window->power_t.readOp.leakage; + int_inst_window->output_data.gate_leakage_power = + int_inst_window->power_t.readOp.gate_leakage; + int_inst_window->output_data.peak_dynamic_power = + int_inst_window->power_t.readOp.dynamic * clockRate; + int_inst_window->output_data.runtime_dynamic_energy = + int_inst_window->rt_power.readOp.dynamic; + output_data += int_inst_window->output_data; + } + if (fp_inst_window) { + fp_inst_window->output_data.subthreshold_leakage_power = + fp_inst_window->power_t.readOp.leakage; + fp_inst_window->output_data.gate_leakage_power = + fp_inst_window->power_t.readOp.gate_leakage; + fp_inst_window->output_data.peak_dynamic_power = + fp_inst_window->power_t.readOp.dynamic * clockRate; + fp_inst_window->output_data.runtime_dynamic_energy = + fp_inst_window->rt_power.readOp.dynamic; + output_data += fp_inst_window->output_data; + } + if (ROB) { + ROB->output_data.peak_dynamic_power = + ROB->power_t.readOp.dynamic * clockRate; + ROB->output_data.runtime_dynamic_energy = + ROB->rt_power.readOp.dynamic; + output_data += ROB->output_data; + } + + // Integer and FP instruction selection logic is not included in the + // roll-up due to the uninitialized area + /* + if (int_instruction_selection) { + output_data += int_instruction_selection->output_data; + } + if (fp_instruction_selection) { + output_data += fp_instruction_selection->output_data; + } + */ +} + +void SchedulerU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + + McPATComponent::displayData(indent, plevel); + + if (core_params.core_ty == OOO) { + int_inst_window->displayData(indent + 4, plevel); + fp_inst_window->displayData(indent + 4, plevel); + if (core_params.ROB_size > 0) { + ROB->displayData(indent + 4, plevel); + } + } else if (core_params.multithreaded) { + int_inst_window->displayData(indent + 4, plevel); + } + + // Integer and FP instruction selection logic is not included in the + // roll-up due to the uninitialized area + /* + if (int_instruction_selection) { + int_instruction_selection->displayData(indent + 4, plevel); + } + if (fp_instruction_selection) { + fp_instruction_selection->displayData(indent + 4, plevel); + } + */ +} + +void LoadStoreU::computeEnergy() { + if (!exist) return; + + LSQ->tdp_stats.reset(); + LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LSQ->rtp_stats.reset(); + // Flush overhead conidered + LSQ->rtp_stats.readAc.access = (core_stats.load_instructions + + core_stats.store_instructions) * 2; + LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions + + core_stats.store_instructions) * 2; + LSQ->power_t.reset(); + //every memory access invloves at least two operations on LSQ + LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access * + (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->tdp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; + LSQ->rt_power.reset(); + //every memory access invloves at least two operations on LSQ + LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access * + (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; + + if (LoadQ) { + LoadQ->tdp_stats.reset(); + LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LoadQ->rtp_stats.reset(); + LoadQ->rtp_stats.readAc.access = core_stats.load_instructions + + core_stats.store_instructions; + LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions + + core_stats.store_instructions; LoadQ->power_t.reset(); - LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+ - LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ - - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ - - } - else - { - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ - + //every memory access invloves at least two operations on LoadQ + LoadQ->power_t.readOp.dynamic += + LoadQ->tdp_stats.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->tdp_stats.writeAc.access * + LoadQ->local_result.power.writeOp.dynamic; + LoadQ->rt_power.reset(); + //every memory access invloves at least two operations on LoadQ + LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->rtp_stats.writeAc.access * + LoadQ->local_result.power.writeOp.dynamic; } - if (is_tdp) - { -// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg + -// (dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_Isub; - dcache.power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) *pppm_lkg; - if (cache_p==Write_back) - { - dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg; - } + McPATComponent::computeEnergy(); - LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - power = power + dcache.power + LSQ->power; - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - power = power + LoadQ->power; - } + output_data.reset(); + if (dcache) { + output_data += dcache->output_data; } - else - { -// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + -// dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_lkg; - dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power )*pppm_lkg; - - if (cache_p==Write_back) - { - dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg; - } - - LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - rt_power = rt_power + dcache.rt_power + LSQ->rt_power; - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - rt_power = rt_power + LoadQ->rt_power; - } + if (LSQ) { + LSQ->output_data.peak_dynamic_power = + LSQ->power_t.readOp.dynamic * clockRate; + LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic; + output_data += LSQ->output_data; + } + if (LoadQ) { + LoadQ->output_data.peak_dynamic_power = + LoadQ->power_t.readOp.dynamic * clockRate; + LoadQ->output_data.runtime_dynamic_energy = + LoadQ->rt_power.readOp.dynamic; + output_data += LoadQ->output_data; } } +void LoadStoreU::displayData(uint32_t indent, int plevel) { + if (!exist) return; -void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; + McPATComponent::displayData(indent, plevel); - - if (is_tdp) - { - cout << indent_str << "Data Cache:" << endl; - cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].load_buffer_size >0) - { - cout << indent_str << "LoadQ:" << endl; - cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <displayData(indent + 4, plevel); + } + LSQ->displayData(indent + 4, plevel); } -void MemManU::computeEnergy(bool is_tdp) -{ +void MemManU::computeEnergy() { + if (!exist) return; - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports; - itlb->stats_t.readAc.miss = 0; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->tdp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dtlb->stats_t.readAc.miss = 0; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->tdp_stats = dtlb->stats_t; - } - else - { - //init stats for Runtime Dynamic (RTP) - itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; - itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->rtp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; - dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->rtp_stats = dtlb->stats_t; - } + itlb->tdp_stats.reset(); + itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports; + itlb->tdp_stats.readAc.miss = 0; + itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access - + itlb->tdp_stats.readAc.miss; + itlb->rtp_stats.reset(); + itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses; + itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses; itlb->power_t.reset(); + //FA spent most power in tag, so use total access not hits + itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access * + itlb->local_result.power.searchOp.dynamic + + itlb->tdp_stats.readAc.miss * + itlb->local_result.power.writeOp.dynamic; + itlb->rt_power.reset(); + //FA spent most power in tag, so use total access not hits + itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access * + itlb->local_result.power.searchOp.dynamic + + itlb->rtp_stats.writeAc.access * + itlb->local_result.power.writeOp.dynamic; + + dtlb->tdp_stats.reset(); + dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + dtlb->tdp_stats.readAc.miss = 0; + dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access - + dtlb->tdp_stats.readAc.miss; + dtlb->rtp_stats.reset(); + dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses + + mem_man_stats.dtlb_write_misses; + dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses + + mem_man_stats.dtlb_read_misses; + dtlb->power_t.reset(); - itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic; - dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic; + //FA spent most power in tag, so use total access not hits + dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access * + dtlb->local_result.power.searchOp.dynamic + + dtlb->tdp_stats.readAc.miss * + dtlb->local_result.power.writeOp.dynamic; + dtlb->rt_power.reset(); + //FA spent most power in tag, so use total access not hits + dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access * + dtlb->local_result.power.searchOp.dynamic + + dtlb->rtp_stats.writeAc.access * + dtlb->local_result.power.writeOp.dynamic; - if (is_tdp) - { - itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - power = power + itlb->power + dtlb->power; - } - else - { - itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - rt_power = rt_power + itlb->rt_power + dtlb->rt_power; - } + output_data.reset(); + if (itlb) { + itlb->output_data.peak_dynamic_power = itlb->power_t.readOp.dynamic * + clockRate; + itlb->output_data.runtime_dynamic_energy = + itlb->rt_power.readOp.dynamic; + output_data += itlb->output_data; + } + if (dtlb) { + dtlb->output_data.peak_dynamic_power = + dtlb->power_t.readOp.dynamic * clockRate; + dtlb->output_data.runtime_dynamic_energy = + dtlb->rt_power.readOp.dynamic; + output_data += dtlb->output_data; + } } -void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; +void MemManU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + McPATComponent::displayData(indent, plevel); - - - if (is_tdp) - { - cout << indent_str << "Itlb:" << endl; - cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <displayData(indent + 4, plevel); + dtlb->displayData(indent + 4, plevel); } -void RegFU::computeEnergy(bool is_tdp) -{ -/* - * Architecture RF and physical RF cannot be present at the same time. - * Therefore, the RF stats can only refer to either ARF or PRF; - * And the same stats can be used for both. - */ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - IRF->stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - //Rule of Thumb: about 10% RF related instructions do not need to access ALUs - IRF->tdp_stats = IRF->stats_t; +void RegFU::computeEnergy() { + /* + * Architecture RF and physical RF cannot be present at the same time. + * Therefore, the RF stats can only refer to either ARF or PRF; + * And the same stats can be used for both. + */ + if (!exist) return; - FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->tdp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->tdp_stats = RFWIN->stats_t; + IRF->tdp_stats.reset(); + IRF->tdp_stats.readAc.access = + core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS * + (core_stats.ALU_duty_cycle * 1.1 + + (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * + core_params.num_pipelines; + IRF->tdp_stats.writeAc.access = + core_params.issueW * + (core_stats.ALU_duty_cycle * 1.1 + + (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * + core_params.num_pipelines; + IRF->rtp_stats.reset(); + IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads; + IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes; + if (core_params.regWindowing) { + IRF->rtp_stats.readAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + IRF->rtp_stats.writeAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + } + IRF->power_t.reset(); + IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access * + IRF->local_result.power.readOp.dynamic + + IRF->tdp_stats.writeAc.access * + IRF->local_result.power.writeOp.dynamic; + IRF->rt_power.reset(); + IRF->rt_power.readOp.dynamic += + IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic + + IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic; + + FRF->tdp_stats.reset(); + FRF->tdp_stats.readAc.access = + FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 * + core_params.num_fp_pipelines; + FRF->tdp_stats.writeAc.access = + FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 * + core_params.num_fp_pipelines; + FRF->rtp_stats.reset(); + FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads; + FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes; + if (core_params.regWindowing) { + FRF->rtp_stats.readAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + FRF->rtp_stats.writeAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + } + FRF->power_t.reset(); + FRF->power_t.readOp.dynamic += + FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; + FRF->rt_power.reset(); + FRF->rt_power.readOp.dynamic += + FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; + + if (core_params.regWindowing) { + RFWIN->tdp_stats.reset(); + RFWIN->tdp_stats.readAc.access = 0; + RFWIN->tdp_stats.writeAc.access = 0; + RFWIN->rtp_stats.reset(); + RFWIN->rtp_stats.readAc.access = + core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; + RFWIN->rtp_stats.writeAc.access = + core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; + RFWIN->power_t.reset(); + RFWIN->power_t.readOp.dynamic += + RFWIN->tdp_stats.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->tdp_stats.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic; + RFWIN->rt_power.reset(); + RFWIN->rt_power.readOp.dynamic += + RFWIN->rtp_stats.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->rtp_stats.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic; + } + + output_data.reset(); + if (IRF) { + IRF->output_data.peak_dynamic_power = + IRF->power_t.readOp.dynamic * clockRate; + IRF->output_data.subthreshold_leakage_power *= + core_params.num_hthreads; + IRF->output_data.gate_leakage_power *= core_params.num_hthreads; + IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic; + output_data += IRF->output_data; + } + if (FRF) { + FRF->output_data.peak_dynamic_power = + FRF->power_t.readOp.dynamic * clockRate; + FRF->output_data.subthreshold_leakage_power *= + core_params.num_hthreads; + FRF->output_data.gate_leakage_power *= core_params.num_hthreads; + FRF->output_data.runtime_dynamic_energy = FRF->rt_power.readOp.dynamic; + output_data += FRF->output_data; + } + if (RFWIN) { + RFWIN->output_data.peak_dynamic_power = + RFWIN->power_t.readOp.dynamic * clockRate; + RFWIN->output_data.runtime_dynamic_energy = + RFWIN->rt_power.readOp.dynamic; + output_data += RFWIN->output_data; + } +} + +void RegFU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + + McPATComponent::displayData(indent, plevel); + + IRF->displayData(indent + 4, plevel); + FRF->displayData(indent + 4, plevel); + if (core_params.regWindowing) { + RFWIN->displayData(indent + 4, plevel); + } +} + +void EXECU::computeEnergy() { + if (!exist) return; + + int_bypass->set_params_stats(core_params.execu_int_bypass_ports, + core_stats.ALU_cdb_duty_cycle, + core_stats.cdb_alu_accesses); + + intTagBypass->set_params_stats(core_params.execu_int_bypass_ports, + core_stats.ALU_cdb_duty_cycle, + core_stats.cdb_alu_accesses); + + if (core_params.num_muls > 0) { + int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports, + core_stats.MUL_cdb_duty_cycle, + core_stats.cdb_mul_accesses); + + intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports, + core_stats.MUL_cdb_duty_cycle, + core_stats.cdb_mul_accesses); + } + + if (core_params.num_fpus > 0) { + fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports, + core_stats.FPU_cdb_duty_cycle, + core_stats.cdb_fpu_accesses); + + fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports, + core_stats.FPU_cdb_duty_cycle, + core_stats.cdb_fpu_accesses); + } + + McPATComponent::computeEnergy(); + + if (rfu) { + rfu->computeEnergy(); + output_data += rfu->output_data; + } + if (scheu) { + scheu->computeEnergy(); + output_data += scheu->output_data; + } + if (fp_u) { + fp_u->computeEnergy(); + output_data += fp_u->output_data; + } + if (exeu) { + exeu->computeEnergy(); + output_data += exeu->output_data; + } + if (mul) { + mul->computeEnergy(); + output_data += mul->output_data; + } +} + +void EXECU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + + McPATComponent::displayData(indent, plevel); + + rfu->displayData(indent + 4, plevel); + if (scheu) { + scheu->displayData(indent + 4, plevel); + } + exeu->displayData(indent + 4, plevel); + if (core_params.num_fpus > 0) { + fp_u->displayData(indent + 4, plevel); + } + if (core_params.num_muls > 0) { + mul->displayData(indent + 4, plevel); + } +} + +void Core::computeEnergy() { + ifu->computeEnergy(); + lsu->computeEnergy(); + mmu->computeEnergy(); + exu->computeEnergy(); + if (core_params.core_ty == OOO) { + rnu->computeEnergy(); + } + + output_data.reset(); + if (ifu) { + output_data += ifu->output_data; + } + if (lsu) { + output_data += lsu->output_data; + } + if (mmu) { + output_data += mmu->output_data; + } + if (exu) { + output_data += exu->output_data; + } + if (rnu) { + output_data += rnu->output_data; + } + if (corepipe) { + output_data += corepipe->output_data; + } + if (undiffCore) { + output_data += undiffCore->output_data; + } + if (l2cache) { + output_data += l2cache->output_data; + } +} + +InstFetchU ::~InstFetchU() { + + if (!exist) return; + if (IB) { + delete IB; + IB = NULL; + } + if (ID_inst) { + delete ID_inst; + ID_inst = NULL; + } + if (ID_operand) { + delete ID_operand; + ID_operand = NULL; + } + if (ID_misc) { + delete ID_misc; + ID_misc = NULL; + } + if (core_params.predictionW > 0) { + if (BTB) { + delete BTB; + BTB = NULL; } - } - else - { - //init stats for Runtime Dynamic (RTP) - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; - FRF->rtp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->rtp_stats = RFWIN->stats_t; - - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + - XML->sys.core[ithCore].function_calls*16; - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + - XML->sys.core[ithCore].function_calls*16; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + - XML->sys.core[ithCore].function_calls*16;; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+ - XML->sys.core[ithCore].function_calls*16;; - FRF->rtp_stats = FRF->stats_t; + if (BPT) { + delete BPT; + BPT = NULL; } } - IRF->power_t.reset(); - FRF->power_t.reset(); - IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic - +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic); - FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic - +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic); - if (coredynp.regWindowing) - { - RFWIN->power_t.reset(); - RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic + - RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic); - } - - if (is_tdp) - { - IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; - FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; - power = power + (IRF->power + FRF->power); - if (coredynp.regWindowing) - { - RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - power = power + RFWIN->power; - } - } - else - { - IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; - FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; - rt_power = rt_power + (IRF->power_t + FRF->power_t); - if (coredynp.regWindowing) - { - RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - rt_power = rt_power + RFWIN->rt_power; - } - } + if (icache) { + delete icache; + } } +BranchPredictor ::~BranchPredictor() { -void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { cout << indent_str << "Integer RF:" << endl; - cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <power.reset(); -// rfu->rt_power.reset(); -// scheu->power.reset(); -// scheu->rt_power.reset(); -// exeu->power.reset(); -// exeu->rt_power.reset(); - - rfu->computeEnergy(is_tdp); - scheu->computeEnergy(is_tdp); - exeu->computeEnergy(is_tdp); - if (coredynp.num_fpus >0) - { - fp_u->computeEnergy(is_tdp); - } - if (coredynp.num_muls >0) - { - mul->computeEnergy(is_tdp); - } - - if (is_tdp) - { - set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t; - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - power = power + mul->power; - } - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction. - bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ; - power = power + fp_u->power; - } - - power = power + rfu->power + exeu->power + bypass.power + scheu->power; - } - else - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses); - bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t; - - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction. - bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - rt_power = rt_power + mul->rt_power; - } - - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses); - bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t; - rt_power = rt_power + fp_u->rt_power; - } - rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power; - } + if (!exist) return; + if (iFRAT) { + delete iFRAT; + iFRAT = NULL; + } + if (fFRAT) { + delete fFRAT; + fFRAT = NULL; + } + if (iRRAT) { + delete iRRAT; + iRRAT = NULL; + } + if (iFRAT) { + delete iFRAT; + iFRAT = NULL; + } + if (ifreeL) { + delete ifreeL; + ifreeL = NULL; + } + if (ffreeL) { + delete ffreeL; + ffreeL = NULL; + } + if (idcl) { + delete idcl; + idcl = NULL; + } + if (fdcl) { + delete fdcl; + fdcl = NULL; + } + if (RAHT) { + delete RAHT; + RAHT = NULL; + } } -void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - -// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - cout << indent_str << "Register Files:" << endl; - cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3){ - rfu->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Instruction Scheduler:" << endl; - cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3){ - scheu->displayEnergy(indent+4,is_tdp); - } - exeu->displayEnergy(indent,is_tdp); - if (coredynp.num_fpus>0) - { - fp_u->displayEnergy(indent,is_tdp); - } - if (coredynp.num_muls >0) - { - mul->displayEnergy(indent,is_tdp); - } - cout << indent_str << "Results Broadcast Bus:" << endl; - cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->power = rnu->power + corepipe->power*pppm_t; - power = power + rnu->power; - } - } - - if (ifu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe->power*pppm_t; -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; -// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; - power = power + ifu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (lsu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - lsu->power = lsu->power + corepipe->power*pppm_t; -// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + lsu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (exu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - exu->power = exu->power + corepipe->power*pppm_t; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + exu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (mmu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - mmu->power = mmu->power + corepipe->power*pppm_t; -// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + mmu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - - power = power + undiffCore->power; - - if (XML->sys.Private_L2) - { - - l2cache->computeEnergy(is_tdp); - set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1); - //l2cache->power = l2cache->power*pppm_t; - power = power + l2cache->power*pppm_t; - } - } - else - { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t; - - rt_power = rt_power + rnu->rt_power; - } - } - else - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - } - - if (ifu->exist) - { - ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + ifu->rt_power ; - } - if (lsu->exist) - { - lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + lsu->rt_power; - } - if (exu->exist) - { - exu->rt_power = exu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + exu->rt_power; - } - if (mmu->exist) - { - mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + mmu->rt_power ; - } - - rt_power = rt_power + undiffCore->power; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - if (XML->sys.Private_L2) - { - - l2cache->computeEnergy(is_tdp); - //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); - //l2cache->rt_power = l2cache->rt_power*pppm_t; - rt_power = rt_power + l2cache->rt_power; - } - } +MemManU ::~MemManU() { + if (!exist) return; + if (itlb) { + delete itlb; + itlb = NULL; + } + if (dtlb) { + delete dtlb; + dtlb = NULL; + } } -void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - if (is_tdp) - { - cout << "Core:" << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout<exist) - { - cout << indent_str << "Instruction Fetch Unit:" << endl; - cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - ifu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { - cout << indent_str<< "Renaming Unit:" << endl; - cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - rnu->displayEnergy(indent+4,plevel,is_tdp); - } - } +RegFU ::~RegFU() { - } - if (lsu->exist) - { - cout << indent_str<< "Load Store Unit:" << endl; - cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - lsu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (mmu->exist) - { - cout << indent_str<< "Memory Management Unit:" << endl; - cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - mmu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (exu->exist) - { - cout << indent_str<< "Execution Unit:" << endl; - cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - exu->displayEnergy(indent+4,plevel,is_tdp); - } - } -// if (plevel >2) -// { -// if (undiffCore->exist) -// { -// cout << indent_str << "Undifferentiated Core" << endl; -// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl; -// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl; -//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl; -// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl; -// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; -// cout <sys.Private_L2) - { - - l2cache->displayEnergy(4,is_tdp); - } - - } - else - { -// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl; - } -} -InstFetchU ::~InstFetchU(){ - - if (!exist) return; - if(IB) {delete IB; IB = 0;} - if(ID_inst) {delete ID_inst; ID_inst = 0;} - if(ID_operand) {delete ID_operand; ID_operand = 0;} - if(ID_misc) {delete ID_misc; ID_misc = 0;} - if (coredynp.predictionW>0) - { - if(BTB) {delete BTB; BTB = 0;} - if(BPT) {delete BPT; BPT = 0;} - } + if (!exist) return; + if (IRF) { + delete IRF; + IRF = NULL; + } + if (FRF) { + delete FRF; + FRF = NULL; + } + if (RFWIN) { + delete RFWIN; + RFWIN = NULL; + } } -BranchPredictor ::~BranchPredictor(){ +SchedulerU ::~SchedulerU() { - if (!exist) return; - if(globalBPT) {delete globalBPT; globalBPT = 0;} - if(localBPT) {delete localBPT; localBPT = 0;} - if(L1_localBPT) {delete L1_localBPT; L1_localBPT = 0;} - if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;} - if(chooser) {delete chooser; chooser = 0;} - if(RAS) {delete RAS; RAS = 0;} + if (!exist) return; + if (int_inst_window) { + delete int_inst_window; + int_inst_window = NULL; + } + if (fp_inst_window) { + delete int_inst_window; + int_inst_window = NULL; + } + if (ROB) { + delete ROB; + ROB = NULL; + } + if (int_instruction_selection) { + delete int_instruction_selection; + int_instruction_selection = NULL; + } + if (fp_instruction_selection) { + delete fp_instruction_selection; + fp_instruction_selection = NULL; + } +} + +EXECU ::~EXECU() { + + if (!exist) return; + if (int_bypass) { + delete int_bypass; + int_bypass = NULL; + } + if (intTagBypass) { + delete intTagBypass; + intTagBypass = NULL; + } + if (int_mul_bypass) { + delete int_mul_bypass; + int_mul_bypass = NULL; + } + if (intTag_mul_Bypass) { + delete intTag_mul_Bypass; + intTag_mul_Bypass = NULL; + } + if (fp_bypass) { + delete fp_bypass; + fp_bypass = NULL; + } + if (fpTagBypass) { + delete fpTagBypass; + fpTagBypass = NULL; + } + if (fp_u) { + delete fp_u; + fp_u = NULL; + } + if (exeu) { + delete exeu; + exeu = NULL; + } + if (mul) { + delete mul; + mul = NULL; + } + if (rfu) { + delete rfu; + rfu = NULL; + } + if (scheu) { + delete scheu; + scheu = NULL; + } +} + +Core::~Core() { + + if (ifu) { + delete ifu; + ifu = NULL; + } + if (lsu) { + delete lsu; + lsu = NULL; + } + if (rnu) { + delete rnu; + rnu = NULL; + } + if (mmu) { + delete mmu; + mmu = NULL; + } + if (exu) { + delete exu; + exu = NULL; + } + if (corepipe) { + delete corepipe; + corepipe = NULL; + } + if (undiffCore) { + delete undiffCore; + undiffCore = NULL; + } + if (l2cache) { + delete l2cache; + l2cache = NULL; + } +} + +void Core::initialize_params() { + memset(&core_params, 0, sizeof(CoreParameters)); + core_params.peak_issueW = -1; + core_params.peak_commitW = -1; +} + +void Core::initialize_stats() { + memset(&core_stats, 0, sizeof(CoreStatistics)); + core_stats.IFU_duty_cycle = 1.0; + core_stats.ALU_duty_cycle = 1.0; + core_stats.FPU_duty_cycle = 1.0; + core_stats.MUL_duty_cycle = 1.0; + core_stats.ALU_cdb_duty_cycle = 1.0; + core_stats.FPU_cdb_duty_cycle = 1.0; + core_stats.MUL_cdb_duty_cycle = 1.0; + core_stats.pipeline_duty_cycle = 1.0; + core_stats.IFU_duty_cycle = 1.0; + core_stats.LSU_duty_cycle = 1.0; + core_stats.MemManU_D_duty_cycle = 1.0; + core_stats.MemManU_I_duty_cycle = 1.0; +} + +void Core::set_core_param() { + initialize_params(); + initialize_stats(); + + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_STR_IF("name", name); + ASSIGN_INT_IF("opt_local", core_params.opt_local); + ASSIGN_FP_IF("clock_rate", core_params.clockRate); + ASSIGN_INT_IF("instruction_length", core_params.instruction_length); + ASSIGN_INT_IF("opcode_width", core_params.opcode_width); + ASSIGN_INT_IF("x86", core_params.x86); + ASSIGN_INT_IF("Embedded", core_params.Embedded); + ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type); + ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length); + ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads); + ASSIGN_INT_IF("fetch_width", core_params.fetchW); + ASSIGN_INT_IF("decode_width", core_params.decodeW); + ASSIGN_INT_IF("issue_width", core_params.issueW); + ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW); + ASSIGN_INT_IF("commit_width", core_params.commitW); + ASSIGN_INT_IF("prediction_width", core_params.predictionW); + ASSIGN_INT_IF("ALU_per_core", core_params.num_alus); + ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus); + ASSIGN_INT_IF("MUL_per_core", core_params.num_muls); + ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW); + ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty, + Scheduler_type); + ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type); + ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size); + ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size); + ASSIGN_INT_IF("ROB_size", core_params.ROB_size); + ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc); + ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks); + ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width); + ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc); + ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks); + ASSIGN_INT_IF("register_window_size", + core_params.register_window_size); + ASSIGN_INT_IF("register_window_throughput", + core_params.register_window_throughput); + ASSIGN_INT_IF("register_window_latency", + core_params.register_window_latency); + ASSIGN_INT_IF("register_window_assoc", + core_params.register_window_assoc); + ASSIGN_INT_IF("register_window_nbanks", + core_params.register_window_nbanks); + ASSIGN_INT_IF("register_window_tag_width", + core_params.register_window_tag_width); + ASSIGN_INT_IF("register_window_rw_ports", + core_params.register_window_rw_ports); + ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size); + ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc); + ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks); + ASSIGN_INT_IF("phy_Regs_IRF_tag_width", + core_params.phy_Regs_IRF_tag_width); + ASSIGN_INT_IF("phy_Regs_IRF_rd_ports", + core_params.phy_Regs_IRF_rd_ports); + ASSIGN_INT_IF("phy_Regs_IRF_wr_ports", + core_params.phy_Regs_IRF_wr_ports); + ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size); + ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc); + ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks); + ASSIGN_INT_IF("phy_Regs_FRF_tag_width", + core_params.phy_Regs_FRF_tag_width); + ASSIGN_INT_IF("phy_Regs_FRF_rd_ports", + core_params.phy_Regs_FRF_rd_ports); + ASSIGN_INT_IF("phy_Regs_FRF_wr_ports", + core_params.phy_Regs_FRF_wr_ports); + ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks); + ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports); + ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks); + ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports); + ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks); + ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports); + ASSIGN_INT_IF("memory_ports", core_params.memory_ports); + ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size); + ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc); + ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks); + ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size); + ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc); + ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks); + ASSIGN_INT_IF("instruction_window_size", + core_params.instruction_window_size); + ASSIGN_INT_IF("fp_instruction_window_size", + core_params.fp_instruction_window_size); + ASSIGN_INT_IF("instruction_buffer_size", + core_params.instruction_buffer_size); + ASSIGN_INT_IF("instruction_buffer_assoc", + core_params.instruction_buffer_assoc); + ASSIGN_INT_IF("instruction_buffer_nbanks", + core_params.instruction_buffer_nbanks); + ASSIGN_INT_IF("instruction_buffer_tag_width", + core_params.instruction_buffer_tag_width); + ASSIGN_INT_IF("number_instruction_fetch_ports", + core_params.number_instruction_fetch_ports); + ASSIGN_INT_IF("RAS_size", core_params.RAS_size); + ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt, + Wire_type); + ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type); + ASSIGN_INT_IF("execu_int_bypass_ports", + core_params.execu_int_bypass_ports); + ASSIGN_INT_IF("execu_mul_bypass_ports", + core_params.execu_mul_bypass_ports); + ASSIGN_INT_IF("execu_fp_bypass_ports", + core_params.execu_fp_bypass_ports); + ASSIGN_ENUM_IF("execu_bypass_wire_type", + core_params.execu_bypass_wire_type, Wire_type); + ASSIGN_FP_IF("execu_bypass_base_width", + core_params.execu_bypass_base_width); + ASSIGN_FP_IF("execu_bypass_base_height", + core_params.execu_bypass_base_height); + ASSIGN_INT_IF("execu_bypass_start_wiring_level", + core_params.execu_bypass_start_wiring_level); + ASSIGN_FP_IF("execu_bypass_route_over_perc", + core_params.execu_bypass_route_over_perc); + ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator); + ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages); + ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages); + ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines); + ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines); + ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint); + ASSIGN_INT_IF("perThreadState", core_params.perThreadState); + ASSIGN_INT_IF("instruction_length", core_params.instruction_length); + + else { + warnUnrecognizedParam(node_name); } + } -RENAMINGU ::~RENAMINGU(){ + // Change from MHz to Hz + core_params.clockRate *= 1e6; + clockRate = core_params.clockRate; - if (!exist) return; - if(iFRAT ) {delete iFRAT; iFRAT = 0;} - if(fFRAT ) {delete fFRAT; fFRAT =0;} - if(iRRAT) {delete iRRAT; iRRAT = 0;} - if(iFRAT) {delete iFRAT; iFRAT = 0;} - if(ifreeL) {delete ifreeL;ifreeL= 0;} - if(ffreeL) {delete ffreeL;ffreeL= 0;} - if(idcl) {delete idcl; idcl = 0;} - if(fdcl) {delete fdcl; fdcl = 0;} - if(RAHT) {delete RAHT; RAHT = 0;} + core_params.peak_commitW = core_params.peak_issueW; + core_params.fp_decodeW = core_params.fp_issueW; + + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle); + ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle); + ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle); + ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle); + ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle); + ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle); + ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle); + ASSIGN_FP_IF("total_cycles", core_stats.total_cycles); + ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles); + ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles); + ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle); + ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle); + ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle); + ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle); + ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle); + ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses); + ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses); + ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses); + ASSIGN_FP_IF("function_calls", core_stats.function_calls); + ASSIGN_FP_IF("total_instructions", core_stats.total_instructions); + ASSIGN_FP_IF("int_instructions", core_stats.int_instructions); + ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions); + ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions); + ASSIGN_FP_IF("branch_mispredictions", + core_stats.branch_mispredictions); + ASSIGN_FP_IF("load_instructions", core_stats.load_instructions); + ASSIGN_FP_IF("store_instructions", core_stats.store_instructions); + ASSIGN_FP_IF("committed_instructions", + core_stats.committed_instructions); + ASSIGN_FP_IF("committed_int_instructions", + core_stats.committed_int_instructions); + ASSIGN_FP_IF("committed_fp_instructions", + core_stats.committed_fp_instructions); + ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads); + ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes); + ASSIGN_FP_IF("rename_reads", core_stats.rename_reads); + ASSIGN_FP_IF("rename_writes", core_stats.rename_writes); + ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads); + ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes); + ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads); + ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes); + ASSIGN_FP_IF("inst_window_wakeup_accesses", + core_stats.inst_window_wakeup_accesses); + ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads); + ASSIGN_FP_IF("fp_inst_window_writes", + core_stats.fp_inst_window_writes); + ASSIGN_FP_IF("fp_inst_window_wakeup_accesses", + core_stats.fp_inst_window_wakeup_accesses); + ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads); + ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads); + ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes); + ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes); + ASSIGN_FP_IF("context_switches", core_stats.context_switches); + ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses); + ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses); + ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses); + + else { + warnUnrecognizedStat(node_name); } + } -LoadStoreU ::~LoadStoreU(){ + // Initialize a few variables + core_params.multithreaded = core_params.num_hthreads > 1 ? true : false; + core_params.pc_width = virtual_address_width; + core_params.v_address_width = virtual_address_width; + core_params.p_address_width = physical_address_width; + core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32; + core_params.fp_data_width = core_params.int_data_width; + core_params.arch_ireg_width = + int(ceil(log2(core_params.archi_Regs_IRF_size))); + core_params.arch_freg_width + = int(ceil(log2(core_params.archi_Regs_FRF_size))); + core_params.num_IRF_entry = core_params.archi_Regs_IRF_size; + core_params.num_FRF_entry = core_params.archi_Regs_FRF_size; - if (!exist) return; - if(LSQ) {delete LSQ; LSQ = 0;} + if (core_params.instruction_length <= 0) { + errorNonPositiveParam("instruction_length"); + } + + if (core_params.num_hthreads <= 0) { + errorNonPositiveParam("number_hardware_threads"); + } + + if (core_params.opcode_width <= 0) { + errorNonPositiveParam("opcode_width"); + } + + if (core_params.instruction_buffer_size <= 0) { + errorNonPositiveParam("instruction_buffer_size"); + } + + if (core_params.number_instruction_fetch_ports <= 0) { + errorNonPositiveParam("number_instruction_fetch_ports"); + } + + if (core_params.peak_issueW <= 0) { + errorNonPositiveParam("peak_issue_width"); + } else { + assert(core_params.peak_commitW > 0); + } + + if (core_params.core_ty == OOO) { + if (core_params.scheu_ty == PhysicalRegFile) { + core_params.phy_ireg_width = + int(ceil(log2(core_params.phy_Regs_IRF_size))); + core_params.phy_freg_width = + int(ceil(log2(core_params.phy_Regs_FRF_size))); + core_params.num_ifreelist_entries = + core_params.num_IRF_entry = core_params.phy_Regs_IRF_size; + core_params.num_ffreelist_entries = + core_params.num_FRF_entry = core_params.phy_Regs_FRF_size; + } else if (core_params.scheu_ty == ReservationStation) { + core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size))); + core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size))); + core_params.num_ifreelist_entries = core_params.ROB_size; + core_params.num_ffreelist_entries = core_params.ROB_size; } + } -MemManU ::~MemManU(){ + core_params.regWindowing = + (core_params.register_window_size > 0 && + core_params.core_ty == Inorder) ? true : false; - if (!exist) return; - if(itlb) {delete itlb; itlb = 0;} - if(dtlb) {delete dtlb; dtlb = 0;} + if (core_params.regWindowing) { + if (core_params.register_window_throughput <= 0) { + errorNonPositiveParam("register_window_throughput"); + } else if (core_params.register_window_latency <= 0) { + errorNonPositiveParam("register_window_latency"); } + } -RegFU ::~RegFU(){ + set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads, + core_params.num_hthreads, 0); - if (!exist) return; - if(IRF) {delete IRF; IRF = 0;} - if(FRF) {delete FRF; FRF = 0;} - if(RFWIN) {delete RFWIN; RFWIN = 0;} - } + if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) { + cout << "Invalid Core Type" << endl; + exit(0); + } -SchedulerU ::~SchedulerU(){ + if (!((core_params.scheu_ty == PhysicalRegFile) || + (core_params.scheu_ty == ReservationStation))) { + cout << "Invalid OOO Scheduler Type" << endl; + exit(0); + } - if (!exist) return; - if(int_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(ROB) {delete ROB; ROB = 0;} - if(instruction_selection) {delete instruction_selection;instruction_selection = 0;} - } - -EXECU ::~EXECU(){ - - if (!exist) return; - if(int_bypass) {delete int_bypass; int_bypass = 0;} - if(intTagBypass) {delete intTagBypass; intTagBypass =0;} - if(int_mul_bypass) {delete int_mul_bypass; int_mul_bypass = 0;} - if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;} - if(fp_bypass) {delete fp_bypass;fp_bypass = 0;} - if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;} - if(fp_u) {delete fp_u;fp_u = 0;} - if(exeu) {delete exeu;exeu = 0;} - if(mul) {delete mul;mul = 0;} - if(rfu) {delete rfu;rfu = 0;} - if(scheu) {delete scheu; scheu = 0;} - } - -Core ::~Core(){ - - if(ifu) {delete ifu; ifu = 0;} - if(lsu) {delete lsu; lsu = 0;} - if(rnu) {delete rnu; rnu = 0;} - if(mmu) {delete mmu; mmu = 0;} - if(exu) {delete exu; exu = 0;} - if(corepipe) {delete corepipe; corepipe = 0;} - if(undiffCore) {delete undiffCore;undiffCore = 0;} - if(l2cache) {delete l2cache;l2cache = 0;} - } - -void Core::set_core_param() -{ - coredynp.opt_local = XML->sys.core[ithCore].opt_local; - coredynp.x86 = XML->sys.core[ithCore].x86; - coredynp.Embedded = XML->sys.Embedded; - coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; - coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; - coredynp.fetchW = XML->sys.core[ithCore].fetch_width; - coredynp.decodeW = XML->sys.core[ithCore].decode_width; - coredynp.issueW = XML->sys.core[ithCore].issue_width; - coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; - coredynp.commitW = XML->sys.core[ithCore].commit_width; - coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; - coredynp.predictionW = XML->sys.core[ithCore].prediction_width; - coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; - coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; - coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; - coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; - coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; - - - coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; - coredynp.multithreaded = coredynp.num_hthreads>1? true:false; - coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; - coredynp.pc_width = XML->sys.virtual_address_width; - - coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; - coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; - coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; - coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; - coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; - coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; - coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32; - coredynp.fp_data_width = coredynp.int_data_width; - coredynp.v_address_width = XML->sys.virtual_address_width; - coredynp.p_address_width = XML->sys.physical_address_width; - - coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; - coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); - coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); - coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; - coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; - coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; - coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; - coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; - coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; - - //Max power duty cycle for peak power estimation -// if (coredynp.core_ty==OOO) -// { -// coredynp.IFU_duty_cycle = 1; -// coredynp.LSU_duty_cycle = 1; -// coredynp.MemManU_I_duty_cycle =1; -// coredynp.MemManU_D_duty_cycle =1; -// coredynp.ALU_duty_cycle =1; -// coredynp.MUL_duty_cycle =1; -// coredynp.FPU_duty_cycle =1; -// coredynp.ALU_cdb_duty_cycle =1; -// coredynp.MUL_cdb_duty_cycle =1; -// coredynp.FPU_cdb_duty_cycle =1; -// } -// else -// { - coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle; - coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; - coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; - coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; - coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; - coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; - coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; - coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; - coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; - coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; - coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; -// } - - - if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder))) - { - cout<<"Invalid Core Type"<sys.core[ithCore].phy_Regs_IRF_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); - coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size; - coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size; - } - else if (coredynp.scheu_ty==ReservationStation) - {//ROB serves as Phy RF in RS based OOO - coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; - coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; - - } + if (!((core_params.rm_ty == RAMbased) || + (core_params.rm_ty == CAMbased))) { + cout << "Invalid OOO Renaming Type" << endl; + exit(0); + } -} - coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference. - coredynp.perThreadState = 8; - coredynp.instruction_length = 32; - coredynp.clockRate = XML->sys.core[ithCore].clock_rate; - coredynp.clockRate *= 1e6; - coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false; - coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate; - set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0); } diff --git a/ext/mcpat/core.h b/ext/mcpat/core.h index 8ef3babdd..206fe6d58 100644 --- a/ext/mcpat/core.h +++ b/ext/mcpat/core.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -33,230 +34,305 @@ #ifndef CORE_H_ #define CORE_H_ -#include "XML_Parse.h" #include "array.h" #include "basic_components.h" +#include "cacheunit.h" #include "interconnect.h" #include "logic.h" #include "parameter.h" -#include "sharedcache.h" -class BranchPredictor :public Component { - public: +// Macros used in the various core-related classes +#define NUM_SOURCE_OPERANDS 2 +#define NUM_INT_INST_SOURCE_OPERANDS 2 - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST * globalBPT; - ArrayST * localBPT; - ArrayST * L1_localBPT; - ArrayST * L2_localBPT; - ArrayST * chooser; - ArrayST * RAS; - bool exist; - - BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~BranchPredictor(); +class BranchPredictorParameters { +public: + int assoc; + int nbanks; + int local_l1_predictor_size; + int local_l2_predictor_size; + int local_predictor_entries; + int global_predictor_bits; + int global_predictor_entries; + int chooser_predictor_bits; + int chooser_predictor_entries; }; +class BranchPredictor : public McPATComponent { +public: + ArrayST* globalBPT; + ArrayST* localBPT; + ArrayST* L1_localBPT; + ArrayST* L2_localBPT; + ArrayST* chooser; + ArrayST* RAS; -class InstFetchU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Cache_policy cache_p; - InstCache icache; - ArrayST * IB; - ArrayST * BTB; - BranchPredictor * BPT; - inst_decoder * ID_inst; - inst_decoder * ID_operand; - inst_decoder * ID_misc; - bool exist; - - InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~InstFetchU(); -}; - - -class SchedulerU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double Iw_height, fp_Iw_height,ROB_height; - ArrayST * int_inst_window; - ArrayST * fp_inst_window; - ArrayST * ROB; - selection_logic * instruction_selection; + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + BranchPredictorParameters branch_pred_params; + double scktRatio, chip_PR_overhead, macro_PR_overhead; bool exist; - SchedulerU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~SchedulerU(); + BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exsit = true); + void set_params_stats(); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~BranchPredictor(); }; -class RENAMINGU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - CoreDynParam coredynp; - ArrayST * iFRAT; - ArrayST * fFRAT; - ArrayST * iRRAT; - ArrayST * fRRAT; - ArrayST * ifreeL; - ArrayST * ffreeL; - dep_resource_conflict_check * idcl; - dep_resource_conflict_check * fdcl; - ArrayST * RAHT;//register alias history table Used to store GC - bool exist; - - - RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~RENAMINGU(); +class InstFetchParameters { +public: + int btb_size; + int btb_block_size; + int btb_assoc; + int btb_num_banks; + int btb_latency; + int btb_throughput; + int btb_rw_ports; }; -class LoadStoreU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - enum Cache_policy cache_p; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - DataCache dcache; - ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - ArrayST * LoadQ; - bool exist; - - LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~LoadStoreU(); +class InstFetchStatistics { +public: + double btb_read_accesses; + double btb_write_accesses; }; -class MemManU :public Component { - public: +class InstFetchU : public McPATComponent { +public: + CacheUnit* icache; + ArrayST* IB; + ArrayST* BTB; + BranchPredictor* BPT; + InstructionDecoder* ID_inst; + InstructionDecoder* ID_operand; + InstructionDecoder* ID_misc; - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST * itlb; - ArrayST * dtlb; - bool exist; + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + InstFetchParameters inst_fetch_params; + InstFetchStatistics inst_fetch_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + enum Cache_policy cache_p; + bool exist; - MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MemManU(); -}; - -class RegFU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double int_regfile_height, fp_regfile_height; - ArrayST * IRF; - ArrayST * FRF; - ArrayST * RFWIN; - bool exist; - - RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~RegFU(); -}; - -class EXECU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - CoreDynParam coredynp; - RegFU * rfu; - SchedulerU * scheu; - FunctionalUnit * fp_u; - FunctionalUnit * exeu; - FunctionalUnit * mul; - interconnect * int_bypass; - interconnect * intTagBypass; - interconnect * int_mul_bypass; - interconnect * intTag_mul_Bypass; - interconnect * fp_bypass; - interconnect * fpTagBypass; - - Component bypass; - bool exist; - - EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~EXECU(); + InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exsit = true); + void set_params_stats(); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~InstFetchU(); }; -class Core :public Component { - public: +class SchedulerU : public McPATComponent { +public: + static int ROB_STATUS_BITS; - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - InstFetchU * ifu; - LoadStoreU * lsu; - MemManU * mmu; - EXECU * exu; - RENAMINGU * rnu; - Pipeline * corepipe; - UndiffCore * undiffCore; - SharedCache * l2cache; - CoreDynParam coredynp; - //full_decoder inst_decoder; - //clock_network clockNetwork; - Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_); - void set_core_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~Core(); + ArrayST* int_inst_window; + ArrayST* fp_inst_window; + ArrayST* ROB; + selection_logic* int_instruction_selection; + selection_logic* fp_instruction_selection; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double Iw_height, fp_Iw_height, ROB_height; + bool exist; + + SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~SchedulerU(); +}; + +class RENAMINGU : public McPATComponent { +public: + ArrayST* iFRAT; + ArrayST* fFRAT; + ArrayST* iRRAT; + ArrayST* fRRAT; + ArrayST* ifreeL; + ArrayST* ffreeL; + dep_resource_conflict_check* idcl; + dep_resource_conflict_check* fdcl; + ArrayST* RAHT; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + bool exist; + + RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~RENAMINGU(); +}; + +class LoadStoreU : public McPATComponent { +public: + CacheUnit* dcache; + ArrayST* LSQ; + ArrayST* LoadQ; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + enum Cache_policy cache_p; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double lsq_height; + bool exist; + + LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~LoadStoreU(); +}; + +class MemoryManagementParams { +public: + int itlb_number_entries; + double itlb_latency; + double itlb_throughput; + int itlb_assoc; + int itlb_nbanks; + int dtlb_number_entries; + double dtlb_latency; + double dtlb_throughput; + int dtlb_assoc; + int dtlb_nbanks; +}; + +class MemoryManagementStats { +public: + double itlb_total_accesses; + double itlb_total_misses; + double itlb_conflicts; + double dtlb_read_accesses; + double dtlb_read_misses; + double dtlb_write_accesses; + double dtlb_write_misses; + double dtlb_conflicts; +}; + +class MemManU : public McPATComponent { +public: + ArrayST* itlb; + ArrayST* dtlb; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + MemoryManagementParams mem_man_params; + MemoryManagementStats mem_man_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + bool exist; + + MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_ = true); + void set_params_stats(); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~MemManU(); +}; + +class RegFU : public McPATComponent { +public: + static int RFWIN_ACCESS_MULTIPLIER; + + ArrayST* IRF; + ArrayST* FRF; + ArrayST* RFWIN; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double int_regfile_height, fp_regfile_height; + bool exist; + + RegFU(XMLNode* _xml_data, + InputParameter* interface_ip_, const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~RegFU(); +}; + +class EXECU : public McPATComponent { +public: + RegFU* rfu; + SchedulerU* scheu; + FunctionalUnit* fp_u; + FunctionalUnit* exeu; + FunctionalUnit* mul; + Interconnect* int_bypass; + Interconnect* intTagBypass; + Interconnect* int_mul_bypass; + Interconnect* intTag_mul_Bypass; + Interconnect* fp_bypass; + Interconnect* fpTagBypass; + + InputParameter interface_ip; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double lsq_height; + CoreParameters core_params; + CoreStatistics core_stats; + bool exist; + + EXECU(XMLNode* _xml_data, InputParameter* interface_ip_, + double lsq_height_, const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~EXECU(); +}; + + +class Core : public McPATComponent { +public: + InstFetchU* ifu; + LoadStoreU* lsu; + MemManU* mmu; + EXECU* exu; + RENAMINGU* rnu; + Pipeline* corepipe; + UndiffCore* undiffCore; + CacheUnit* l2cache; + + int ithCore; + InputParameter interface_ip; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + CoreParameters core_params; + CoreStatistics core_stats; + + // TODO: Migrate component ID handling into the XML data to remove this + // ithCore variable + Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_); + void initialize_params(); + void initialize_stats(); + void set_core_param(); + void computeEnergy(); + ~Core(); }; #endif /* CORE_H_ */ diff --git a/ext/mcpat/interconnect.cc b/ext/mcpat/interconnect.cc index ba502b6a8..98fbc3e54 100644 --- a/ext/mcpat/interconnect.cc +++ b/ext/mcpat/interconnect.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -33,130 +34,190 @@ #include #include -#include "globalvar.h" +#include "basic_components.h" #include "interconnect.h" #include "wire.h" -interconnect::interconnect( - string name_, - enum Device_ty device_ty_, - double base_w, double base_h, - int data_w, double len,const InputParameter *configure_interface, - int start_wiring_level_, - bool pipelinable_ , - double route_over_perc_ , - bool opt_local_, - enum Core_type core_ty_, - enum Wire_type wire_model, - double width_s, double space_s, - TechnologyParameter::DeviceType *dt -) - :name(name_), - device_ty(device_ty_), - in_rise_time(0), - out_rise_time(0), - base_width(base_w), - base_height(base_h), - data_width(data_w), - wt(wire_model), - width_scaling(width_s), - space_scaling(space_s), - start_wiring_level(start_wiring_level_), - length(len), - //interconnect_latency(1e-12), - //interconnect_throughput(1e-12), - opt_local(opt_local_), - core_ty(core_ty_), - pipelinable(pipelinable_), - route_over_perc(route_over_perc_), - deviceType(dt) -{ +double Interconnect::width_scaling_threshold = 3.0; - wt = Global; - l_ip=*configure_interface; - local_result = init_interface(&l_ip); +Interconnect::Interconnect(XMLNode* _xml_data, string name_, + enum Device_ty device_ty_, double base_w, + double base_h, int data_w, + double len, + const InputParameter *configure_interface, + int start_wiring_level_, double _clockRate, + bool pipelinable_, double route_over_perc_, + bool opt_local_, enum Core_type core_ty_, + enum Wire_type wire_model, + double width_s, double space_s, + TechnologyParameter::DeviceType *dt) + : McPATComponent(_xml_data), device_ty(device_ty_), in_rise_time(0), + out_rise_time(0), base_width(base_w), base_height(base_h), + data_width(data_w), wt(wire_model), width_scaling(width_s), + space_scaling(space_s), start_wiring_level(start_wiring_level_), + length(len), opt_local(opt_local_), core_ty(core_ty_), + pipelinable(pipelinable_), route_over_perc(route_over_perc_), + deviceType(dt) { + name = name_; + clockRate = _clockRate; + l_ip = *configure_interface; + local_result = init_interface(&l_ip, name); - - max_unpipelined_link_delay = 0; //TODO - min_w_nmos = g_tp.min_w_nmos_; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; + max_unpipelined_link_delay = 0; + min_w_nmos = g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; - latency = l_ip.latency; - throughput = l_ip.throughput; - latency_overflow=false; - throughput_overflow=false; + latency = l_ip.latency; + throughput = l_ip.throughput; + latency_overflow = false; + throughput_overflow = false; - /* - * TODO: Add wiring option from semi-global to global automatically - * And directly jump to global if semi-global cannot satisfy timing - * Fat wires only available for global wires, thus - * if signal wiring layer starts from semi-global, - * the next layer up will be global, i.e., semi-global does - * not have fat wires. - */ - if (pipelinable == false) - //Non-pipelinable wires, such as bypass logic, care latency - { - compute(); - if (opt_for_clk && opt_local) - { - while (delay > latency && width_scaling<3.0) - { - width_scaling *= 2; - space_scaling *= 2; - Wire winit(width_scaling, space_scaling); - compute(); - } - if (delay > latency) - { - latency_overflow=true; - } - } - } - else //Pipelinable wires, such as bus, does not care latency but throughput - { - /* - * TODO: Add pipe regs power, area, and timing; - * Pipelinable wires optimize latency first. - */ - compute(); - if (opt_for_clk && opt_local) - { - while (delay > throughput && width_scaling<3.0) - { - width_scaling *= 2; - space_scaling *= 2; - Wire winit(width_scaling, space_scaling); - compute(); - } - if (delay > throughput) - // insert pipeline stages - { - num_pipe_stages = (int)ceil(delay/throughput); - assert(num_pipe_stages>0); - delay = delay/num_pipe_stages + num_pipe_stages*0.05*delay; - } - } - } + if (pipelinable == false) { + //Non-pipelinable wires, such as bypass logic, care latency + calcWireData(); + if (opt_for_clk && opt_local) { + while (delay > latency && + width_scaling < width_scaling_threshold) { + width_scaling *= 2; + space_scaling *= 2; + Wire winit(width_scaling, space_scaling); + calcWireData(); + } + if (delay > latency) { + latency_overflow = true; + } + } + } else { + //Pipelinable wires, such as bus, does not care latency but throughput + calcWireData(); + if (opt_for_clk && opt_local) { + while (delay > throughput && + width_scaling < width_scaling_threshold) { + width_scaling *= 2; + space_scaling *= 2; + Wire winit(width_scaling, space_scaling); + calcWireData(); + } + if (delay > throughput) { + // insert pipeline stages + num_pipe_stages = (int)ceil(delay / throughput); + assert(num_pipe_stages > 0); + delay = delay / num_pipe_stages + num_pipe_stages * 0.05 * delay; + } + } + } + + power_bit = power; + power.readOp.dynamic *= data_width; + power.readOp.leakage *= data_width; + power.readOp.gate_leakage *= data_width; + area.set_area(area.get_area()*data_width); + no_device_under_wire_area.h *= data_width; + + if (latency_overflow == true) { + cout << "Warning: " << name + << " wire structure cannot satisfy latency constraint." << endl; + } + + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + //Only global wires has the option to choose whether routing over or not + if (pipelinable) + area.set_area(area.get_area() * route_over_perc + + no_device_under_wire_area.get_area() * + (1 - route_over_perc)); + + Wire wreset(); +} + + + +void +Interconnect::calcWireData() { + + Wire *wtemp1 = 0; + wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling); + delay = wtemp1->delay; + power.readOp.dynamic = wtemp1->power.readOp.dynamic; + power.readOp.leakage = wtemp1->power.readOp.leakage; + power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage; + + area.set_area(wtemp1->area.get_area()); + no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); + no_device_under_wire_area.w = length; + + if (wtemp1) + delete wtemp1; + +} + +void +Interconnect::computeEnergy() { + double pppm_t[4] = {1, 1, 1, 1}; + + // Compute TDP + power_t.reset(); + set_pppm(pppm_t, int_params.active_ports * int_stats.duty_cycle, + int_params.active_ports, int_params.active_ports, + int_params.active_ports * int_stats.duty_cycle); + power_t = power * pppm_t; + + rt_power.reset(); + set_pppm(pppm_t, int_stats.accesses, int_params.active_ports, + int_params.active_ports, int_stats.accesses); + rt_power = power * pppm_t; + + output_data.peak_dynamic_power = power_t.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power_t.readOp.leakage; + output_data.gate_leakage_power = power_t.readOp.gate_leakage; + output_data.runtime_dynamic_energy = rt_power.readOp.dynamic; +} + +void +Interconnect::computeArea() { + output_data.area = area.get_area() / 1e6; +} + +void +Interconnect::set_params_stats(double active_ports, + double duty_cycle, double accesses) { + int_params.active_ports = active_ports; + int_stats.duty_cycle = duty_cycle; + int_stats.accesses = accesses; +} + +void Interconnect::leakage_feedback(double temperature) { + l_ip.temp = (unsigned int)round(temperature/10.0)*10; + uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy + + calcWireData(); power_bit = power; power.readOp.dynamic *= data_width; power.readOp.leakage *= data_width; power.readOp.gate_leakage *= data_width; - area.set_area(area.get_area()*data_width); - no_device_under_wire_area.h *= data_width; - - if (latency_overflow==true) - cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint." << endl; - assert(power.readOp.dynamic > 0); assert(power.readOp.leakage > 0); assert(power.readOp.gate_leakage > 0); - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty,core_ty); double sckRation = g_tp.sckt_co_eff; power.readOp.dynamic *= sckRation; @@ -164,59 +225,6 @@ interconnect::interconnect( power.searchOp.dynamic *= sckRation; power.readOp.longer_channel_leakage = - power.readOp.leakage*long_channel_device_reduction; - - if (pipelinable)//Only global wires has the option to choose whether routing over or not - area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc)); - - Wire wreset(); -} - - - -void -interconnect::compute() -{ - - Wire *wtemp1 = 0; - wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling); - delay = wtemp1->delay; - power.readOp.dynamic = wtemp1->power.readOp.dynamic; - power.readOp.leakage = wtemp1->power.readOp.leakage; - power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage; - - area.set_area(wtemp1->area.get_area()); - no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); - no_device_under_wire_area.w = length; - - if (wtemp1) - delete wtemp1; - -} - -void interconnect::leakage_feedback(double temperature) -{ - l_ip.temp = (unsigned int)round(temperature/10.0)*10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy - - compute(); - - power_bit = power; - power.readOp.dynamic *= data_width; - power.readOp.leakage *= data_width; - power.readOp.gate_leakage *= data_width; - - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; + power.readOp.leakage*long_channel_device_reduction; } diff --git a/ext/mcpat/interconnect.h b/ext/mcpat/interconnect.h index 4cf42dafd..2ae39c5a2 100644 --- a/ext/mcpat/interconnect.h +++ b/ext/mcpat/interconnect.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -42,46 +43,31 @@ #include "subarray.h" #include "wire.h" -// leakge power includes entire htree in a bank (when uca_tree == false) -// leakge power includes only part to one bank when uca_tree == true +class InterconnectParameters { +public: + double active_ports; +}; -class interconnect : public Component -{ - public: - interconnect( - string name_, - enum Device_ty device_ty_, - double base_w, double base_h, int data_w, double len, - const InputParameter *configure_interface, int start_wiring_level_, - bool pipelinable_ = false, - double route_over_perc_ =0.5, - bool opt_local_=true, - enum Core_type core_ty_=Inorder, - enum Wire_type wire_model=Global, - double width_s=1.0, double space_s=1.0, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ); +class InterconnectStatistics { +public: + double duty_cycle; + double accesses; +}; - ~interconnect() {}; +class Interconnect : public McPATComponent { +public: + static double width_scaling_threshold; - void compute(); - string name; - enum Device_ty device_ty; + enum Device_ty device_ty; double in_rise_time, out_rise_time; - InputParameter l_ip; - uca_org_t local_result; + InputParameter l_ip; + uca_org_t local_result; Area no_device_under_wire_area; - void set_in_rise_time(double rt) - { - in_rise_time = rt; - } - - void leakage_feedback(double temperature); double max_unpipelined_link_delay; powerDef power_bit; double wire_bw; - double init_wire_bw; // bus width at root + double init_wire_bw; double base_width; double base_height; int data_width; @@ -92,19 +78,39 @@ class interconnect : public Component double min_w_nmos; double min_w_pmos; double latency, throughput; - bool latency_overflow; - bool throughput_overflow; - double interconnect_latency; - double interconnect_throughput; + bool latency_overflow; + bool throughput_overflow; + double interconnect_latency; + double interconnect_throughput; bool opt_local; enum Core_type core_ty; bool pipelinable; double route_over_perc; - int num_pipe_stages; - - private: - TechnologyParameter::DeviceType *deviceType; + int num_pipe_stages; + TechnologyParameter::DeviceType* deviceType; + InterconnectParameters int_params; + InterconnectStatistics int_stats; + Interconnect(XMLNode* _xml_data, string name_, + enum Device_ty device_ty_, double base_w, + double base_h, int data_w, double len, + const InputParameter *configure_interface, + int start_wiring_level_, + double _clockRate = 0.0f, + bool pipelinable_ = false, double route_over_perc_ = 0.5, + bool opt_local_ = true, enum Core_type core_ty_ = Inorder, + enum Wire_type wire_model = Global, double width_s = 1.0, + double space_s = 1.0, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); +private: + void calcWireData(); +public: + void computeArea(); + void computeEnergy(); + void set_params_stats(double active_ports, + double duty_cycle, double accesses); + void leakage_feedback(double temperature); + ~Interconnect() {}; }; #endif diff --git a/ext/mcpat/iocontrollers.cc b/ext/mcpat/iocontrollers.cc index 70b0f2dcb..4a175d841 100644 --- a/ext/mcpat/iocontrollers.cc +++ b/ext/mcpat/iocontrollers.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #include @@ -34,14 +35,12 @@ #include #include -#include "XML_Parse.h" #include "basic_circuit.h" -#include "basic_components.h" +#include "common.h" #include "const.h" #include "io.h" #include "iocontrollers.h" #include "logic.h" -#include "parameter.h" /* SUN Niagara 2 I/O power analysis: @@ -69,378 +68,473 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F * */ -NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - local_result = init_interface(&interface_ip); +NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_) + : McPATComponent(_xml_data, interface_ip_) { + name = "NIU"; + set_niu_param(); +} - double frontend_area, phy_area, mac_area, SerDer_area; - double frontend_dyn, mac_dyn, SerDer_dyn; - double frontend_gates, mac_gates, SerDer_gates; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; +void NIUController::computeArea() { + double mac_area; + double frontend_area; + double SerDer_area; - set_niu_param(); + if (niup.type == 0) { //high performance NIU + //Area estimation based on average of die photo from Niagara 2 and + //Cadence ChipEstimate using 65nm. + mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + //Area estimation based on average of die photo from Niagara 2, ISSCC + //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" + //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface + //With Robust VCO Tuning Technique" Frontend is PCS + frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 * + (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); + //Area estimation based on average of die photo from Niagara 2 and + //Cadence ChipEstimate hard IP @65nm. + //SerDer is very hard to scale + SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um / + 0.065);//* (interface_ip.F_sz_um/0.065); + } else { + //Low power implementations are mostly from Cadence ChipEstimator; + //Ignore the multiple IP effect + // ---When there are multiple IP (same kind or not) selected, Cadence + //ChipEstimator results are not a simple summation of all IPs. + //Ignore this effect + mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer + SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um/0.065); + //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet + //Transceiver and XAUI Interface With Robust VCO Tuning Technique" + //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can + //scale perfectly with the technology + } - if (niup.type == 0) //high performance NIU - { - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm. - mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" - //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS - frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm. - //SerDer is very hard to scale - SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - //total area - area.set_area((mac_area + frontend_area + SerDer_area)*1e6); - //Power - //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm - //Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; - //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006 - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU - - //Cadence ChipEstimate using 65nm - mac_gates = 111700; - frontend_gates = 320000; - SerDer_gates = 200000; - NMOS_sizing = 5*g_tp.min_w_nmos_; - PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - - } - else - {//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect - // ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not - // a simple summation of all IPs. Ignore this effect - mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer - SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" - //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology - //total area - area.set_area((mac_area + frontend_area + SerDer_area)*1e6); - //Power - //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm - //Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; - //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm - SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU - - mac_gates = 111700; - frontend_gates = 52000; - SerDer_gates = 199260; - - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - } - - power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; - power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W + //total area + output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6; } -void NIUController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { +void NIUController::computeEnergy() { + double mac_dyn; + double frontend_dyn; + double SerDer_dyn; + double frontend_gates; + double mac_gates; + double SerDer_gates; + double NMOS_sizing; + double PMOS_sizing; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + if (niup.type == 0) { //high performance NIU + //Power + //Cadence ChipEstimate using 65nm (mac, front_end are all energy. + //E=P*T = P/F = 1.37/1Ghz = 1.37e-9); + //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm + mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate; + //Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006 + //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; - power = power_t; - power.readOp.dynamic *= niup.duty_cycle; + //Cadence ChipEstimate using 65nm + mac_gates = 111700; + frontend_gates = 320000; + SerDer_gates = 200000; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { + //Power + //Cadence ChipEstimate using 65nm (mac, front_end are all energy. + ///E=P*T = P/F = 1.37/1Ghz = 1.37e-9); + //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm + mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd + / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate; + //Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm + SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; + mac_gates = 111700; + frontend_gates = 52000; + SerDer_gates = 199260; + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= niup.perc_load; + + //covert to energy per clock cycle of whole NIU + SerDer_dyn /= niup.clockRate; + + power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; + power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + + // Output power + output_data.subthreshold_leakage_power = + longer_channel_device ? power.readOp.longer_channel_leakage : + power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle; + output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load; +} + +void NIUController::set_niu_param() { + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("niu_clockRate", niup.clockRate); + ASSIGN_INT_IF("num_units", niup.num_units); + ASSIGN_INT_IF("type", niup.type); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + niup.clockRate *= 1e6; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("duty_cycle", nius.duty_cycle); + ASSIGN_FP_IF("perc_load", nius.perc_load); + + else { + warnUnrecognizedStat(node_name); + } } } -void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "NIU:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl; - cout<sys.niu.clockrate; - niup.clockRate *= 1e6; - niup.num_units = XML->sys.niu.number_units; - niup.duty_cycle = XML->sys.niu.duty_cycle; - niup.perc_load = XML->sys.niu.total_load_perc; - niup.type = XML->sys.niu.type; -// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); +void PCIeController::computeArea() { + double ctrl_area; + double SerDer_area; + + /* Assuming PCIe is bit-slice based architecture + * This is the reason for /8 in both area and power calculation + * to get per lane numbers + */ + + if (pciep.type == 0) { //high performance PCIe + //Area estimation based on average of die photo from Niagara 2 and + //Cadence ChipEstimate @ 65nm. + ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + //Area estimation based on average of die photo from Niagara 2 and + //Cadence ChipEstimate hard IP @65nm. + //SerDer is very hard to scale + SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um / + 0.065);//* (interface_ip.F_sz_um/0.065); + } else { + ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + //Area estimation based on average of die photo from Niagara 2, and + //Cadence ChipEstimate @ 65nm. + SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + } + + // Total area + output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 * + pciep.num_channels) * 1e6; } -PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - local_result = init_interface(&interface_ip); - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates,frontend_gates, SerDer_gates; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; +void PCIeController::computeEnergy() { + double ctrl_dyn; + double SerDer_dyn; + double ctrl_gates; + double SerDer_gates = 0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing; + double PMOS_sizing; - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ + /* Assuming PCIe is bit-slice based architecture + * This is the reason for /8 in both area and power calculation + * to get per lane numbers + */ - set_pcie_param(); - if (pciep.type == 0) //high performance NIU - { - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm. - ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm. - frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm. - //SerDer is very hard to scale - SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - //total area - //Power - //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer - ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle + if (pciep.type == 0) { //high performance PCIe + //Power + //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer + ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); + //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + //PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2; - //power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels; - //Cadence ChipEstimate using 65nm - ctrl_gates = 900000/8*pciep.num_channels; - // frontend_gates = 120000/8; - // SerDer_gates = 200000/8; - NMOS_sizing = 5*g_tp.min_w_nmos_; - PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - } - else - { - ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm. - SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //total area - //Power - //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer - ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle + //Cadence ChipEstimate using 65nm + ctrl_gates = 900000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + // SerDer_gates = 200000/8; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { + //Power + //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer + ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); + //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + //PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2; - //Cadence ChipEstimate using 65nm - ctrl_gates = 200000/8*pciep.num_channels; - // frontend_gates = 120000/8; - SerDer_gates = 200000/8*pciep.num_channels; - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - } - area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6); - power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels; - power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - } - -void PCIeController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - - - power = power_t; - power.readOp.dynamic *= pciep.duty_cycle; + //Cadence ChipEstimate using 65nm + ctrl_gates = 200000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + SerDer_gates = 200000 / 8 * pciep.num_channels; + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= pciep.perc_load; + + //covert to energy per clock cycle + SerDer_dyn /= pciep.clockRate; + + power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) * + pciep.num_channels; + power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = (ctrl_gates + + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + + // Output power + output_data.subthreshold_leakage_power = + longer_channel_device ? power.readOp.longer_channel_leakage : + power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle; + output_data.runtime_dynamic_energy = + power.readOp.dynamic * pcies.perc_load; +} + +void PCIeController::set_pcie_param() { + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate); + ASSIGN_INT_IF("num_units", pciep.num_units); + ASSIGN_INT_IF("num_channels", pciep.num_channels); + ASSIGN_INT_IF("type", pciep.type); + ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + pciep.clockRate *= 1e6; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle); + ASSIGN_FP_IF("perc_load", pcies.perc_load); + + else { + warnUnrecognizedStat(node_name); + } } } -void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "PCIe:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl; - cout<sys.pcie.clockrate; - pciep.clockRate *= 1e6; - pciep.num_units = XML->sys.pcie.number_units; - pciep.num_channels = XML->sys.pcie.num_channels; - pciep.duty_cycle = XML->sys.pcie.duty_cycle; - pciep.perc_load = XML->sys.pcie.total_load_perc; - pciep.type = XML->sys.pcie.type; - pciep.withPHY = XML->sys.pcie.withPHY; -// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); +void FlashController::computeArea() { + double ctrl_area; + double SerDer_area; -} - -FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - local_result = init_interface(&interface_ip); - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates,frontend_gates, SerDer_gates; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - - set_fc_param(); - if (fcp.type == 0) //high performance NIU - { - cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<sys.longer_channel_device; +void FlashController::computeEnergy() { + double ctrl_dyn; + double SerDer_dyn; + double ctrl_gates; + double SerDer_gates; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing; + double PMOS_sizing; - if (is_tdp) - { - cout << "Flash Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl; - cout<sys.flashc.mc_clock; -// fcp.clockRate *= 1e6; - fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; - fcp.num_channels = ceil(fcp.peakDataTransferRate/200); - fcp.num_mcs = XML->sys.flashc.number_mcs; - fcp.duty_cycle = XML->sys.flashc.duty_cycle; - fcp.perc_load = XML->sys.flashc.total_load_perc; - fcp.type = XML->sys.flashc.type; - fcp.withPHY = XML->sys.flashc.withPHY; -// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("num_channels", fcp.num_channels); + ASSIGN_INT_IF("type", fcp.type); + ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool); + + else { + warnUnrecognizedParam(node_name); + } + } + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle); + ASSIGN_FP_IF("perc_load", fcs.perc_load); + + else { + warnUnrecognizedStat(node_name); + } + } } diff --git a/ext/mcpat/iocontrollers.h b/ext/mcpat/iocontrollers.h index 818580abb..39cfb0eb3 100644 --- a/ext/mcpat/iocontrollers.h +++ b/ext/mcpat/iocontrollers.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,63 +26,52 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #ifndef IOCONTROLLERS_H_ #define IOCONTROLLERS_H_ - -#endif /* IOCONTROLLERS_H_ */ - -#include "XML_Parse.h" -#include "parameter.h" -//#include "io.h" -#include "array.h" -//#include "Undifferentiated_Core_Area.h" #include +#include "array.h" #include "basic_components.h" +#include "parameter.h" -class NIUController : public Component { +class NIUController : public McPATComponent { public: - ParseXML *XML; - InputParameter interface_ip; - NIUParam niup; - powerDef power_t; - uca_org_t local_result; - NIUController(ParseXML *XML_interface,InputParameter* interface_ip_); + NIUParameters niup; + NIUStatistics nius; + + NIUController(XMLNode* _xml_data, InputParameter* interface_ip_); void set_niu_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + void computeArea(); + void computeEnergy(); ~NIUController(){}; }; -class PCIeController : public Component { +class PCIeController : public McPATComponent { public: - ParseXML *XML; - InputParameter interface_ip; - PCIeParam pciep; - powerDef power_t; - uca_org_t local_result; - PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_); + PCIeParameters pciep; + PCIeStatistics pcies; + + PCIeController(XMLNode* _xml_data, InputParameter* interface_ip_); void set_pcie_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + void computeArea(); + void computeEnergy(); ~PCIeController(){}; }; -class FlashController : public Component { +class FlashController : public McPATComponent { public: - ParseXML *XML; - InputParameter interface_ip; - MCParam fcp; - powerDef power_t; - uca_org_t local_result; - FlashController(ParseXML *XML_interface,InputParameter* interface_ip_); + MCParameters fcp; + MCStatistics fcs; + + FlashController(XMLNode* _xml_data, InputParameter* interface_ip_); void set_fc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + void computeArea(); + void computeEnergy(); ~FlashController(){}; }; +#endif /* IOCONTROLLERS_H_ */ diff --git a/ext/mcpat/logic.cc b/ext/mcpat/logic.cc index 11519d863..43823e77b 100644 --- a/ext/mcpat/logic.cc +++ b/ext/mcpat/logic.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,416 +26,500 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ +#include "common.h" #include "logic.h" - //selection_logic -selection_logic::selection_logic( - bool _is_default, - int win_entries_, - int issue_width_, - const InputParameter *configure_interface, - enum Device_ty device_ty_, - enum Core_type core_ty_) - //const ParseXML *_XML_interface) - :is_default(_is_default), - win_entries(win_entries_), - issue_width(issue_width_), - device_ty(device_ty_), - core_ty(core_ty_) - { - //uca_org_t result2; - l_ip=*configure_interface; - local_result = init_interface(&l_ip); - //init_tech_params(l_ip.F_sz_um, false); - //win_entries=numIBEntries;//IQentries; - //issue_width=issueWidth; - selection_power(); - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - } - -void selection_logic::selection_power() -{//based on cost effective superscalar processor TR pp27-31 - double Ctotal, Cor, Cpencode; - int num_arbiter; - double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp; - - //TODO: the 0.8um process data is used. - WSelORn = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process - WSelORprequ = 50 * l_ip.F_sz_um;//this was 40 micron for the 0.8 micron process - WSelPn = 12.5 * l_ip.F_sz_um;//this was 10mcron for the 0.8 micron process - WSelPp = 18.75 * l_ip.F_sz_um;//this was 15 micron for the 0.8 micron process - WSelEnn = 6.25 * l_ip.F_sz_um;//this was 5 micron for the 0.8 micron process - WSelEnp = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process - - - Ctotal=0; - num_arbiter=1; - while(win_entries > 4) - { - win_entries = (int)ceil((double)win_entries / 4.0); - num_arbiter += win_entries; - } - //the 4-input OR logic to generate anyreq - Cor = 4 * drain_C_(WSelORn,NCH,1,1, g_tp.cell_h_def) + drain_C_(WSelORprequ,PCH,1,1, g_tp.cell_h_def); - power.readOp.gate_leakage = cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor)*g_tp.peri_global.Vdd; - - //The total capacity of the 4-bit priority encoder - Cpencode = drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,1, 1, g_tp.cell_h_def) + - 2*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,2, 1, g_tp.cell_h_def) + - 3*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,3, 1, g_tp.cell_h_def) + - 4*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,4, 1, g_tp.cell_h_def) +//precompute priority logic - 2*4*gate_C(WSelEnn+WSelEnp,20.0)+ - 4*drain_C_(WSelEnn,NCH,1, 1, g_tp.cell_h_def) + 2*4*drain_C_(WSelEnp,PCH,1, 1, g_tp.cell_h_def)+//enable logic - (2*4+2*3+2*2+2)*gate_C(WSelPn+WSelPp,10.0);//requests signal - - Ctotal += issue_width * num_arbiter*(Cor+Cpencode); - - power.readOp.dynamic = Ctotal*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*2;//2 means the abitration signal need to travel round trip - power.readOp.leakage = issue_width * num_arbiter * - (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p - + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p - + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p - + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic - + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals - )*g_tp.peri_global.Vdd; - power.readOp.gate_leakage = issue_width * num_arbiter * - (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p - + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p - + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p - + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic - + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals - )*g_tp.peri_global.Vdd; +selection_logic::selection_logic(XMLNode* _xml_data, bool _is_default, + int _win_entries, int issue_width_, + const InputParameter *configure_interface, + string _name, double _accesses, + double clockRate_, enum Device_ty device_ty_, + enum Core_type core_ty_) + : McPATComponent(_xml_data), is_default(_is_default), + win_entries(_win_entries), + issue_width(issue_width_), + accesses(_accesses), + device_ty(device_ty_), + core_ty(core_ty_) { + clockRate = clockRate_; + name = _name; + l_ip = *configure_interface; + local_result = init_interface(&l_ip, name); } +void selection_logic::computeArea() { + output_data.area = local_result.area; +} + +void selection_logic::computeEnergy() { + //based on cost effective superscalar processor TR pp27-31 + double Ctotal, Cor, Cpencode; + int num_arbiter; + double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp; + + //the 0.8um process data is used. + //this was 10 micron for the 0.8 micron process + WSelORn = 12.5 * l_ip.F_sz_um; + //this was 40 micron for the 0.8 micron process + WSelORprequ = 50 * l_ip.F_sz_um; + //this was 10mcron for the 0.8 micron process + WSelPn = 12.5 * l_ip.F_sz_um; + //this was 15 micron for the 0.8 micron process + WSelPp = 18.75 * l_ip.F_sz_um; + //this was 5 micron for the 0.8 micron process + WSelEnn = 6.25 * l_ip.F_sz_um; + //this was 10 micron for the 0.8 micron process + WSelEnp = 12.5 * l_ip.F_sz_um; + + Ctotal = 0; + num_arbiter = 1; + while (win_entries > 4) { + win_entries = (int)ceil((double)win_entries / 4.0); + num_arbiter += win_entries; + } + //the 4-input OR logic to generate anyreq + Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def); + power.readOp.gate_leakage = + cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd; + + //The total capacity of the 4-bit priority encoder + Cpencode = drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) + + 3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) + + 4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 4, 1, g_tp.cell_h_def) +//precompute priority logic + 2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) + + 4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) + + 2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) +//enable logic + (2 * 4 + 2 * 3 + 2 * 2 + 2) * + gate_C(WSelPn + WSelPp, 10.0);//requests signal + + Ctotal += issue_width * num_arbiter * (Cor + Cpencode); + + //2 means the abitration signal need to travel round trip + power.readOp.dynamic = + Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 2; + power.readOp.leakage = issue_width * num_arbiter * + (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p + + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p + + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p + + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic + + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals + ) * g_tp.peri_global.Vdd; + power.readOp.gate_leakage = issue_width * num_arbiter * + (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p + + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p + + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p + + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic + + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals + ) * g_tp.peri_global.Vdd; + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.runtime_dynamic_energy = power.readOp.dynamic * accesses; +} dep_resource_conflict_check::dep_resource_conflict_check( - const InputParameter *configure_interface, - const CoreDynParam & dyn_p_, - int compare_bits_, - bool _is_default) - : l_ip(*configure_interface), - coredynp(dyn_p_), - compare_bits(compare_bits_), - is_default(_is_default) -{ - Wcompn = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process - Wevalinvp = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process - Wevalinvn = 100 * l_ip.F_sz_um;//this was 80.0 mcron for the 0.8 micron process - Wcomppreequ = 50 * l_ip.F_sz_um;//this was 40.0 micron for the 0.8 micron process - WNORn = 6.75 * l_ip.F_sz_um;//this was 5.4 micron for the 0.8 micron process - WNORp = 38.125 * l_ip.F_sz_um;//this was 30.5 micron for the 0.8 micron process + XMLNode* _xml_data, const string _name, + const InputParameter *configure_interface, + const CoreParameters & dyn_p_, int compare_bits_, + double clockRate_, bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + coredynp(dyn_p_), compare_bits(compare_bits_), is_default(_is_default) { - local_result = init_interface(&l_ip); + name = _name; + clockRate = clockRate_; + //this was 20.0 micron for the 0.8 micron process + Wcompn = 25 * l_ip.F_sz_um; + //this was 20.0 micron for the 0.8 micron process + Wevalinvp = 25 * l_ip.F_sz_um; + //this was 80.0 mcron for the 0.8 micron process + Wevalinvn = 100 * l_ip.F_sz_um; + //this was 40.0 micron for the 0.8 micron process + Wcomppreequ = 50 * l_ip.F_sz_um; + //this was 5.4 micron for the 0.8 micron process + WNORn = 6.75 * l_ip.F_sz_um; + //this was 30.5 micron for the 0.8 micron process + WNORp = 38.125 * l_ip.F_sz_um; - if (coredynp.core_ty==Inorder) - compare_bits += 16 + 8 + 8;//TODO: opcode bits + log(shared resources) + REG TAG BITS-->opcode comparator - else - compare_bits += 16 + 8 + 8; + // To make CACTI happy. + l_ip.cache_sz = MIN_BUFFER_SIZE; + local_result = init_interface(&l_ip, name); - conflict_check_power(); - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; + if (coredynp.core_ty == Inorder) + //TODO: opcode bits + log(shared resources) + REG TAG BITS --> + //opcode comparator + compare_bits += 16 + 8 + 8; + else + compare_bits += 16 + 8 + 8; + + conflict_check_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; } -void dep_resource_conflict_check::conflict_check_power() -{ - double Ctotal; - int num_comparators; - num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision. - //When decode-width ==1, no dcl logic +void dep_resource_conflict_check::conflict_check_power() { + double Ctotal; + int num_comparators; + //2(N*N-N) is used for source to dest comparison, (N*N-N) is used for + //dest to dest comparision. + num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) - + coredynp.decodeW); - Ctotal = num_comparators * compare_cap(); - //printf("%i,%s\n",XML_interface->sys.core[0].predictor.predictor_entries,XML_interface->sys.core[0].predictor.prediction_scheme); + Ctotal = num_comparators * compare_cap(); - power.readOp.dynamic=Ctotal*/*CLOCKRATE*/g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/*AF*/; - power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false); + power.readOp.dynamic = Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd /*AF*/; + power.readOp.leakage = num_comparators * compare_bits * 2 * + simplified_nmos_leakage(Wcompn, false); - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos); + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = num_comparators * compare_bits * 2 * + cmos_Ig_leakage(Wcompn, 0, 2, nmos); } /* estimate comparator power consumption (this comparator is similar to the tag-match structure in a CAM */ -double dep_resource_conflict_check::compare_cap() -{ - double c1, c2; +double dep_resource_conflict_check::compare_cap() { + double c1, c2; - WNORp = WNORp * compare_bits/2.0;//resize the big NOR gate at the DCL according to fan in. - /* bottom part of comparator */ - c2 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def))+ - drain_C_(Wevalinvp,PCH,1,1, g_tp.cell_h_def) + drain_C_(Wevalinvn,NCH,1,1, g_tp.cell_h_def); + //resize the big NOR gate at the DCL according to fan in. + WNORp = WNORp * compare_bits / 2.0; + /* bottom part of comparator */ + c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) + + drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def); - /* top part of comparator */ - c1 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def)+ - drain_C_(Wcomppreequ,NCH,1,1, g_tp.cell_h_def)) + gate_C(WNORn + WNORp,10.0) + - drain_C_(WNORp,NCH,2,1, g_tp.cell_h_def) + compare_bits*drain_C_(WNORn,NCH,2,1, g_tp.cell_h_def); - return(c1 + c2); + /* top part of comparator */ + c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) + + drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(WNORn + WNORp, 10.0) + + drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + compare_bits * + drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def); + return(c1 + c2); } void dep_resource_conflict_check::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature/10.0)*10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy + uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy // This is part of conflict_check_power() - int num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision. - power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false); + // 2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest + // to dest comparison. + int num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) - + coredynp.decodeW); + power.readOp.leakage = num_comparators * compare_bits * 2 * + simplified_nmos_leakage(Wcompn, false); - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos); + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + power.readOp.gate_leakage = num_comparators * compare_bits * 2 * + cmos_Ig_leakage(Wcompn, 0, 2, nmos); } -//TODO: add inverter and transmission gate base DFF. DFFCell::DFFCell( - bool _is_dram, - double _WdecNANDn, - double _WdecNANDp, - double _cell_load, - const InputParameter *configure_interface) -:is_dram(_is_dram), -cell_load(_cell_load), -WdecNANDn(_WdecNANDn), -WdecNANDp(_WdecNANDp) -{//this model is based on the NAND2 based DFF. - l_ip=*configure_interface; -// area.set_area(730*l_ip.F_sz_um*l_ip.F_sz_um); - area.set_area(5*compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, g_tp.cell_h_def) - + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, g_tp.cell_h_def)); + bool _is_dram, + double _WdecNANDn, + double _WdecNANDp, + double _cell_load, + const InputParameter *configure_interface) + : is_dram(_is_dram), + cell_load(_cell_load), + WdecNANDn(_WdecNANDn), + WdecNANDp(_WdecNANDp) { //this model is based on the NAND2 based DFF. + l_ip = *configure_interface; + area.set_area(5 * compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, + g_tp.cell_h_def) + + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, + g_tp.cell_h_def)); } -double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) -{ - double Ctotal = 0; - //printf("WdecNANDn = %E\n", WdecNANDn); +double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) { + double Ctotal = 0; - /* part 1: drain cap of NAND gate */ - Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); + /* part 1: drain cap of NAND gate */ + Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); - /* part 2: gate cap of NAND gates */ - Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); + /* part 2: gate cap of NAND gates */ + Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); - return Ctotal; + return Ctotal; } -void DFFCell::compute_DFF_cell() -{ - double c1, c2, c3, c4, c5, c6; - /* node 5 and node 6 are identical to node 1 in capacitance */ - c1 = c5 = c6 = fpfp_node_cap(2, 1); - c2 = fpfp_node_cap(2, 3); - c3 = fpfp_node_cap(3, 2); - c4 = fpfp_node_cap(2, 2); +void DFFCell::compute_DFF_cell() { + double c1, c2, c3, c4, c5, c6; + /* node 5 and node 6 are identical to node 1 in capacitance */ + c1 = c5 = c6 = fpfp_node_cap(2, 1); + c2 = fpfp_node_cap(2, 3); + c3 = fpfp_node_cap(3, 2); + c4 = fpfp_node_cap(2, 2); - //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2 - clock_cap= 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); - e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2*cell_load)*0.5*g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; + //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2 + clock_cap = 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); + e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) * + 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; - /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */ - e_keep_1.readOp.dynamic += c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; - e_keep_0.readOp.dynamic += c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; - e_clock.readOp.dynamic += clock_cap* g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; + /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */ + e_keep_1.readOp.dynamic += + c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; + e_keep_0.readOp.dynamic += + c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; + e_clock.readOp.dynamic += + clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; - /* static power */ - e_switch.readOp.leakage += (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF - + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd; - e_switch.readOp.gate_leakage += (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF - + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd; - //printf("leakage =%E\n",cmos_Ileak(1, is_dram) ); + /* static power */ + e_switch.readOp.leakage += + (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) * + 5//5 NAND2 and 1 NAND3 in a DFF + + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) * + g_tp.peri_global.Vdd; + e_switch.readOp.gate_leakage += + (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) * + 5//5 NAND2 and 1 NAND3 in a DFF + + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) * + g_tp.peri_global.Vdd; } -Pipeline::Pipeline( - const InputParameter *configure_interface, - const CoreDynParam & dyn_p_, - enum Device_ty device_ty_, - bool _is_core_pipeline, - bool _is_default) -: l_ip(*configure_interface), - coredynp(dyn_p_), - device_ty(device_ty_), - is_core_pipeline(_is_core_pipeline), - is_default(_is_default), - num_piperegs(0.0) +Pipeline::Pipeline(XMLNode* _xml_data, + const InputParameter *configure_interface, + const CoreParameters & dyn_p_, + enum Device_ty device_ty_, + bool _is_core_pipeline, + bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + coredynp(dyn_p_), device_ty(device_ty_), + is_core_pipeline(_is_core_pipeline), is_default(_is_default), + num_piperegs(0.0) { + name = "Pipeline?"; - { - local_result = init_interface(&l_ip); + local_result = init_interface(&l_ip, name); + if (!coredynp.Embedded) { + process_ind = true; + } else { + process_ind = false; + } + //this was 20 micron for the 0.8 micron process + WNANDn = (process_ind) ? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ; + //this was 30 micron for the 0.8 micron process + WNANDp = (process_ind) ? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_ * + pmos_to_nmos_sz_ratio(); + load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false); + compute(); + +} + +void Pipeline::compute() { + compute_stage_vector(); + DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip); + pipe_reg.compute_DFF_cell(); + + double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic; + //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider + //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power. + double pipe_reg_power = num_piperegs * + (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic + + pipe_reg.e_keep_1.readOp.dynamic) / 3 + clock_power_pipereg; + double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; + double pipe_reg_gate_leakage = num_piperegs * + pipe_reg.e_switch.readOp.gate_leakage; + power.readOp.dynamic += pipe_reg_power; + power.readOp.leakage += pipe_reg_leakage; + power.readOp.gate_leakage += pipe_reg_gate_leakage; + area.set_area(num_piperegs * pipe_reg.area.get_area()); + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, coredynp.core_ty); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + + + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + double macro_layout_overhead = g_tp.macro_layout_overhead; if (!coredynp.Embedded) - process_ind = true; - else - process_ind = false; - WNANDn = (process_ind)? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;//this was 20 micron for the 0.8 micron process - WNANDp = (process_ind)? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_*pmos_to_nmos_sz_ratio();//this was 30 micron for the 0.8 micron process - load_per_pipeline_stage = 2*gate_C(WNANDn + WNANDp, 0, false); - compute(); + area.set_area(area.get_area() * macro_layout_overhead); + output_data.area = area.get_area() / 1e6; + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.runtime_dynamic_energy = power.readOp.dynamic * total_cycles; } -void Pipeline::compute() -{ - compute_stage_vector(); - DFFCell pipe_reg(false, WNANDn,WNANDp, load_per_pipeline_stage, &l_ip); - pipe_reg.compute_DFF_cell(); +void Pipeline::compute_stage_vector() { + double num_stages, tot_stage_vector, per_stage_vector; + int opcode_length = coredynp.x86 ? + coredynp.micro_opcode_length : coredynp.opcode_width; - double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic; - //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider - //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power. - double pipe_reg_power = num_piperegs * (pipe_reg.e_switch.readOp.dynamic+pipe_reg.e_keep_0.readOp.dynamic+pipe_reg.e_keep_1.readOp.dynamic)/3+clock_power_pipereg; - double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; - double pipe_reg_gate_leakage = num_piperegs * pipe_reg.e_switch.readOp.gate_leakage; - power.readOp.dynamic +=pipe_reg_power; - power.readOp.leakage +=pipe_reg_leakage; - power.readOp.gate_leakage +=pipe_reg_gate_leakage; - area.set_area(num_piperegs * pipe_reg.area.get_area()); + if (!is_core_pipeline) { + //The number of pipeline stages are calculated based on the achievable + //throughput and required throughput + num_piperegs = l_ip.pipeline_stages * l_ip.per_stage_vector; + } else { + if (coredynp.core_ty == Inorder) { + /* assume 6 pipe stages and try to estimate bits per pipe stage */ + /* pipe stage 0/IF */ + num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads; + /* pipe stage IF/ID */ + num_piperegs += coredynp.fetchW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads; + /* pipe stage IF/ThreadSEL */ + if (coredynp.multithreaded) { + num_piperegs += coredynp.num_hthreads * + coredynp.perThreadState; //8 bit thread states + } + /* pipe stage ID/EXE */ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width + + pow(2.0, opcode_length) + 2 * coredynp.int_data_width) * + coredynp.num_hthreads; + /* pipe stage EXE/MEM */ + num_piperegs += coredynp.issueW * + (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + 8 * + 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/); + /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + 8 * + 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/); + num_stages = 6; + } else { + /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ + /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */ - double long_channel_device_reduction = longer_channel_device_reduction(device_ty, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; + /* pipe stage 0/1F*/ + num_piperegs += + coredynp.pc_width * 2 * coredynp.num_hthreads ;//PC and Next PC + /* pipe stage IF/ID */ + num_piperegs += coredynp.fetchW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads;//PC is used to feed branch predictor in ID + /* pipe stage 1D/Renaming*/ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads;//PC is for branch exe in later stage. + /* pipe stage Renaming/wire_drive */ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width); + /* pipe stage Renaming/IssueQ */ + //3*coredynp.phy_ireg_width means 2 sources and 1 dest + num_piperegs += coredynp.issueW * + (coredynp.instruction_length + coredynp.pc_width + 3 * + coredynp.phy_ireg_width) * coredynp.num_hthreads; + /* pipe stage IssueQ/Dispatch */ + num_piperegs += coredynp.issueW * + (coredynp.instruction_length + 3 * coredynp.phy_ireg_width); + /* pipe stage Dispatch/EXE */ - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - double macro_layout_overhead = g_tp.macro_layout_overhead; - if (!coredynp.Embedded) - area.set_area(area.get_area()*macro_layout_overhead); -} - -void Pipeline::compute_stage_vector() -{ - double num_stages, tot_stage_vector, per_stage_vector; - int opcode_length = coredynp.x86? coredynp.micro_opcode_length:coredynp.opcode_length; - //Hthread = thread_clock_gated? 1:num_thread; - - if (!is_core_pipeline) - { - num_piperegs=l_ip.pipeline_stages*l_ip.per_stage_vector;//The number of pipeline stages are calculated based on the achievable throughput and required throughput - } - else - { - if (coredynp.core_ty==Inorder) - { - /* assume 6 pipe stages and try to estimate bits per pipe stage */ - /* pipe stage 0/IF */ - num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads; - /* pipe stage IF/ID */ - num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads; - /* pipe stage IF/ThreadSEL */ - if (coredynp.multithreaded) num_piperegs += coredynp.num_hthreads*coredynp.perThreadState; //8 bit thread states - /* pipe stage ID/EXE */ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width + pow(2.0,opcode_length)+ 2*coredynp.int_data_width)*coredynp.num_hthreads; - /* pipe stage EXE/MEM */ - num_piperegs += coredynp.issueW*(3 * coredynp.arch_ireg_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/); - /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/); -// /* pipe stage 5/6 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/); -// /* pipe stage 6/7 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/); -// /* pipe stage 7/8 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/**2*powers (2,reg_length)*/); -// /* assume 50% extra in control signals (rule of thumb) */ - num_stages=6; - - } - else - { - /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ - /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */ - - /* pipe stage 0/1F*/ - num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads ;//PC and Next PC - /* pipe stage IF/ID */ - num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is used to feed branch predictor in ID - /* pipe stage 1D/Renaming*/ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is for branch exe in later stage. - /* pipe stage Renaming/wire_drive */ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width); - /* pipe stage Renaming/IssueQ */ - num_piperegs += coredynp.issueW*(coredynp.instruction_length + coredynp.pc_width + 3*coredynp.phy_ireg_width)*coredynp.num_hthreads;//3*coredynp.phy_ireg_width means 2 sources and 1 dest - /* pipe stage IssueQ/Dispatch */ - num_piperegs += coredynp.issueW*(coredynp.instruction_length + 3 * coredynp.phy_ireg_width); - /* pipe stage Dispatch/EXE */ - - num_piperegs += coredynp.issueW*(3 * coredynp.phy_ireg_width + coredynp.pc_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /* 2^opcode_length means the total decoded signal for the opcode*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /* pipe stage EXE/MEM, data need to be read/write, address*/ - num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.v_address_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/);//memory Opcode still need to be passed - /* pipe stage MEM/WB; result data, writeback regs */ - num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.phy_ireg_width /* powers (2,opcode_length) + (2,opcode_length)+2*powers (2,reg_length)*/); - /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/ - num_piperegs += coredynp.commitW*(coredynp.int_data_width + coredynp.v_address_width + coredynp.phy_ireg_width/*+ powers (2,opcode_length)*2*powers (2,reg_length)*/)*coredynp.num_hthreads; -// if (multithreaded) -// { -// -// } - num_stages=12; + num_piperegs += coredynp.issueW * + (3 * coredynp.phy_ireg_width + coredynp.pc_width + + pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/); + /* 2^opcode_length means the total decoded signal for the opcode*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + /*+2*powers (2,reg_length)*/); + /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + /*+2*powers (2,reg_length)*/); + /* pipe stage EXE/MEM, data need to be read/write, address*/ + //memory Opcode still need to be passed + num_piperegs += coredynp.issueW * + (coredynp.int_data_width + coredynp.v_address_width + + pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/); + /* pipe stage MEM/WB; result data, writeback regs */ + num_piperegs += coredynp.issueW * + (coredynp.int_data_width + coredynp.phy_ireg_width + /* powers (2,opcode_length) + + (2,opcode_length)+2*powers (2,reg_length)*/); + /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/ + num_piperegs += coredynp.commitW * + (coredynp.int_data_width + coredynp.v_address_width + + coredynp.phy_ireg_width + /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) * + coredynp.num_hthreads; + num_stages = 12; } /* assume 50% extra in control registers and interrupt registers (rule of thumb) */ num_piperegs = num_piperegs * 1.5; - tot_stage_vector=num_piperegs; - per_stage_vector=tot_stage_vector/num_stages; + tot_stage_vector = num_piperegs; + per_stage_vector = tot_stage_vector / num_stages; - if (coredynp.core_ty==Inorder) - { - if (coredynp.pipeline_stages>6) - num_piperegs= per_stage_vector*coredynp.pipeline_stages; + if (coredynp.core_ty == Inorder) { + if (coredynp.pipeline_stages > 6) + num_piperegs = per_stage_vector * coredynp.pipeline_stages; + } else { //OOO + if (coredynp.pipeline_stages > 12) + num_piperegs = per_stage_vector * coredynp.pipeline_stages; } - else//OOO - { - if (coredynp.pipeline_stages>12) - num_piperegs= per_stage_vector*coredynp.pipeline_stages; - } - } + } } -FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - fu_type(fu_type_) -{ - double area_t;//, leakage, gate_leakage; +FunctionalUnit::FunctionalUnit(XMLNode* _xml_data, + InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + enum FU_type fu_type_) + : McPATComponent(_xml_data), + interface_ip(*interface_ip_), core_params(_core_params), + core_stats(_core_stats), fu_type(fu_type_) { + double area_t; + double leakage; + double gate_leakage; double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; + clockRate = core_params.clockRate; - //XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); - if (XML->sys.Embedded) - { - if (fu_type == FPU) - { - num_fu=coredynp.num_fpus; + uca_org_t result2; + // Temp name for the following function call + name = "Functional Unit"; + + result2 = init_interface(&interface_ip, name); + + if (core_params.Embedded) { + if (fu_type == FPU) { + num_fu=core_params.num_fpus; //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number //4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60% @@ -449,10 +534,8 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ) //FPU power from Sandia's processor sizing tech report FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data - } - else if (fu_type == ALU) - { - num_fu=coredynp.num_alus; + } else if (fu_type == ALU) { + num_fu=core_params.num_alus; area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; @@ -462,10 +545,8 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU - } - else if (fu_type == MUL) - { - num_fu=coredynp.num_muls; + } else if (fu_type == MUL) { + num_fu=core_params.num_muls; area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; @@ -474,197 +555,117 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam base_energy = 0; per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data - } - else - { + } else { cout<<"Unknown Functional Unit Type"<F_sz_nm * g_ip->F_sz_nm / 90.0 / + 90.0);//this is um^2 + if (g_ip->F_sz_nm > 90) + area_t = 8.47 * 1e6 * + g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 + leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + //W The base energy of ALU average numbers from Intel 4G and + //773Mhz (Wattch) + base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 3; + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / + 1.2); + per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ) + FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data + } else if (fu_type == ALU) { + name = "Integer ALU(s)"; + num_fu = core_params.num_alus; + //this is um^2 ALU + MUl + area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff; + leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; + //W The base energy of ALU average numbers from Intel 4G and 773Mhz + //(Wattch) + base_energy = core_params.core_ty == Inorder ? 0 : 89e-3; + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / + 1.2); + per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) + FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU + } else if (fu_type == MUL) { + name = "Multiply/Divide Unit(s)"; + num_fu = core_params.num_muls; + //this is um^2 ALU + MUl + area_t = 280 * 260 * 2 * 3 * + g_tp.scaling_factor.logic_scaling_co_eff; + leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; + //W The base energy of ALU average numbers from Intel 4G and 773Mhz + //(Wattch) + base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 2; + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / + 1.2); + per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch + FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data + } else { + cout << "Unknown Functional Unit Type" << endl; + exit(0); } - else - { - if (fu_type == FPU) - { - num_fu=coredynp.num_fpus; - //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - area_t = 8.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 - if (g_ip->F_sz_nm>90) - area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles. - base_energy = coredynp.core_ty==Inorder? 0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ) - FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data - } - else if (fu_type == ALU) - { - num_fu=coredynp.num_alus; - area_t = 280*260*2*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - base_energy = coredynp.core_ty==Inorder? 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) - FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU + } - } - else if (fu_type == MUL) - { - num_fu=coredynp.num_muls; - area_t = 280*260*2*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - base_energy = coredynp.core_ty==Inorder? 0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch - FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data - } - else - { - cout<<"Unknown Functional Unit Type"<sys.Embedded) - area.set_area(area.get_area()*macro_layout_overhead); + power.readOp.leakage = leakage * num_fu; + power.readOp.gate_leakage = gate_leakage * num_fu; + + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, core_params.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + double macro_layout_overhead = g_tp.macro_layout_overhead; + area.set_area(area.get_area()*macro_layout_overhead); } -void FunctionalUnit::computeEnergy(bool is_tdp) -{ - double pppm_t[4] = {1,1,1,1}; - double FU_duty_cycle; - if (is_tdp) - { +void FunctionalUnit::computeEnergy() { + double pppm_t[4] = {1, 1, 1, 1}; + double FU_duty_cycle; + double sckRation = g_tp.sckt_co_eff; + // TDP power calculation + //2 means two source operands needs to be passed for each int instruction. + set_pppm(pppm_t, 2, 2, 2, 2); + tdp_stats.readAc.access = num_fu; + if (fu_type == FPU) { + FU_duty_cycle = core_stats.FPU_duty_cycle; + } else if (fu_type == ALU) { + FU_duty_cycle = core_stats.ALU_duty_cycle; + } else if (fu_type == MUL) { + FU_duty_cycle = core_stats.MUL_duty_cycle; + } - set_pppm(pppm_t, 2, 2, 2, 2);//2 means two source operands needs to be passed for each int instruction. - if (fu_type == FPU) - { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.FPU_duty_cycle; - } - else if (fu_type == ALU) - { - stats_t.readAc.access = 1*num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.ALU_duty_cycle; - } - else if (fu_type == MUL) - { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.MUL_duty_cycle; - } + power.readOp.dynamic = + per_access_energy * tdp_stats.readAc.access + base_energy / clockRate; + power.readOp.dynamic *= sckRation * FU_duty_cycle; - //power.readOp.dynamic = base_energy/clockRate + energy*stats_t.readAc.access; - power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy/clockRate; - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation*FU_duty_cycle; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; + // Runtime power calculation + if (fu_type == FPU) { + rtp_stats.readAc.access = core_stats.fpu_accesses; + } else if (fu_type == ALU) { + rtp_stats.readAc.access = core_stats.ialu_accesses; + } else if (fu_type == MUL) { + rtp_stats.readAc.access = core_stats.mul_accesses; + } - power.readOp.leakage = leakage; - power.readOp.gate_leakage = gate_leakage; - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - } - else - { - if (fu_type == FPU) - { - stats_t.readAc.access = XML->sys.core[ithCore].fpu_accesses; - rtp_stats = stats_t; - } - else if (fu_type == ALU) - { - stats_t.readAc.access = XML->sys.core[ithCore].ialu_accesses; - rtp_stats = stats_t; - } - else if (fu_type == MUL) - { - stats_t.readAc.access = XML->sys.core[ithCore].mul_accesses; - rtp_stats = stats_t; - } - - //rt_power.readOp.dynamic = base_energy*executionTime + energy*stats_t.readAc.access; - rt_power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy*executionTime; - double sckRation = g_tp.sckt_co_eff; - rt_power.readOp.dynamic *= sckRation; - rt_power.writeOp.dynamic *= sckRation; - rt_power.searchOp.dynamic *= sckRation; - - } - - -} - -void FunctionalUnit::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - -// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - if (fu_type == FPU) - { - cout << indent_str << "Floating Point Units (FPUs) (Count: "<< coredynp.num_fpus <<" ):" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage << " W" << endl; - cout << indent_str_next<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.Embedded), - pipeline_stage(coredynp.pipeline_stages), - num_hthreads(coredynp.num_hthreads), - issue_width(coredynp.issueW), - exist(exist_) -// is_default(_is_default) -{ - if (!exist) return; - double undifferentiated_core=0; - double core_tx_density=0; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double undifferentiated_core_coe; - //XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); +UndiffCore::UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & dyn_p_, + bool exist_) + : McPATComponent(_xml_data), + interface_ip(*interface_ip_), coredynp(dyn_p_), + core_ty(coredynp.core_ty), embedded(coredynp.Embedded), + pipeline_stage(coredynp.pipeline_stages), + num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW), + exist(exist_) { + if (!exist) return; - //Compute undifferentiated core area at 90nm. - if (embedded==false) - { - //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements - if (core_ty==OOO) - { - //undifferentiated_core = (0.0764*pipeline_stage*pipeline_stage -2.3685*pipeline_stage + 10.405);//OOO - undifferentiated_core = (3.57*log(pipeline_stage)-1.2643)>0?(3.57*log(pipeline_stage)-1.2643):0; - } - else if (core_ty==Inorder) - { - //undifferentiated_core = (0.1238*pipeline_stage + 7.2572)*0.9;//inorder - undifferentiated_core = (-2.19*log(pipeline_stage)+6.55)>0?(-2.19*log(pipeline_stage)+6.55):0; - } - else - { - cout<<"invalid core type"< 0 ? + (3.57 * log(pipeline_stage) - 1.2643) : 0; + } else if (core_ty == Inorder) { + undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 ? + (-2.19 * log(pipeline_stage) + 6.55) : 0; + } else { + cout << "invalid core type" << endl; + exit(0); } - else - { - //Based on the results in paper "parametrized processor models" Sandia Labs - if (XML->sys.opt_clockrate) + undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716); + } else { + //Based on the results in paper "parametrized processor models" Sandia Labs + if (opt_for_clk) undifferentiated_core_coe = 0.05; else undifferentiated_core_coe = 0; - undifferentiated_core = (0.4109* pipeline_stage - 0.776)*undifferentiated_core_coe; - undifferentiated_core *= (1+ logtwo(num_hthreads)* 0.0426); - } + undifferentiated_core = (0.4109 * pipeline_stage - 0.776) * + undifferentiated_core_coe; + undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0426); + } - undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff*1e6;//change from mm^2 to um^2 - core_tx_density = g_tp.scaling_factor.core_tx_density; - //undifferentiated_core = 3*1e6; - //undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; - power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power.readOp.gate_leakage = undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd; - - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - area.set_area(undifferentiated_core); - - scktRatio = g_tp.sckt_co_eff; - power.readOp.dynamic *= scktRatio; - power.writeOp.dynamic *= scktRatio; - power.searchOp.dynamic *= scktRatio; - macro_PR_overhead = g_tp.macro_layout_overhead; - area.set_area(area.get_area()*macro_PR_overhead); - - - -// double vt=g_tp.peri_global.Vth; -// double velocity_index=1.1; -// double c_in=gate_C(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r , 0.0, false); -// double c_out= drain_C_(g_tp.min_w_nmos_, NCH, 2, 1, g_tp.cell_h_def, false) + drain_C_(g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, PCH, 1, 1, g_tp.cell_h_def, false) + c_in; -// double w_nmos=g_tp.min_w_nmos_; -// double w_pmos=g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; -// double i_on_n=1.0; -// double i_on_p=1.0; -// double i_on_n_in=1.0; -// double i_on_p_in=1; -// double vdd=g_tp.peri_global.Vdd; - -// power.readOp.sc=shortcircuit_simple(vt, velocity_index, c_in, c_out, w_nmos,w_pmos, i_on_n, i_on_p,i_on_n_in, i_on_p_in, vdd); -// power.readOp.dynamic=c_out*vdd*vdd/2; - -// cout<sys.longer_channel_device; + l_ip = *configure_interface; + local_result = init_interface(&l_ip, name); + cell.h = g_tp.cell_h_def; + cell.w = g_tp.cell_h_def; - if (is_tdp) - { - cout << indent_str << "UndiffCore:" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl; - cout << indent_str_next<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - //cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout < 18) opcode_length = 18; + num_decoded_signals = (int)pow(2.0, opcode_length); + pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + load_nmos_width = g_tp.max_w_nmos_ / 2; + load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; + C_driver_load = 1024 * gate_C(load_nmos_width + load_pmos_width, 0, is_dram); + R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; + final_dec = new Decoder( + num_decoded_signals, + false, + C_driver_load, + R_wire_load, + false/*is_fa*/, + false/*is_dram*/, + false/*wl_tr*/, //to use peri device + cell); + + PredecBlk * predec_blk1 = new PredecBlk( + num_decoded_signals, + final_dec, + 0,//Assuming predec and dec are back to back + 0, + 1,//Each Predec only drives one final dec + false/*is_dram*/, + true); + PredecBlk * predec_blk2 = new PredecBlk( + num_decoded_signals, + final_dec, + 0,//Assuming predec and dec are back to back + 0, + 1,//Each Predec only drives one final dec + false/*is_dram*/, + false); + + PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); + PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); + + pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); + + double area_decoder = final_dec->area.get_area() * num_decoded_signals * + num_decoder_segments * num_decoders; + //double w_decoder = area_decoder / area.get_h(); + double area_pre_dec = (predec_blk_drv1->area.get_area() + + predec_blk_drv2->area.get_area() + + predec_blk1->area.get_area() + + predec_blk2->area.get_area()) * + num_decoder_segments * num_decoders; + area.set_area(area.get_area() + area_decoder + area_pre_dec); + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead); + + inst_decoder_delay_power(); + + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + + output_data.area = area.get_area() / 1e6; + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; } -inst_decoder::inst_decoder( - bool _is_default, - const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_, - enum Core_type core_ty_) -:is_default(_is_default), - opcode_length(opcode_length_), - num_decoders(num_decoders_), - x86(x86_), - device_ty(device_ty_), - core_ty(core_ty_) - { - /* - * Instruction decoder is different from n to 2^n decoders - * that are commonly used in row decoders in memory arrays. - * The RISC instruction decoder is typically a very simple device. - * We can decode an instruction by simply - * separating the machine word into small parts using wire slices - * The RISC instruction decoder can be approximate by the n to 2^n decoders, - * although this approximation usually underestimate power since each decoded - * instruction normally has more than 1 active signal. - * - * However, decoding a CISC instruction word is much more difficult - * than the RISC case. A CISC decoder is typically set up as a state machine. - * The machine reads the opcode field to determine - * what type of instruction it is, - * and where the other data values are. - * The instruction word is read in piece by piece, - * and decisions are made at each stage as to - * how the remainder of the instruction word will be read. - * (sequencer and ROM are usually needed) - * An x86 decoder can be even more complex since - * it involve both decoding instructions into u-ops and - * merge u-ops when doing micro-ops fusion. - */ - bool is_dram=false; - double pmos_to_nmos_sizing_r; - double load_nmos_width, load_pmos_width; - double C_driver_load, R_wire_load; - Area cell; +void InstructionDecoder::inst_decoder_delay_power() { - l_ip=*configure_interface; - local_result = init_interface(&l_ip); - cell.h =g_tp.cell_h_def; - cell.w =g_tp.cell_h_def; + double dec_outrisetime; + double inrisetime = 0, outrisetime; + double pppm_t[4] = {1, 1, 1, 1}; + double squencer_passes = x86 ? 2 : 1; - num_decoder_segments = (int)ceil(opcode_length/18.0); - if (opcode_length > 18) opcode_length = 18; - num_decoded_signals= (int)pow(2.0,opcode_length); - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - load_nmos_width=g_tp.max_w_nmos_ /2; - load_pmos_width= g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; - C_driver_load = 1024*gate_C(load_nmos_width + load_pmos_width, 0, is_dram); //TODO: this number 1024 needs to be revisited - R_wire_load = 3000*l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; - - final_dec = new Decoder( - num_decoded_signals, - false, - C_driver_load, - R_wire_load, - false/*is_fa*/, - false/*is_dram*/, - false/*wl_tr*/, //to use peri device - cell); - - PredecBlk * predec_blk1 = new PredecBlk( - num_decoded_signals, - final_dec, - 0,//Assuming predec and dec are back to back - 0, - 1,//Each Predec only drives one final dec - false/*is_dram*/, - true); - PredecBlk * predec_blk2 = new PredecBlk( - num_decoded_signals, - final_dec, - 0,//Assuming predec and dec are back to back - 0, - 1,//Each Predec only drives one final dec - false/*is_dram*/, - false); - - PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); - PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); - - pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); - - double area_decoder = final_dec->area.get_area() * num_decoded_signals * num_decoder_segments*num_decoders; - //double w_decoder = area_decoder / area.get_h(); - double area_pre_dec = (predec_blk_drv1->area.get_area() + - predec_blk_drv2->area.get_area() + - predec_blk1->area.get_area() + - predec_blk2->area.get_area())* - num_decoder_segments*num_decoders; - area.set_area(area.get_area()+ area_decoder + area_pre_dec); - double macro_layout_overhead = g_tp.macro_layout_overhead; - double chip_PR_overhead = g_tp.chip_layout_overhead; - area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead); - - inst_decoder_delay_power(); - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - -} - -void inst_decoder::inst_decoder_delay_power() -{ - - double dec_outrisetime; - double inrisetime=0, outrisetime; - double pppm_t[4] = {1,1,1,1}; - double squencer_passes = x86?2:1; - - outrisetime = pre_dec->compute_delays(inrisetime); - dec_outrisetime = final_dec->compute_delays(outrisetime); - set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); - power = power + pre_dec->power*pppm_t; + outrisetime = pre_dec->compute_delays(inrisetime); + dec_outrisetime = final_dec->compute_delays(outrisetime); + set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); + power = power + pre_dec->power * pppm_t; set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals, - num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); - power = power + final_dec->power*pppm_t; + num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); + power = power + final_dec->power * pppm_t; } -void inst_decoder::leakage_feedback(double temperature) -{ + +void InstructionDecoder::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature/10.0)*10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy + uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy final_dec->leakage_feedback(temperature); pre_dec->leakage_feedback(temperature); @@ -1000,15 +945,14 @@ void inst_decoder::leakage_feedback(double temperature) power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; } -inst_decoder::~inst_decoder() -{ - local_result.cleanup(); +InstructionDecoder::~InstructionDecoder() { + local_result.cleanup(); - delete final_dec; + delete final_dec; - delete pre_dec->blk1; - delete pre_dec->blk2; - delete pre_dec->drv1; - delete pre_dec->drv2; - delete pre_dec; + delete pre_dec->blk1; + delete pre_dec->blk2; + delete pre_dec->drv1; + delete pre_dec->drv2; + delete pre_dec; } diff --git a/ext/mcpat/logic.h b/ext/mcpat/logic.h index e2a35e845..19c774ef9 100644 --- a/ext/mcpat/logic.h +++ b/ext/mcpat/logic.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,18 +26,16 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #ifndef LOGIC_H_ #define LOGIC_H_ -#include #include #include #include -#include "XML_Parse.h" #include "arch_const.h" #include "basic_circuit.h" #include "basic_components.h" @@ -49,185 +48,190 @@ using namespace std; -class selection_logic : public Component{ +class selection_logic : public McPATComponent { public: - selection_logic(bool _is_default, int win_entries_, - int issue_width_, const InputParameter *configure_interface, - enum Device_ty device_ty_=Core_device, - enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface); - bool is_default; - InputParameter l_ip; - uca_org_t local_result; - const ParseXML *XML_interface; - int win_entries; - int issue_width; - int num_threads; - enum Device_ty device_ty; - enum Core_type core_ty; + bool is_default; + InputParameter l_ip; + uca_org_t local_result; + int win_entries; + int issue_width; + double accesses; + int num_threads; + enum Device_ty device_ty; + enum Core_type core_ty; - void selection_power(); + selection_logic(XMLNode* _xml_data, bool _is_default, int _win_entries, + int issue_width_, const InputParameter* configure_interface, + string _name, double _accesses, + double clockRate_ = 0.0f, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); + void computeArea(); + void computeEnergy(); void leakage_feedback(double temperature); // TODO + // TODO: Add a deconstructor }; -class dep_resource_conflict_check : public Component{ +class dep_resource_conflict_check : public McPATComponent { public: - dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true); - InputParameter l_ip; - uca_org_t local_result; - double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; - CoreDynParam coredynp; - int compare_bits; - bool is_default; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; + InputParameter l_ip; + uca_org_t local_result; + double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; + CoreParameters coredynp; + int compare_bits; + bool is_default; + statsDef stats_t; - void conflict_check_power(); - double compare_cap(); - ~dep_resource_conflict_check(){ - local_result.cleanup(); - } + dep_resource_conflict_check(XMLNode* _xml_data, const string _name, + const InputParameter *configure_interface, + const CoreParameters & dyn_p_, int compare_bits_, + double clockRate_ = 0.0f, + bool _is_default = true); + void conflict_check_power(); + double compare_cap(); + void computeEnergy() {}; + ~dep_resource_conflict_check() { + local_result.cleanup(); + } void leakage_feedback(double temperature); }; -class inst_decoder: public Component{ +class InstructionDecoder: public McPATComponent { public: - inst_decoder(bool _is_default, const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_=Core_device, - enum Core_type core_ty_=Inorder); - inst_decoder(); - bool is_default; - int opcode_length; - int num_decoders; - bool x86; - int num_decoder_segments; - int num_decoded_signals; - InputParameter l_ip; - uca_org_t local_result; - enum Device_ty device_ty; - enum Core_type core_ty; + Decoder* final_dec; + Predec* pre_dec; - Decoder * final_dec; - Predec * pre_dec; + bool is_default; + int opcode_length; + int num_decoders; + bool x86; + int num_decoder_segments; + int num_decoded_signals; + InputParameter l_ip; + uca_org_t local_result; + enum Device_ty device_ty; + enum Core_type core_ty; + statsDef stats_t; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - void inst_decoder_delay_power(); - ~inst_decoder(); + InstructionDecoder(XMLNode* _xml_data, const string _name, bool _is_default, + const InputParameter *configure_interface, + int opcode_length_, int num_decoders_, bool x86_, + double clockRate_ = 0.0f, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); + InstructionDecoder(); + void computeEnergy() {}; + void inst_decoder_delay_power(); + ~InstructionDecoder(); void leakage_feedback(double temperature); }; +// TODO: This should be defined elsewhere? This isn't a true McPATComponent class DFFCell : public Component { public: - DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load, - const InputParameter *configure_interface); - InputParameter l_ip; - bool is_dram; - double cell_load; - double WdecNANDn; - double WdecNANDp; - double clock_cap; - int model; - int n_switch; - int n_keep_1; - int n_keep_0; - int n_clock; - powerDef e_switch; - powerDef e_keep_1; - powerDef e_keep_0; - powerDef e_clock; + InputParameter l_ip; + bool is_dram; + double cell_load; + double WdecNANDn; + double WdecNANDp; + double clock_cap; + int model; + int n_switch; + int n_keep_1; + int n_keep_0; + int n_clock; + powerDef e_switch; + powerDef e_keep_1; + powerDef e_keep_0; + powerDef e_clock; - double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); - void compute_DFF_cell(void); - }; + DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, double _cell_load, + const InputParameter *configure_interface); + double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); + void compute_DFF_cell(void); + ~DFFCell() {}; +}; -class Pipeline : public Component{ +// TODO: This is a very ambiguous component. Try to refactor it. +class Pipeline : public McPATComponent { public: - Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true); - InputParameter l_ip; - uca_org_t local_result; - CoreDynParam coredynp; - enum Device_ty device_ty; - bool is_core_pipeline, is_default; - double num_piperegs; -// int pipeline_stages; -// int tot_stage_vector, per_stage_vector; - bool process_ind; - double WNANDn ; - double WNANDp; - double load_per_pipeline_stage; -// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length; -// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; -// bool thread_clock_gated; -// bool in_order, multithreaded; - void compute_stage_vector(); - void compute(); - ~Pipeline(){ - local_result.cleanup(); - }; + InputParameter l_ip; + uca_org_t local_result; + CoreParameters coredynp; + enum Device_ty device_ty; + bool is_core_pipeline, is_default; + double num_piperegs; + bool process_ind; + double WNANDn; + double WNANDp; + double load_per_pipeline_stage; + + Pipeline(XMLNode* _xml_data, const InputParameter *configure_interface, + const CoreParameters & dyn_p_, + enum Device_ty device_ty_ = Core_device, + bool _is_core_pipeline = true, bool _is_default = true); + void compute_stage_vector(); + /** + * TODO: compute() completes work that should be completed in computeArea() + * and computeEnergy() recursively. Consider shifting these calculations + * around to be consistent with rest of hierarchy + */ + void compute(); + void computeArea() {}; + // TODO: Move energy computation to this function to unify hierarchy + void computeEnergy() {}; + ~Pipeline() { + local_result.cleanup(); + }; }; -//class core_pipeline :public pipeline{ -//public: -// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length; -// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; -// bool thread_clock_gated; -// bool in_order, multithreaded; -// core_pipeline(bool _is_default, const InputParameter *configure_interface); -// virtual void compute_stage_vector(); -// -//}; - -class FunctionalUnit :public Component{ +class FunctionalUnit : public McPATComponent { public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double FU_height; - double clockRate,executionTime; - double num_fu; - double energy, base_energy,per_access_energy, leakage, gate_leakage; - bool is_default; - enum FU_type fu_type; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + double FU_height; + double num_fu; + double energy; + double base_energy; + double per_access_energy; + bool is_default; + enum FU_type fu_type; + statsDef stats_t; - FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + FunctionalUnit(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, enum FU_type fu_type); + void computeEnergy(); void leakage_feedback(double temperature); - + ~FunctionalUnit() {}; }; -class UndiffCore :public Component{ +// TODO: This is a very ambiguous component. Try to refactor it. +class UndiffCore : public McPATComponent { public: - UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false); - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Core_type core_ty; - bool opt_performance, embedded; - double pipeline_stage,num_hthreads,issue_width; - bool is_default; - - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~UndiffCore(){}; - bool exist; - + InputParameter interface_ip; + CoreParameters coredynp; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + enum Core_type core_ty; + bool opt_performance; + bool embedded; + double pipeline_stage; + double num_hthreads; + double issue_width; + bool is_default; + bool exist; + UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & dyn_p_, + bool exist_ = true); + void computeArea() {}; + // TODO: Move energy computation to this function to unify hierarchy + void computeEnergy() {}; + ~UndiffCore() {}; }; #endif /* LOGIC_H_ */ diff --git a/ext/mcpat/main.cc b/ext/mcpat/main.cc index 8acce8d23..ec266f386 100644 --- a/ext/mcpat/main.cc +++ b/ext/mcpat/main.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,15 +26,17 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ +#include + +#include #include -#include "XML_Parse.h" -#include "globalvar.h" +#include "basic_components.h" #include "io.h" -#include "processor.h" +#include "system.h" #include "version.h" #include "xmlParser.h" @@ -41,61 +44,68 @@ using namespace std; void print_usage(char * argv0); -int main(int argc,char *argv[]) -{ - char * fb ; - bool infile_specified = false; - int plevel = 2; - opt_for_clk =true; - //cout.precision(10); - if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help")) - { - print_usage(argv[0]); +int main(int argc, char *argv[]) { + char* xml_file = NULL; + int plevel = 2; + + for (int32_t i = 0; i < argc; i++) { + if (argv[i] == string("-infile")) { + xml_file = argv[++i]; + + } else if (argv[i] == string("-print_level")) { + plevel = atoi(argv[++i]); + + } else if (argv[i] == string("-opt_for_clk")) { + McPATComponent::opt_for_clk = (bool)atoi(argv[++i]); } + } - for (int32_t i = 0; i < argc; i++) - { - if (argv[i] == string("-infile")) - { - infile_specified = true; - i++; - fb = argv[ i]; - } + // Ensure that the XML file was specified + if (xml_file == NULL) { + cerr << "ERROR: Please specify infile\n\n"; + print_usage(argv[0]); + } - if (argv[i] == string("-print_level")) - { - i++; - plevel = atoi(argv[i]); - } + // Ensure that the XML file exists + struct stat file_info; + if (stat(xml_file, &file_info)) { + cerr << "ERROR: File not found: " << xml_file << endl << endl; + print_usage(argv[0]); + } - if (argv[i] == string("-opt_for_clk")) - { - i++; - opt_for_clk = (bool)atoi(argv[i]); - } - } - if (infile_specified == false) - { - print_usage(argv[0]); - } + cout << "McPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE << ") is computing the target processor...\n " + << endl; + // Parse the XML input file + XMLNode xml_data = XMLNode::openFileHelper(xml_file, "component"); + unsigned int num_children = xml_data.nChildNode("component"); + assert(num_children == 1); + XMLNode system_xml = xml_data.getChildNode("component"); + assert(strcmp(system_xml.getAttribute("type"), "System") == 0); - cout<<"McPAT (version "<< VER_MAJOR <<"."<< VER_MINOR - << " of " << VER_UPDATE << ") is computing the target processor...\n "<computeArea(); + + // Recursively compute the power consumed + system->computeEnergy(); + + // Recursively output the computed values + system->displayData(2, plevel); + + // Clean up + delete system; + return 0; - //parse XML-based interface - ParseXML *p1= new ParseXML(); - p1->parse(fb); - Processor proc(p1); - proc.displayEnergy(2, plevel); - delete p1; - return 0; } -void print_usage(char * argv0) -{ +void print_usage(char * argv0) { cerr << "How to use McPAT:" << endl; - cerr << " mcpat -infile -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl; - //cerr << " Note:default print level is at processor level, please increase it to see the details" << endl; + cerr << " mcpat -infile -print_level < " + << "level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P " + << "only)/1 (optimzed for target clock rate)>" << endl; exit(1); } diff --git a/ext/mcpat/mcpat.mk b/ext/mcpat/mcpat.mk index f89f499a9..acb73211e 100644 --- a/ext/mcpat/mcpat.mk +++ b/ext/mcpat/mcpat.mk @@ -29,13 +29,16 @@ VPATH = cacti SRCS = \ Ucache.cc \ - XML_Parse.cc \ arbiter.cc \ area.cc \ array.cc \ bank.cc \ basic_circuit.cc \ basic_components.cc \ + bus_interconnect.cc \ + cachearray.cc \ + cachecontroller.cc \ + cacheunit.cc \ cacti_interface.cc \ component.cc \ core.cc \ @@ -52,14 +55,13 @@ SRCS = \ noc.cc \ nuca.cc \ parameter.cc \ - processor.cc \ router.cc \ - sharedcache.cc \ subarray.cc \ + system.cc \ technology.cc \ uca.cc \ wire.cc \ - xmlParser.cc + xmlParser.cc OBJS = $(patsubst %.cc,$(ODIR)/obj_$(TAG)/%.o,$(SRCS)) diff --git a/ext/mcpat/mcpatXeonCore.mk b/ext/mcpat/mcpatXeonCore.mk deleted file mode 100644 index 20cf0ddc8..000000000 --- a/ext/mcpat/mcpatXeonCore.mk +++ /dev/null @@ -1,81 +0,0 @@ -TARGET = mcpatXeonCore -SHELL = /bin/sh -.PHONY: all depend clean -.SUFFIXES: .cc .o - -ifndef NTHREADS - NTHREADS = 4 -endif - - -LIBS = -INCS = -lm - -ifeq ($(TAG),dbg) - DBG = -Wall - OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti -else - DBG = - OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti - #OPT = -O0 -DNTHREADS=$(NTHREADS) -endif - -#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT) -CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT) -CXX = g++ -m32 -CC = gcc -m32 - -VPATH = cacti - -SRCS = \ - Ucache.cc \ - XML_Parse.cc \ - arbiter.cc \ - area.cc \ - array.cc \ - bank.cc \ - basic_circuit.cc \ - basic_components.cc \ - cacti_interface.cc \ - component.cc \ - core.cc \ - crossbar.cc \ - decoder.cc \ - htree2.cc \ - interconnect.cc \ - io.cc \ - iocontrollers.cc \ - logic.cc \ - main.cc \ - mat.cc \ - memoryctrl.cc \ - noc.cc \ - nuca.cc \ - parameter.cc \ - processor.cc \ - router.cc \ - sharedcache.cc \ - subarray.cc \ - technology_xeon_core.cc \ - uca.cc \ - wire.cc \ - xmlParser.cc - -OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS)) - -all: obj_$(TAG)/$(TARGET) - cp -f obj_$(TAG)/$(TARGET) $(TARGET) - -obj_$(TAG)/$(TARGET) : $(OBJS) - $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread - -#obj_$(TAG)/%.o : %.cc -# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $< - -obj_$(TAG)/%.o : %.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -clean: - -rm -f *.o $(TARGET) - - diff --git a/ext/mcpat/memoryctrl.cc b/ext/mcpat/memoryctrl.cc index ae3bc75ec..dec24512e 100644 --- a/ext/mcpat/memoryctrl.cc +++ b/ext/mcpat/memoryctrl.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,18 +26,19 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ + #include #include #include #include #include -#include "XML_Parse.h" #include "basic_circuit.h" #include "basic_components.h" +#include "common.h" #include "const.h" #include "io.h" #include "logic.h" @@ -69,668 +71,543 @@ * */ -MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:l_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_) -{ - - local_result = init_interface(&l_ip); - compute(); +MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_) + : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { + name = "Transaction Engine"; + local_result = init_interface(&l_ip, name); + // Set up stats for the power calculations + tdp_stats.reset(); + tdp_stats.readAc.access = 0.5 * mcp.num_channels * mcp.clockRate; + tdp_stats.writeAc.access = 0.5 * mcp.num_channels * mcp.clockRate; + rtp_stats.reset(); + rtp_stats.readAc.access = mcs.reads; + rtp_stats.writeAc.access = mcs.writes; } - -void MCBackend::compute() -{ - //double max_row_addr_width = 20.0;//Current address 12~18bits - double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend, - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - if (mc_type == MC) - { - if (mcp.type == 0) - { - //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09); - area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2 - //assuming the approximately same scaling factor as seen in processors. - //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip. - //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. - //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et - mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend - C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; - power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller - power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - - } - else - { NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2 - backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate - //Scaling to technology and DIMM feature. The base IP support DDR3-1600(PC3 12800) - backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator - - power_t.readOp.dynamic = backend_dyn; - power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - - } - } - else - {//skip old model - cout<<"Unknown memory controllers"< 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = mcp.num_channels; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer", + Uncore_device, mcp.clockRate); + children.push_back(frontendBuffer); + + frontendBuffer->tdp_stats.reset(); + frontendBuffer->tdp_stats.readAc.access = + frontendBuffer->l_ip.num_search_ports + + frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->tdp_stats.writeAc.access = + frontendBuffer->l_ip.num_search_ports; + frontendBuffer->tdp_stats.searchAc.access = + frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->rtp_stats.reset(); + // TODO: These stats assume that access power is calculated per buffer + // bit, which requires the stats to take into account the number of + // bits for each buffer slot. This should be revised... + //For each channel, each memory word need to check the address data to + //achieve best scheduling results. + //and this need to be done on all physical DIMMs in each logical memory + //DIMM *mcp.dataBusWidth/72 + frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats.searchAc.access = + frontendBuffer->rtp_stats.readAc.access + + frontendBuffer->rtp_stats.writeAc.access; + + // Read Buffers + //Support key words first operation + data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); + + interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.read_buffer_assoc; + interface_ip.nbanks = mcp.read_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = mcp.read_buffer_tag_width > 0; + interface_ip.tag_w = mcp.read_buffer_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer", + Uncore_device, mcp.clockRate); + children.push_back(readBuffer); + + readBuffer->tdp_stats.reset(); + readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports * + mcs.duty_cycle; + readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports * + mcs.duty_cycle; + readBuffer->rtp_stats.reset(); + readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + + // Write Buffer + //Support key words first operation + data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); + + interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.write_buffer_assoc; + interface_ip.nbanks = mcp.write_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = mcp.write_buffer_tag_width > 0; + interface_ip.tag_w = mcp.write_buffer_tag_width; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer", + Uncore_device, mcp.clockRate); + children.push_back(writeBuffer); + + writeBuffer->tdp_stats.reset(); + writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports * + mcs.duty_cycle; + writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports * + mcs.duty_cycle; + writeBuffer->rtp_stats.reset(); + writeBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + + // TODO: Set up selection logic as a leaf node in tree + //selection and arbitration logic + MC_arb = + new selection_logic(xml_data, is_default, + mcp.req_window_size_per_channel, 1, &interface_ip, + "Arbitration Logic", (mcs.reads + mcs.writes), + mcp.clockRate, Uncore_device); + // MC_arb is not included in the roll-up due to the uninitialized area + //children.push_back(MC_arb); +} + +MemoryController::MemoryController(XMLNode* _xml_data, + InputParameter* interface_ip_) + : McPATComponent(_xml_data), interface_ip(*interface_ip_) { + name = "Memory Controller"; + set_mc_param(); + // TODO: Pass params and stats as pointers + children.push_back(new MCFrontEnd(xml_data, &interface_ip, mcp, mcs)); + children.push_back(new MCBackend(xml_data, &interface_ip, mcp, mcs)); + + if (mcp.type==0 || (mcp.type == 1 && mcp.withPHY)) { + children.push_back(new MCPHY(xml_data, &interface_ip, mcp, mcs)); } } -MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:XML(XML_interface), - interface_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_), - MC_arb(0), - frontendBuffer(0), - readBuffer(0), - writeBuffer(0) -{ - /* All computations are for a single MC - * - */ - - int tag, data; - bool is_default =true;//indication for default setup - - /* MC frontend engine channels share the same engines but logically partitioned - * For all hardware inside MC. different channels do not share resources. - * TODO: add docodeing/mux stage to steer memory requests to different channels. - */ - - //memory request reorder buffer - tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; - data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0)); - interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel; - interface_ip.line_sz = data; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; - frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); - frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //selection and arbitration logic - MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device); - - //read buffers. - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); - readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //write buffer - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); - writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); +void MemoryController::initialize_params() { + memset(&mcp, 0, sizeof(MCParameters)); } -void MCFrontEnd::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - //init stats for Peak - frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports; - frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; - frontendBuffer->tdp_stats = frontendBuffer->stats_t; +void MemoryController::set_mc_param() { + initialize_params(); - readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - readBuffer->tdp_stats = readBuffer->stats_t; + int num_children = xml_data->nChildNode("param"); + int tech_type; + int mat_type; + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); - writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - writeBuffer->tdp_stats = writeBuffer->stats_t; + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); - } - else - { - //init stats for runtime power (RTP) - frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - //For each channel, each memory word need to check the address data to achieve best scheduling results. - //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 - frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - frontendBuffer->rtp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->rtp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->rtp_stats = writeBuffer->stats_t; - } - - frontendBuffer->power_t.reset(); - readBuffer->power_t.reset(); - writeBuffer->power_t.reset(); - -// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access* -// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ -// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); - - frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access + - frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic - + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic - + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic; - - readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access* - readBuffer->local_result.power.readOp.dynamic+ - readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic); - writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access* - writeBuffer->local_result.power.readOp.dynamic+ - writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic); - - if (is_tdp) - { - power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; + ASSIGN_FP_IF("mc_clock", mcp.clockRate); + ASSIGN_INT_IF("tech_type", tech_type); + ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type); + ASSIGN_FP_IF("num_mcs", mcp.num_mcs); + ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length); + ASSIGN_INT_IF("databus_width", mcp.databus_width); + ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels); + ASSIGN_INT_IF("req_window_size_per_channel", + mcp.req_window_size_per_channel); + ASSIGN_INT_IF("IO_buffer_size_per_channel", + mcp.IO_buffer_size_per_channel); + ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width); + ASSIGN_INT_IF("opcode_width", mcp.opcodeW); + ASSIGN_INT_IF("type", mcp.type); + ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool); + ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool); + ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate); + ASSIGN_INT_IF("number_ranks", mcp.number_ranks); + ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc); + ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks); + ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc); + ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks); + ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width); + ASSIGN_INT_IF("write_buffer_assoc", mcp.write_buffer_assoc); + ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks); + ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + else { + warnUnrecognizedParam(node_name); + } } - else - { - rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; - } -} -void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - - if (is_tdp) - { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - - cout <area.get_area()); - transecEngine = new MCBackend(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ transecEngine->area.get_area()); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY = new MCPHY(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ PHY->area.get_area()); - } - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. -// transecEngine.initialize(&interface_ip); -// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; -// transecEngine.memDataWidth = dataBusWidth; -// transecEngine.memRank = XML->sys.mem.number_ranks; -// //transecEngine.memAccesses=XML->sys.mc.memory_accesses; -// //transecEngine.llcBlocksize=llcBlockSize; -// transecEngine.compute(); -// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ; -// area.set_area(area.get_area()+ transecEngine.area.get_area()); -// ///cout<<"area="<sys.mem.peak_transfer_rate; -// PHY.memDataWidth = dataBusWidth; -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// PHY.compute(); -// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ; -// area.set_area(area.get_area()+ PHY.area.get_area()); - ///cout<<"area="<sys.core[0].opcode_width + dataBusWidth; -// pipeLogic = new pipeline(is_default, &interface_ip); -// //pipeLogic.init_pipeline(is_default, &interface_ip); -// pipeLogic->compute_pipeline(); -// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); -// area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead -// -// -//// //clock -//// clockNetwork.init_wire_external(is_default, &interface_ip); -//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb -//// clockNetwork.end_wiring_level =5;//toplevel metal -//// clockNetwork.start_wiring_level =5;//toplevel metal -//// clockNetwork.num_regs = pipeLogic.tot_stage_vector; -//// clockNetwork.optimize_wire(); - - -} -void MemoryController::computeEnergy(bool is_tdp) -{ - - frontend->computeEnergy(is_tdp); - transecEngine->computeEnergy(is_tdp); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY->computeEnergy(is_tdp); - } - if (is_tdp) - { - power = power + frontend->power + transecEngine->power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - power = power + PHY->power; - } - } - else - { - rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - rt_power = rt_power + PHY->rt_power; - } - } -} - -void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "Memory Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout<2){ - frontend->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Transaction Engine:" << endl; - cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <sys.mc.mc_clock*2;//DDR double pumped - mcp.clockRate *= 1e6; - mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead - mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width; - mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width; - mcp.opcodeW =16; - mcp.num_mcs = XML->sys.mc.number_mcs; - mcp.num_channels = XML->sys.mc.memory_channels_per_mc; - mcp.reads = XML->sys.mc.memory_reads; - mcp.writes = XML->sys.mc.memory_writes; - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. - mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; - mcp.memRank = XML->sys.mc.number_ranks; - //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers - //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power - //PHY.llcBlocksize=llcBlockSize; - mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared - mcp.LVDS = XML->sys.mc.LVDS; - mcp.type = XML->sys.mc.type; - mcp.withPHY = XML->sys.mc.withPHY; - } -// else if (mc_type==FLASHC) -// { -// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped -// mcp.clockRate *= 1e6; -// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -// -// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead -// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width; -// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; -// mcp.opcodeW =16; -// mcp.num_mcs = XML->sys.flashc.number_mcs; -// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; -// mcp.reads = XML->sys.flashc.memory_reads; -// mcp.writes = XML->sys.flashc.memory_writes; -// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. -// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; -// mcp.memRank = XML->sys.flashc.number_ranks; -// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared -// mcp.LVDS = XML->sys.flashc.LVDS; -// mcp.type = XML->sys.flashc.type; -// } - else - { - cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" <nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle); + ASSIGN_FP_IF("perc_load", mcs.perc_load); + ASSIGN_FP_IF("memory_reads", mcs.reads); + ASSIGN_INT_IF("memory_writes", mcs.writes); + + else { + warnUnrecognizedStat(node_name); } + } + + // Add ECC overhead + mcp.llcBlockSize = int(ceil(mcp.llc_line_length / BITS_PER_BYTE)) + + mcp.llc_line_length; + mcp.dataBusWidth = int(ceil(mcp.databus_width / BITS_PER_BYTE)) + + mcp.databus_width; } -MCFrontEnd ::~MCFrontEnd(){ +MCFrontEnd ::~MCFrontEnd() { - if(MC_arb) {delete MC_arb; MC_arb = 0;} - if(frontendBuffer) {delete frontendBuffer; frontendBuffer = 0;} - if(readBuffer) {delete readBuffer; readBuffer = 0;} - if(writeBuffer) {delete writeBuffer; writeBuffer = 0;} + if (MC_arb) { + delete MC_arb; + MC_arb = NULL; + } + if (frontendBuffer) { + delete frontendBuffer; + frontendBuffer = NULL; + } + if (readBuffer) { + delete readBuffer; + readBuffer = NULL; + } + if (writeBuffer) { + delete writeBuffer; + writeBuffer = NULL; + } } -MemoryController ::~MemoryController(){ - - if(frontend) {delete frontend; frontend = 0;} - if(transecEngine) {delete transecEngine; transecEngine = 0;} - if(PHY) {delete PHY; PHY = 0;} - if(pipeLogic) {delete pipeLogic; pipeLogic = 0;} +MemoryController::~MemoryController() { + // TODO: use default constructor to delete children } diff --git a/ext/mcpat/memoryctrl.h b/ext/mcpat/memoryctrl.h index 65be20a8f..72e18dd5c 100644 --- a/ext/mcpat/memoryctrl.h +++ b/ext/mcpat/memoryctrl.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,89 +26,75 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #ifndef MEMORYCTRL_H_ #define MEMORYCTRL_H_ -#include "XML_Parse.h" -#include "parameter.h" -//#include "io.h" #include "array.h" -//#include "Undifferentiated_Core_Area.h" -#include - #include "basic_components.h" +#include "cachearray.h" +#include "parameter.h" -class MCBackend : public Component { - public: +class MCBackend : public McPATComponent { +public: InputParameter l_ip; uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; + MCParameters mcp; + MCStatistics mcs; statsDef stats_t; - powerDef power_t; - MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCBackend(){}; + + MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_); + void computeArea(); + void computeEnergy(); + ~MCBackend() {}; }; -class MCPHY : public Component { - public: +class MCPHY : public McPATComponent { +public: InputParameter l_ip; uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCPHY(){}; + MCParameters mcp; + MCStatistics mcs; + statsDef stats_t; + + MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_); + void computeArea(); + void computeEnergy(); + ~MCPHY() {}; }; -class MCFrontEnd : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - enum MemoryCtrl_type mc_type; - MCParam mcp; - selection_logic * MC_arb; - ArrayST * frontendBuffer; - ArrayST * readBuffer; - ArrayST * writeBuffer; +class MCFrontEnd : public McPATComponent { +public: + CacheArray* frontendBuffer; + CacheArray* readBuffer; + CacheArray* writeBuffer; + selection_logic* MC_arb; - MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + InputParameter interface_ip; + MCParameters mcp; + MCStatistics mcs; + + MCFrontEnd(XMLNode* _xml_data, + InputParameter* interface_ip_, const MCParameters & mcp_, + const MCStatistics & mcs_); ~MCFrontEnd(); }; -class MemoryController : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - enum MemoryCtrl_type mc_type; - MCParam mcp; - MCFrontEnd * frontend; - MCBackend * transecEngine; - MCPHY * PHY; - Pipeline * pipeLogic; +class MemoryController : public McPATComponent { +public: + InputParameter interface_ip; + MCParameters mcp; + MCStatistics mcs; - //clock_network clockNetwork; - MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_); + MemoryController(XMLNode* _xml_data, InputParameter* interface_ip_); + void initialize_params(); void set_mc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); ~MemoryController(); }; + #endif /* MEMORYCTRL_H_ */ diff --git a/ext/mcpat/noc.cc b/ext/mcpat/noc.cc index d5dfbb137..d6e309054 100644 --- a/ext/mcpat/noc.cc +++ b/ext/mcpat/noc.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -35,321 +36,236 @@ #include #include -#include "XML_Parse.h" #include "basic_circuit.h" +#include "common.h" #include "const.h" #include "io.h" #include "noc.h" #include "parameter.h" -NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_) -:XML(XML_interface), -ithNoC(ithNoC_), -interface_ip(*interface_ip_), -router(0), -link_bus(0), -link_bus_exist(false), -router_exist(false), -M_traffic_pattern(M_traffic_pattern_) -{ - /* - * initialize, compute and optimize individual components. - */ +OnChipNetwork::OnChipNetwork(XMLNode* _xml_data, int ithNoC_, + InputParameter* interface_ip_) + : McPATComponent(_xml_data), router(NULL), link_bus(NULL), ithNoC(ithNoC_), + interface_ip(*interface_ip_), link_bus_exist(false), + router_exist(false) { + name = "On-Chip Network"; + set_param_stats(); + local_result = init_interface(&interface_ip, name); + scktRatio = g_tp.sckt_co_eff; - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 1; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - set_noc_param(); - local_result=init_interface(&interface_ip); - scktRatio = g_tp.sckt_co_eff; + // TODO: Routers and links should be children of the NOC component + if (noc_params.type) { + init_router(); + } else { + init_link_bus(); + } +} - if (nocdynp.type) - {/* - * if NOC compute router, router links must be computed separately - * and called from external - * since total chip area must be known first - */ - init_router(); +void OnChipNetwork::init_router() { + router = new Router(noc_params.flit_size, + noc_params.virtual_channel_per_port * + noc_params.input_buffer_entries_per_vc, + noc_params.virtual_channel_per_port, + &(g_tp.peri_global), + noc_params.input_ports, noc_params.output_ports, + noc_params.M_traffic_pattern); + // TODO: Make a router class within McPAT that descends from McPATComponent + // children.push_back(router); + area.set_area(area.get_area() + router->area.get_area() * + noc_params.total_nodes); + + double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); + router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction; + router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction; + router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction; + router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction; + router_exist = true; +} + +void OnChipNetwork::init_link_bus() { + if (noc_params.type) { + link_name = "Links"; + } else { + link_name = "Bus"; + } + + interface_ip.throughput = noc_params.link_throughput / + noc_params.clockRate; + interface_ip.latency = noc_params.link_latency / noc_params.clockRate; + + link_len /= (noc_params.horizontal_nodes + noc_params.vertical_nodes) / 2; + + if (noc_params.total_nodes > 1) { + //All links are shared by neighbors + link_len /= 2; + } + link_bus = new Interconnect(xml_data, "Link", Uncore_device, + noc_params.link_base_width, + noc_params.link_base_height, + noc_params.flit_size, link_len, &interface_ip, + noc_params.link_start_wiring_level, + noc_params.clockRate, true/*pipelinable*/, + noc_params.route_over_perc); + children.push_back(link_bus); + + link_bus_exist = true; +} + +// TODO: This should use the McPATComponent::computeEnergy function to +// recursively calculate energy of routers and links and then add +void OnChipNetwork::computeEnergy() { + double pppm_t[4] = {1, 1, 1, 1}; + + // Initialize stats for TDP + tdp_stats.reset(); + tdp_stats.readAc.access = noc_stats.duty_cycle; + if (router_exist) { + // TODO: Define a regression to exercise routers + // TODO: Clean this up: it is too invasive and breaks abstraction + set_pppm(pppm_t, 1 * tdp_stats.readAc.access, 1, 1, 1); + router->power = router->power * pppm_t; + set_pppm(pppm_t, noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes); + } + if (link_bus_exist) { + if (noc_params.type) { + link_bus->int_params.active_ports = noc_params.min_ports - 1; + } else { + link_bus->int_params.active_ports = noc_params.min_ports; } - else - { - init_link_bus(link_len_); //if bus compute bus + link_bus->int_stats.duty_cycle = + noc_params.M_traffic_pattern * noc_stats.duty_cycle; + + // TODO: Decide how to roll multiple routers into a single top-level + // NOC module. I would prefer not to, but it might be a nice feature + set_pppm(pppm_t, noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes); + } + + // Initialize stats for runtime energy and power + rtp_stats.reset(); + rtp_stats.readAc.access = noc_stats.total_access; + set_pppm(pppm_t, 1, 0 , 0, 0); + if (router_exist) { + // TODO: Move this to a McPATComponent parent class of Router + router->buffer.rt_power.readOp.dynamic = + (router->buffer.power.readOp.dynamic + + router->buffer.power.writeOp.dynamic) * rtp_stats.readAc.access; + router->crossbar.rt_power.readOp.dynamic = + router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access; + router->arbiter.rt_power.readOp.dynamic = + router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access; + + router->rt_power = router->rt_power + + (router->buffer.rt_power + router->crossbar.rt_power + + router->arbiter.rt_power) * pppm_t + + router->power * pppm_lkg;//TDP power must be calculated first! + } + if (link_bus_exist) { + link_bus->int_stats.accesses = noc_stats.total_access; + } + + // Recursively compute energy + McPATComponent::computeEnergy(); +} + +void OnChipNetwork::set_param_stats() { + // TODO: Remove this or move initialization elsewhere + memset(&noc_params, 0, sizeof(OnChipNetworkParameters)); + + int num_children = xml_data->nChildNode("param"); + int i; + int mat_type; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("type", noc_params.type); + ASSIGN_FP_IF("clockrate", noc_params.clockRate); + ASSIGN_INT_IF("flit_bits", noc_params.flit_size); + ASSIGN_FP_IF("link_len", link_len); + ASSIGN_FP_IF("link_throughput", noc_params.link_throughput); + ASSIGN_FP_IF("link_latency", noc_params.link_latency); + ASSIGN_INT_IF("input_ports", noc_params.input_ports); + ASSIGN_INT_IF("output_ports", noc_params.output_ports); + ASSIGN_INT_IF("global_linked_ports", noc_params.global_linked_ports); + ASSIGN_INT_IF("horizontal_nodes", noc_params.horizontal_nodes); + ASSIGN_INT_IF("vertical_nodes", noc_params.vertical_nodes); + ASSIGN_FP_IF("chip_coverage", noc_params.chip_coverage); + ASSIGN_FP_IF("link_routing_over_percentage", + noc_params.route_over_perc); + ASSIGN_INT_IF("has_global_link", noc_params.has_global_link); + ASSIGN_INT_IF("virtual_channel_per_port", + noc_params.virtual_channel_per_port); + ASSIGN_INT_IF("input_buffer_entries_per_vc", + noc_params.input_buffer_entries_per_vc); + ASSIGN_FP_IF("M_traffic_pattern", noc_params.M_traffic_pattern); + ASSIGN_FP_IF("link_base_width", noc_params.link_base_width); + ASSIGN_FP_IF("link_base_height", noc_params.link_base_height); + ASSIGN_INT_IF("link_start_wiring_level", + noc_params.link_start_wiring_level); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); } + } - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb - // clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = corepipe.tot_stage_vector; - // clockNetwork.optimize_wire(); -} + // Change from MHz to Hz + noc_params.clockRate *= 1e6; -void NoC::init_router() -{ - router = new Router(nocdynp.flit_size, - nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc, - nocdynp.virtual_channel_per_port, &(g_tp.peri_global), - nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern); - //router->print_router(); - area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes); + interface_ip.wire_is_mat_type = mat_type; + interface_ip.wire_os_mat_type = mat_type; - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction; - router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction; - router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction; - router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction; - router_exist = true; -} + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); -void NoC ::init_link_bus(double link_len_) -{ + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + ASSIGN_FP_IF("duty_cycle", noc_stats.duty_cycle); + ASSIGN_FP_IF("total_accesses", noc_stats.total_access); -// if (nocdynp.min_ports==1 ) - if (nocdynp.type) - link_name = "Links"; - else - link_name = "Bus"; - - link_len=link_len_; - assert(link_len>0); - - interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate; - interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate; - - link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2; - - if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors - link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size, - link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc); - - link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area() - * nocdynp.global_linked_ports); - - area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes); - link_bus_exist = true; -} -void NoC::computeEnergy(bool is_tdp) -{ - //power_point_product_masks - double pppm_t[4] = {1,1,1,1}; - double M=nocdynp.duty_cycle; - if (is_tdp) - { - //init stats for TDP - stats_t.readAc.access = M; - tdp_stats = stats_t; - if (router_exist) - { - set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern - router->power = router->power*pppm_t; - set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes); - power = power + router->power*pppm_t; - } - if (link_bus_exist) - { - if (nocdynp.type) - set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports, - nocdynp.global_linked_ports, nocdynp.global_linked_ports); - //reset traffic pattern; local port do not have router links - else - set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports, - nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern - - link_bus_tot_per_Router.power = link_bus->power*pppm_t; - - set_pppm(pppm_t, nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); - power = power + link_bus_tot_per_Router.power*pppm_t; - - } - } - else - { - //init stats for runtime power (RTP) - stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses; - rtp_stats = stats_t; - set_pppm(pppm_t, 1, 0 , 0, 0); - if (router_exist) - { - router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ; - router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ; - router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ; - - router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t + - router->power*pppm_lkg;//TDP power must be calculated first! - rt_power = rt_power + router->rt_power; - } - if (link_bus_exist) - { - set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access); - link_bus->rt_power = link_bus->power * pppm_t; - rt_power = rt_power + link_bus->rt_power; - } - - } -} - - -void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - double M =M_traffic_pattern*nocdynp.duty_cycle; - /*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc; - * When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to - * be applied together with McPAT's extra traffic pattern. - * */ - if (is_tdp) - { - cout << name << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl; - cout<2){ - cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl; - cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl; - cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic) - *nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl; - cout << indent_str<< indent_str_next << "Subthreshold Leakage = " - << (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl; - cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl; - cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl; - cout < 0); } -void NoC::set_noc_param() -{ - - nocdynp.type = XML->sys.NoC[ithNoC].type; - nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate; - nocdynp.clockRate *= 1e6; - nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits; - if (nocdynp.type) - { - nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports; - nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1 - nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports); - nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1); - /* - * Except local i/o ports, all ports needs links( global_linked_ports); - * However only min_ports can be fully active simultaneously - * since the fewer number of ports (input or output ) is the bottleneck. - */ - } - else - { - nocdynp.input_ports = 1; - nocdynp.output_ports = 1; - nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports); - nocdynp.global_linked_ports = 1; - } - - nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port; - nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc; - - nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes; - nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes; - nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes; - nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle; - nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link; - nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput; - nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency; - nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage; - nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc; - - assert (nocdynp.chip_coverage <=1); - assert (nocdynp.route_over_perc <=1); - - if (nocdynp.type) - name = "NOC"; - else - name = "BUSES"; +OnChipNetwork ::~OnChipNetwork() { -} - - -NoC ::~NoC(){ - - if(router) {delete router; router = 0;} - if(link_bus) {delete link_bus; link_bus = 0;} + if (router) { + delete router; + router = 0; + } + if (link_bus) { + delete link_bus; + link_bus = 0; + } } diff --git a/ext/mcpat/noc.h b/ext/mcpat/noc.h index 31b5b3b2e..291712b9d 100644 --- a/ext/mcpat/noc.h +++ b/ext/mcpat/noc.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,13 +26,13 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #ifndef NOC_H_ #define NOC_H_ -#include "XML_Parse.h" + #include "array.h" #include "basic_components.h" #include "interconnect.h" @@ -39,37 +40,62 @@ #include "parameter.h" #include "router.h" -class NoC :public Component { - public: +class OnChipNetworkParameters { +public: + double clockRate; + int flit_size; + int input_ports; + int output_ports; + int min_ports; + int global_linked_ports; + int virtual_channel_per_port; + int input_buffer_entries_per_vc; + int horizontal_nodes; + int vertical_nodes; + int total_nodes; + double link_throughput; + double link_latency; + double chip_coverage; + double route_over_perc; + bool has_global_link; + bool type; + double M_traffic_pattern; + double link_base_width; + double link_base_height; + int link_start_wiring_level; +}; - ParseXML *XML; - int ithNoC; - InputParameter interface_ip; - double link_len; - double executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - Router * router; - interconnect * link_bus; - NoCParam nocdynp; - uca_org_t local_result; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - Component link_bus_tot_per_Router; - bool link_bus_exist; - bool router_exist; - string name, link_name; - double M_traffic_pattern; - NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0); - void set_noc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - void init_link_bus(double link_len_); - void init_router(); - void computeEnergy_link_bus(bool is_tdp=true); - void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~NoC(); +class OnChipNetworkStatistics { +public: + double duty_cycle; + double total_access; +}; + +class OnChipNetwork : public McPATComponent { +public: + Router* router; + Interconnect* link_bus; + Component link_bus_tot_per_Router; + + int ithNoC; + InputParameter interface_ip; + double link_len; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + OnChipNetworkParameters noc_params; + OnChipNetworkStatistics noc_stats; + uca_org_t local_result; + statsDef stats_t; + bool link_bus_exist; + bool router_exist; + string link_name; + + OnChipNetwork(XMLNode* _xml_data, int ithNoC_, + InputParameter* interface_ip_); + void set_param_stats(); + void computeEnergy(); + void init_link_bus(); + void init_router(); + ~OnChipNetwork(); }; #endif /* NOC_H_ */ diff --git a/ext/mcpat/processor.cc b/ext/mcpat/processor.cc deleted file mode 100644 index 8520c9633..000000000 --- a/ext/mcpat/processor.cc +++ /dev/null @@ -1,839 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ -#include -#include -#include -#include -#include -#include -#include - -#include "XML_Parse.h" -#include "array.h" -#include "basic_circuit.h" -#include "const.h" -#include "parameter.h" -#include "processor.h" -#include "version.h" - -Processor::Processor(ParseXML *XML_interface) -:XML(XML_interface),//TODO: using one global copy may have problems. - mc(0), - niu(0), - pcie(0), - flashcontroller(0) -{ - /* - * placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - * There is no point to have heterogeneous memory controller on chip, - * thus McPAT only support homogeneous memory controllers. - */ - int i; - double pppm_t[4] = {1,1,1,1}; - set_proc_param(); - if (procdynp.homoCore) - numCore = procdynp.numCore==0? 0:1; - else - numCore = procdynp.numCore; - - if (procdynp.homoL2) - numL2 = procdynp.numL2==0? 0:1; - else - numL2 = procdynp.numL2; - - if (XML->sys.Private_L2 && numCore != numL2) - { - cout<<"Number of private L2 does not match number of cores"<computeEnergy(); - cores[i]->computeEnergy(false); - if (procdynp.homoCore){ - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()*procdynp.numCore); - set_pppm(pppm_t,cores[i]->clockRate*procdynp.numCore, procdynp.numCore,procdynp.numCore,procdynp.numCore); - core.power = core.power + cores[i]->power*pppm_t; - set_pppm(pppm_t,1/cores[i]->executionTime, procdynp.numCore,procdynp.numCore,procdynp.numCore); - core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; - area.set_area(area.get_area() + core.area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - power = power + core.power; - rt_power = rt_power + core.rt_power; - } - else{ - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); - area.set_area(area.get_area() + cores[i]->area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t,cores[i]->clockRate, 1, 1, 1); - core.power = core.power + cores[i]->power*pppm_t; - power = power + cores[i]->power*pppm_t; - - set_pppm(pppm_t,1/cores[i]->executionTime, 1, 1, 1); - core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; - rt_power = rt_power + cores[i]->rt_power*pppm_t; - } - } - - if (!XML->sys.Private_L2) - { - if (numL2 >0) - for (i = 0;i < numL2; i++) - { - l2array.push_back(new SharedCache(XML,i, &interface_ip)); - l2array[i]->computeEnergy(); - l2array[i]->computeEnergy(false); - if (procdynp.homoL2){ - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()*procdynp.numL2); - set_pppm(pppm_t,l2array[i]->cachep.clockRate*procdynp.numL2, procdynp.numL2,procdynp.numL2,procdynp.numL2); - l2.power = l2.power + l2array[i]->power*pppm_t; - set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, procdynp.numL2,procdynp.numL2,procdynp.numL2); - l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l2.area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l2.power; - rt_power = rt_power + l2.rt_power; - } - else{ - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); - area.set_area(area.get_area() + l2array[i]->area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t,l2array[i]->cachep.clockRate, 1, 1, 1); - l2.power = l2.power + l2array[i]->power*pppm_t; - power = power + l2array[i]->power*pppm_t;; - set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, 1, 1, 1); - l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; - rt_power = rt_power + l2array[i]->rt_power*pppm_t; - } - } - } - - if (numL3 >0) - for (i = 0;i < numL3; i++) - { - l3array.push_back(new SharedCache(XML,i, &interface_ip, L3)); - l3array[i]->computeEnergy(); - l3array[i]->computeEnergy(false); - if (procdynp.homoL3){ - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()*procdynp.numL3); - set_pppm(pppm_t,l3array[i]->cachep.clockRate*procdynp.numL3, procdynp.numL3,procdynp.numL3,procdynp.numL3); - l3.power = l3.power + l3array[i]->power*pppm_t; - set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, procdynp.numL3,procdynp.numL3,procdynp.numL3); - l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l3.area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l3.power; - rt_power = rt_power + l3.rt_power; - - } - else{ - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); - area.set_area(area.get_area() + l3array[i]->area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm - set_pppm(pppm_t,l3array[i]->cachep.clockRate, 1, 1, 1); - l3.power = l3.power + l3array[i]->power*pppm_t; - power = power + l3array[i]->power*pppm_t; - set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, 1, 1, 1); - l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; - rt_power = rt_power + l3array[i]->rt_power*pppm_t; - - } - } - if (numL1Dir >0) - for (i = 0;i < numL1Dir; i++) - { - l1dirarray.push_back(new SharedCache(XML,i, &interface_ip, L1Directory)); - l1dirarray[i]->computeEnergy(); - l1dirarray[i]->computeEnergy(false); - if (procdynp.homoL1Dir){ - l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()*procdynp.numL1Dir); - set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate*procdynp.numL1Dir, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); - l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l1dir.area.get_area());//placement and routing overhead is 10%, l1dir scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l1dir.power; - rt_power = rt_power + l1dir.rt_power; - - } - else{ - l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); - set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate, 1, 1, 1); - l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; - power = power + l1dirarray[i]->power; - set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, 1, 1, 1); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; - rt_power = rt_power + l1dirarray[i]->rt_power; - } - } - - if (numL2Dir >0) - for (i = 0;i < numL2Dir; i++) - { - l2dirarray.push_back(new SharedCache(XML,i, &interface_ip, L2Directory)); - l2dirarray[i]->computeEnergy(); - l2dirarray[i]->computeEnergy(false); - if (procdynp.homoL2Dir){ - l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()*procdynp.numL2Dir); - set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate*procdynp.numL2Dir, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); - l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l2dir.area.get_area());//placement and routing overhead is 10%, l2dir scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l2dir.power; - rt_power = rt_power + l2dir.rt_power; - - } - else{ - l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); - set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate, 1, 1, 1); - l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; - power = power + l2dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, 1, 1, 1); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; - rt_power = rt_power + l2dirarray[i]->rt_power*pppm_t; - } - } - - if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - mc = new MemoryController(XML, &interface_ip, MC); - mc->computeEnergy(); - mc->computeEnergy(false); - mcs.area.set_area(mcs.area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); - area.set_area(area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); - set_pppm(pppm_t,XML->sys.mc.number_mcs*mc->mcp.clockRate, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); - mcs.power = mc->power*pppm_t; - power = power + mcs.power; - set_pppm(pppm_t,1/mc->mcp.executionTime, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); - mcs.rt_power = mc->rt_power*pppm_t; - rt_power = rt_power + mcs.rt_power; - - } - - if (XML->sys.flashc.number_mcs >0 )//flash controller - { - flashcontroller = new FlashController(XML, &interface_ip); - flashcontroller->computeEnergy(); - flashcontroller->computeEnergy(false); - double number_fcs = flashcontroller->fcp.num_mcs; - flashcontrollers.area.set_area(flashcontrollers.area.get_area()+flashcontroller->area.get_area()*number_fcs); - area.set_area(area.get_area()+flashcontrollers.area.get_area()); - set_pppm(pppm_t,number_fcs, number_fcs ,number_fcs, number_fcs ); - flashcontrollers.power = flashcontroller->power*pppm_t; - power = power + flashcontrollers.power; - set_pppm(pppm_t,number_fcs , number_fcs ,number_fcs ,number_fcs ); - flashcontrollers.rt_power = flashcontroller->rt_power*pppm_t; - rt_power = rt_power + flashcontrollers.rt_power; - - } - - if (XML->sys.niu.number_units >0) - { - niu = new NIUController(XML, &interface_ip); - niu->computeEnergy(); - niu->computeEnergy(false); - nius.area.set_area(nius.area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); - area.set_area(area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); - set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); - nius.power = niu->power*pppm_t; - power = power + nius.power; - set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); - nius.rt_power = niu->rt_power*pppm_t; - rt_power = rt_power + nius.rt_power; - - } - - if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels >0) - { - pcie = new PCIeController(XML, &interface_ip); - pcie->computeEnergy(); - pcie->computeEnergy(false); - pcies.area.set_area(pcies.area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); - area.set_area(area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); - set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); - pcies.power = pcie->power*pppm_t; - power = power + pcies.power; - set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); - pcies.rt_power = pcie->rt_power*pppm_t; - rt_power = rt_power + pcies.rt_power; - - } - - if (numNOC >0) - { - for (i = 0;i < numNOC; i++) - { - if (XML->sys.NoC[i].type) - {//First add up area of routers if NoC is used - nocs.push_back(new NoC(XML,i, &interface_ip, 1)); - if (procdynp.homoNOC) - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - else - {//Bus based interconnect - nocs.push_back(new NoC(XML,i, &interface_ip, 1, sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage))); - if (procdynp.homoNOC){ - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - } - - /* - * Compute global links associated with each NOC, if any. This must be done at the end (even after the NOC router part) since the total chip - * area must be obtain to decide the link routing - */ - for (i = 0;i < numNOC; i++) - { - if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) - { - nocs[i]->init_link_bus(sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage));//compute global links - if (procdynp.homoNOC) - { - noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes - * procdynp.numNOC); - area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes - * procdynp.numNOC); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes); - area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes); - } - } - } - //Compute energy of NoC (w or w/o links) or buses - for (i = 0;i < numNOC; i++) - { - nocs[i]->computeEnergy(); - nocs[i]->computeEnergy(false); - if (procdynp.homoNOC){ - set_pppm(pppm_t,procdynp.numNOC*nocs[i]->nocdynp.clockRate, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); - noc.power = noc.power + nocs[i]->power*pppm_t; - set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); - noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; - power = power + noc.power; - rt_power = rt_power + noc.rt_power; - } - else - { - set_pppm(pppm_t,nocs[i]->nocdynp.clockRate, 1, 1, 1); - noc.power = noc.power + nocs[i]->power*pppm_t; - power = power + nocs[i]->power*pppm_t; - set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, 1, 1, 1); - noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; - rt_power = rt_power + nocs[i]->rt_power*pppm_t; - - - } - } - } - -// //clock power -// globalClock.init_wire_external(is_default, &interface_ip); -// globalClock.clk_area =area*1e6; //change it from mm^2 to um^2 -// globalClock.end_wiring_level =5;//toplevel metal -// globalClock.start_wiring_level =5;//toplevel metal -// globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes -// globalClock.optimize_wire(); - -} - -void Processor::displayDeviceType(int device_type_, uint32_t indent) -{ - string indent_str(indent, ' '); - - switch ( device_type_ ) { - - case 0 : - cout <sys.longer_channel_device; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - if (is_tdp) - { - - if (plevel<5) - { - cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR - << " of " << VER_UPDATE << ") results (current print level is "<< plevel - <<", please increase print level to see the details in components): "<sys.core_tech_node<<" nm"<sys.core[0].clock_rate<0){ - cout <sys.number_of_cores << " cores "<sys.device_type,indent); - cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << core.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl; - cout <sys.Private_L2) - { - if (numL2 >0){ - cout <sys.L2[0].device_type,indent); - cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l2.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L3[0].device_type, indent); - cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l3.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l1dir.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l2dir.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.device_type, indent); - cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << noc.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl; - cout <sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - cout <sys.mc.number_mcs << " Memory Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl; - cout <sys.flashc.number_mcs >0) - { - cout <fcp.num_mcs << " Flash/SSD Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl; - cout <sys.niu.number_units >0 ) - { - cout <niup.num_units << " Network Interface Units "<sys.device_type, indent); - cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl; - cout <sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) - { - cout <pciep.num_units << " PCIe Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl; - cout <1) - { - for (i = 0;i < numCore; i++) - { - cores[i]->displayEnergy(indent+4,plevel,is_tdp); - cout <<"*****************************************************************************************"<sys.Private_L2) - { - for (i = 0;i < numL2; i++) - { - l2array[i]->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - mc->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0) - { - flashcontroller->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.niu.number_units >0 ) - { - niu->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) - { - pcie->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,plevel,is_tdp); - cout <<"*****************************************************************************************"<sys.homogeneous_cores); - procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s); - procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s); - procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs); - procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories); - procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories); - - procdynp.numCore = XML->sys.number_of_cores; - procdynp.numL2 = XML->sys.number_of_L2s; - procdynp.numL3 = XML->sys.number_of_L3s; - procdynp.numNOC = XML->sys.number_of_NoCs; - procdynp.numL1Dir = XML->sys.number_of_L1Directories; - procdynp.numL2Dir = XML->sys.number_of_L2Directories; - procdynp.numMC = XML->sys.mc.number_mcs; - procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc; - -// if (procdynp.numCore<1) -// { -// cout<<" The target processor should at least have one core on chip." <2) - // { - // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<sys.device_type; - interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type; - interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type; - interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type; - - interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type; - interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. - interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.leakage_power_wt = 0; - interface_ip.cycle_time_wt = 0; - - interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied. - interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.leakage_power_dev = 10000; - interface_ip.cycle_time_dev = 10000; - - interface_ip.ed = 2; - interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately - interface_ip.int_prefetch_w = 1; - interface_ip.page_sz_bits = 0; - interface_ip.temp = debug?360: XML->sys.temperature; - interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node; - interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000; - - //***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors. - //They will be overridden during each components initialization - interface_ip.cache_sz =64; - interface_ip.line_sz = 1; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = 64; - interface_ip.access_mode = 2; - - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - - interface_ip.is_main_mem = false; - interface_ip.rpters_in_htree = true ; - interface_ip.ver_htree_wires_over_array = 0; - interface_ip.broadcast_addr_din_over_ver_htrees = 0; - - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 1; - interface_ip.nuca = 0; - interface_ip.nuca_bank_count = 0; - interface_ip.is_cache =true; - interface_ip.pure_ram =false; - interface_ip.pure_cam =false; - interface_ip.force_cache_config =false; - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - interface_ip.force_wiretype = false; - interface_ip.print_detail = 1; - interface_ip.add_ecc_b_ =true; -} - -Processor::~Processor(){ - while (!cores.empty()) - { - delete cores.back(); - cores.pop_back(); - } - while (!l2array.empty()) - { - delete l2array.back(); - l2array.pop_back(); - } - while (!l3array.empty()) - { - delete l3array.back(); - l3array.pop_back(); - } - while (!nocs.empty()) - { - delete nocs.back(); - nocs.pop_back(); - } - if (!mc) - { - delete mc; - } - if (!niu) - { - delete niu; - } - if (!pcie) - { - delete pcie; - } - if (!flashcontroller) - { - delete flashcontroller; - } -}; diff --git a/ext/mcpat/sharedcache.cc b/ext/mcpat/sharedcache.cc deleted file mode 100644 index 3a61e1b6d..000000000 --- a/ext/mcpat/sharedcache.cc +++ /dev/null @@ -1,1162 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#include -#include -#include -#include -#include - -#include "XML_Parse.h" -#include "arbiter.h" -#include "array.h" -#include "basic_circuit.h" -#include "const.h" -#include "io.h" -#include "logic.h" -#include "parameter.h" -#include "sharedcache.h" - -SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* interface_ip_, enum cache_level cacheL_) -:XML(XML_interface), - ithCache(ithCache_), - interface_ip(*interface_ip_), - cacheL(cacheL_), - dir_overhead(0) -{ - int idx; - int tag, data; - bool is_default, debug; - enum Device_ty device_t; - enum Core_type core_t; - double size, line, assoc, banks; - if (cacheL==L2 && XML->sys.Private_L2) - { - device_t=Core_device; - core_t = (enum Core_type)XML->sys.core[ithCache].machine_type; - } - else - { - device_t=LLC_device; - core_t = Inorder; - } - - debug = false; - is_default=true;//indication for default setup - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 1; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - set_cache_param(); - - //All lower level cache are physically indexed and tagged. - size = cachep.capacity; - line = cachep.blockW; - assoc = cachep.assoc; - banks = cachep.nbanks; - if ((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)) - { - assoc = 0; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - interface_ip.num_search_ports = 1; - } - else - { - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.num_search_ports = 0; - if (cachep.dir_ty==SBT) - { - dir_overhead = ceil(XML->sys.number_of_cores/8.0)*8/(cachep.blockW*8); - line = cachep.blockW*(1+ dir_overhead) ; - size = cachep.capacity*(1+ dir_overhead); - - } - } -// if (XML->sys.first_level_dir==2) -// tag += int(XML->sys.domain_size + 5); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = (int)size; - interface_ip.line_sz = (int)line; - interface_ip.assoc = (int)assoc; - interface_ip.nbanks = (int)banks; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 1; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; -// interface_ip.force_cache_config =true; -// interface_ip.ndwl = 4; -// interface_ip.ndbl = 8; -// interface_ip.nspd = 1; -// interface_ip.ndcm =1 ; -// interface_ip.ndsam1 =1; -// interface_ip.ndsam2 =1; - unicache.caches = new ArrayST(&interface_ip, cachep.name + "cache", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.caches->local_result.area); - area.set_area(area.get_area()+ unicache.caches->local_result.area); - interface_ip.force_cache_config =false; - - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = cachep.missb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput;//means cycle time - interface_ip.latency = cachep.latency;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 1; - unicache.missb = new ArrayST(&interface_ip, cachep.name + "MissB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.missb->local_result.area); - area.set_area(area.get_area()+ unicache.missb->local_result.area); - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*cachep.fu_size ; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.ifb = new ArrayST(&interface_ip, cachep.name + "FillB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.ifb->local_result.area); - area.set_area(area.get_area()+ unicache.ifb->local_result.area); - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = unicache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = cachep.prefetchb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.prefetchb = new ArrayST(&interface_ip, cachep.name + "PrefetchB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.prefetchb->local_result.area); - area.set_area(area.get_area()+ unicache.prefetchb->local_result.area); - //WBB - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = cachep.wbb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.wbb = new ArrayST(&interface_ip, cachep.name + "WBB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.wbb->local_result.area); - area.set_area(area.get_area()+ unicache.wbb->local_result.area); - } - // //pipeline -// interface_ip.pipeline_stages = int(ceil(llCache.caches.local_result.access_time/llCache.caches.local_result.cycle_time)); -// interface_ip.per_stage_vector = llCache.caches.l_ip.out_w + llCache.caches.l_ip.tag_w ; -// pipeLogicCache.init_pipeline(is_default, &interface_ip); -// pipeLogicCache.compute_pipeline(); - - /* - if (!((XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==1) - ||(XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==2)))//not single level IC and DIC - { - //directory Now assuming one directory per bank, TODO:should change it later - size = XML->sys.L2directory.L2Dir_config[0]; - line = XML->sys.L2directory.L2Dir_config[1]; - assoc = XML->sys.L2directory.L2Dir_config[2]; - banks = XML->sys.L2directory.L2Dir_config[3]; - tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate - interface_ip.specific_tag = 0; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; - interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; - interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; - interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate; - interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - - strcpy(directory.caches.name,"L2 Directory"); - directory.caches.init_cache(&interface_ip); - directory.caches.optimize_array(); - directory.area += directory.caches.local_result.area; - //output_data_csv(directory.caches.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.missb.name,"directoryMissB"); - directory.missb.init_cache(&interface_ip); - directory.missb.optimize_array(); - directory.area += directory.missb.local_result.area; - //output_data_csv(directory.missb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.ifb.name,"directoryFillB"); - directory.ifb.init_cache(&interface_ip); - directory.ifb.optimize_array(); - directory.area += directory.ifb.local_result.area; - //output_data_csv(directory.ifb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = directory.caches.l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.prefetchb.name,"directoryPrefetchB"); - directory.prefetchb.init_cache(&interface_ip); - directory.prefetchb.optimize_array(); - directory.area += directory.prefetchb.local_result.area; - //output_data_csv(directory.prefetchb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.wbb.name,"directoryWBB"); - directory.wbb.init_cache(&interface_ip); - directory.wbb.optimize_array(); - directory.area += directory.wbb.local_result.area; - } - - if (XML->sys.number_of_dir_levels ==2 && XML->sys.first_level_dir==0) - { - //first level directory - size = XML->sys.L2directory.L2Dir_config[0]*XML->sys.domain_size/128; - line = int(ceil(XML->sys.domain_size/8.0)); - assoc = XML->sys.L2directory.L2Dir_config[2]; - banks = XML->sys.L2directory.L2Dir_config[3]; - tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; - interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; - interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; - interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate; - interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - - strcpy(directory1.caches.name,"first level Directory"); - directory1.caches.init_cache(&interface_ip); - directory1.caches.optimize_array(); - directory1.area += directory1.caches.local_result.area; - //output_data_csv(directory.caches.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.missb.name,"directory1MissB"); - directory1.missb.init_cache(&interface_ip); - directory1.missb.optimize_array(); - directory1.area += directory1.missb.local_result.area; - //output_data_csv(directory.missb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.ifb.name,"directory1FillB"); - directory1.ifb.init_cache(&interface_ip); - directory1.ifb.optimize_array(); - directory1.area += directory1.ifb.local_result.area; - //output_data_csv(directory.ifb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = directory1.caches.l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.prefetchb.name,"directory1PrefetchB"); - directory1.prefetchb.init_cache(&interface_ip); - directory1.prefetchb.optimize_array(); - directory1.area += directory1.prefetchb.local_result.area; - //output_data_csv(directory.prefetchb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.wbb.name,"directoryWBB"); - directory1.wbb.init_cache(&interface_ip); - directory1.wbb.optimize_array(); - directory1.area += directory1.wbb.local_result.area; - } - - if (XML->sys.first_level_dir==1)//IC - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = int(ceil(XML->sys.domain_size/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.domain_size*data*XML->sys.L2[ithCache].L2_config[0]/XML->sys.L2[ithCache].L2_config[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1024; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(inv_dir.caches.name,"inv_dir"); - inv_dir.caches.init_cache(&interface_ip); - inv_dir.caches.optimize_array(); - inv_dir.area = inv_dir.caches.local_result.area; - - } -*/ -// //pipeline -// interface_ip.pipeline_stages = int(ceil(directory.caches.local_result.access_time/directory.caches.local_result.cycle_time)); -// interface_ip.per_stage_vector = directory.caches.l_ip.out_w + directory.caches.l_ip.tag_w ; -// pipeLogicDirectory.init_pipeline(is_default, &interface_ip); -// pipeLogicDirectory.compute_pipeline(); -// -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = pipeLogicCache.tot_stage_vector + pipeLogicDirectory.tot_stage_vector; -// clockNetwork.optimize_wire(); - -} - - -void SharedCache::computeEnergy(bool is_tdp) -{ - double homenode_data_access = (cachep.dir_ty==SBT)? 0.9:1.0; - if (is_tdp) - { - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - //init stats for Peak - unicache.caches->stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = .33*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access); - homenode_stats_t.readAc.miss = 0; - homenode_stats_t.readAc.hit = homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss; - homenode_stats_t.writeAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access); - homenode_stats_t.writeAc.miss = 0; - homenode_stats_t.writeAc.hit = homenode_stats_t.writeAc.access - homenode_stats_t.writeAc.miss; - homenode_tdp_stats = homenode_stats_t; - } - - unicache.missb->stats_t.readAc.access = unicache.missb->l_ip.num_search_ports; - unicache.missb->stats_t.writeAc.access = unicache.missb->l_ip.num_search_ports; - unicache.missb->tdp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access = unicache.ifb->l_ip.num_search_ports; - unicache.ifb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports; - unicache.ifb->tdp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access = unicache.prefetchb->l_ip.num_search_ports; - unicache.prefetchb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports; - unicache.prefetchb->tdp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access = unicache.wbb->l_ip.num_search_ports; - unicache.wbb->stats_t.writeAc.access = unicache.wbb->l_ip.num_search_ports; - unicache.wbb->tdp_stats = unicache.wbb->stats_t; - } - else - { - unicache.caches->stats_t.readAc.access = unicache.caches->l_ip.num_search_ports*cachep.duty_cycle; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = 0; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; - - } - - } - else - { - //init stats for runtime power (RTP) - if (cacheL==L2) - { - unicache.caches->stats_t.readAc.access = XML->sys.L2[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L2[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L2[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_rtp_stats.readAc.access = XML->sys.L2[ithCache].homenode_read_accesses; - homenode_rtp_stats.readAc.miss = XML->sys.L2[ithCache].homenode_read_misses; - homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; - homenode_rtp_stats.writeAc.access = XML->sys.L2[ithCache].homenode_write_accesses; - homenode_rtp_stats.writeAc.miss = XML->sys.L2[ithCache].homenode_write_misses; - homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; - } - } - else if (cacheL==L3) - { - unicache.caches->stats_t.readAc.access = XML->sys.L3[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L3[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L3[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_rtp_stats.readAc.access = XML->sys.L3[ithCache].homenode_read_accesses; - homenode_rtp_stats.readAc.miss = XML->sys.L3[ithCache].homenode_read_misses; - homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; - homenode_rtp_stats.writeAc.access = XML->sys.L3[ithCache].homenode_write_accesses; - homenode_rtp_stats.writeAc.miss = XML->sys.L3[ithCache].homenode_write_misses; - homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; - } - } - else if (cacheL==L1Directory) - { - unicache.caches->stats_t.readAc.access = XML->sys.L1Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L1Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L1Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L1Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - } - else if (cacheL==L2Directory) - { - unicache.caches->stats_t.readAc.access = XML->sys.L2Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L2Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L2Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - } - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { //Assuming write back and write-allocate cache - - unicache.missb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss ; - unicache.missb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.ifb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.wbb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - if (cachep.dir_ty==SBT) - { - unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.wbb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - } - unicache.wbb->rtp_stats = unicache.wbb->stats_t; - - } - - } - - unicache.power_t.reset(); - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.hit*unicache.caches->local_result.power.readOp.dynamic+ - unicache.caches->stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic+ - unicache.caches->stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.writeOp.dynamic+ - unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic);//write miss will also generate a write later - - if (cachep.dir_ty==SBT) - { - unicache.power_t.readOp.dynamic += homenode_stats_t.readAc.hit * (unicache.caches->local_result.data_array2->power.readOp.dynamic*dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic) + - homenode_stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic + - homenode_stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic + - homenode_stats_t.writeAc.hit*(unicache.caches->local_result.data_array2->power.writeOp.dynamic*dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic+ - homenode_stats_t.writeAc.miss*unicache.caches->local_result.power.writeOp.dynamic);//write miss on dynamic home node will generate a replacement write on whole cache block - - - } - - unicache.power_t.readOp.dynamic += unicache.missb->stats_t.readAc.access*unicache.missb->local_result.power.searchOp.dynamic + - unicache.missb->stats_t.writeAc.access*unicache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - unicache.power_t.readOp.dynamic += unicache.ifb->stats_t.readAc.access*unicache.ifb->local_result.power.searchOp.dynamic + - unicache.ifb->stats_t.writeAc.access*unicache.ifb->local_result.power.writeOp.dynamic; - unicache.power_t.readOp.dynamic += unicache.prefetchb->stats_t.readAc.access*unicache.prefetchb->local_result.power.searchOp.dynamic + - unicache.prefetchb->stats_t.writeAc.access*unicache.prefetchb->local_result.power.writeOp.dynamic; - unicache.power_t.readOp.dynamic += unicache.wbb->stats_t.readAc.access*unicache.wbb->local_result.power.searchOp.dynamic + - unicache.wbb->stats_t.writeAc.access*unicache.wbb->local_result.power.writeOp.dynamic; - } - else - { - unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.access*unicache.caches->local_result.power.searchOp.dynamic+ - unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic); - } - - if (is_tdp) - { - unicache.power = unicache.power_t + (unicache.caches->local_result.power)*pppm_lkg; - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.power = unicache.power+ - (unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power)*pppm_lkg; - } - power = power + unicache.power; -// cout<<"unicache.caches->local_result.power.readOp.dynamic"<local_result.power.readOp.dynamic<local_result.power.writeOp.dynamic"<local_result.power.writeOp.dynamic<local_result.power)*pppm_lkg; - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - (unicache.rt_power = unicache.rt_power + - unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power)*pppm_lkg; - } - rt_power = rt_power + unicache.rt_power; - } -} - -void SharedCache::displayEnergy(uint32_t indent,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << (XML->sys.Private_L2? indent_str:"")<< cachep.name << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*cachep.clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/cachep.executionTime << " W" << endl; - cout <sys.first_level_dir==1) -// { -// inv_dir.maxPower = inv_dir.caches.local_result.power.searchOp.dynamic*clockRate*XML->sys.domain_size; -// cc.power.readOp.dynamic = inv_dir.maxPower*scktRatio*64/XML->sys.domain_size; -// cc.power.readOp.leakage = inv_dir.caches.local_result.power.readOp.leakage*inv_dir.caches.l_ip.nbanks*64/XML->sys.domain_size; -// -// cc.area.set_area(inv_dir.area*64/XML->sys.domain_size); -// cout<<"CC area="<sys.number_of_dir_levels==2) -// { -// -// directory.maxPower = 0.0; -// directory.maxPower += (directory.caches.l_ip.num_rw_ports*(0.67*directory.caches.local_result.power.readOp.dynamic+0.33*directory.caches.local_result.power.writeOp.dynamic) -// +directory.caches.l_ip.num_rd_ports*directory.caches.local_result.power.readOp.dynamic+directory.caches.l_ip.num_wr_ports*directory.caches.local_result.power.writeOp.dynamic -// +directory.caches.l_ip.num_se_rd_ports*directory.caches.local_result.power.readOp.dynamic)*clockRate; -// ///cout<<"directory.maxPower=" <sys.L2[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - cachep.capacity = XML->sys.L2[ithCache].L2_config[0]; - cachep.blockW = XML->sys.L2[ithCache].L2_config[1]; - cachep.assoc = XML->sys.L2[ithCache].L2_config[2]; - cachep.nbanks = XML->sys.L2[ithCache].L2_config[3]; - cachep.throughput = XML->sys.L2[ithCache].L2_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L2[ithCache].L2_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L2[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L2[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) - { - cachep.dir_ty = NonDir; - } - else - { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - } - else if (cacheL==L3) - { - cachep.name = "L3"; - cachep.clockRate = XML->sys.L3[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - cachep.capacity = XML->sys.L3[ithCache].L3_config[0]; - cachep.blockW = XML->sys.L3[ithCache].L3_config[1]; - cachep.assoc = XML->sys.L3[ithCache].L3_config[2]; - cachep.nbanks = XML->sys.L3[ithCache].L3_config[3]; - cachep.throughput = XML->sys.L3[ithCache].L3_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L3[ithCache].L3_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L3[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L3[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L3[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L3[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L3[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) - { - cachep.dir_ty = NonDir; - } - else - { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - } - else if (cacheL==L1Directory) - { - cachep.name = "First Level Directory"; - cachep.dir_ty = (enum Dir_type) XML->sys.L1Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L1Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type; - cachep.capacity = XML->sys.L1Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L1Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L1Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L1Directory[ithCache].Dir_config[3]; - cachep.throughput = XML->sys.L1Directory[ithCache].Dir_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L1Directory[ithCache].Dir_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L1Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle; - } - else if (cacheL==L2Directory) - { - cachep.name = "Second Level Directory"; - cachep.dir_ty = (enum Dir_type) XML->sys.L2Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L2Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; - cachep.capacity = XML->sys.L2Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L2Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L2Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L2Directory[ithCache].Dir_config[3]; - cachep.throughput = XML->sys.L2Directory[ithCache].Dir_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L2Directory[ithCache].Dir_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L2Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L2Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle; - } - //cachep.cache_duty_cycle=cachep.dir_duty_cycle = 0.35; -} - diff --git a/ext/mcpat/system.cc b/ext/mcpat/system.cc new file mode 100644 index 000000000..657f7f38d --- /dev/null +++ b/ext/mcpat/system.cc @@ -0,0 +1,350 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "array.h" +#include "basic_circuit.h" +#include "common.h" +#include "const.h" +#include "parameter.h" +#include "system.h" +#include "version.h" + +// TODO: Fix this constructor to default initialize all pointers to NULL +System::System(XMLNode* _xml_data) + : McPATComponent(_xml_data) { + int i; + int currCore = 0; + int currNOC = 0; + name = "System"; + set_proc_param(); + + // TODO: This loop can (and should) be called by every component in + // the hierarchy. Consider moving it to McPATComponent + int numChildren = xml_data->nChildNode("component"); + for (i = 0; i < numChildren; i++ ) { + // For each child node of the system, + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) { + warnMissingComponentType(childXML->getAttribute("id")); + + } STRCMP(type, "Core") { + // TODO: If homogeneous cores, and currCore > 0, just copy core 0 + children.push_back(new Core(childXML, currCore, &interface_ip)); + currCore++; + } STRCMP(type, "CacheUnit") { + children.push_back(new CacheUnit(childXML, &interface_ip)); + } STRCMP(type, "CacheController") { + // TODO: Remove reliance on interface_ip - there should be a better + // way to share global variables than passing, copying + children.push_back(new CacheController(childXML, &interface_ip)); + } STRCMP(type, "MemoryController") { + children.push_back(new MemoryController(childXML, &interface_ip)); + } STRCMP(type, "FlashController") { + children.push_back(new FlashController(childXML, &interface_ip)); + } STRCMP(type, "NIUController") { + children.push_back(new NIUController(childXML, &interface_ip)); + } STRCMP(type, "PCIeController") { + children.push_back(new PCIeController(childXML, &interface_ip)); + } STRCMP(type, "Memory") { + // TODO: + warnIncompleteComponentType(type); + } STRCMP(type, "OnChipNetwork") { + // TODO: Many of the parameters to this constructor should be + // handled in another way + children.push_back(new OnChipNetwork(childXML, currNOC, + &interface_ip)); + currNOC++; + warnIncompleteComponentType(type); + } STRCMP(type, "BusInterconnect") { + // TODO: Many of the parameters to this constructor should be + // handled in another way + children.push_back(new BusInterconnect(childXML, &interface_ip)); + warnIncompleteComponentType(type); + + // TODO: Add a directory data type that can handle the directories + // as defined by certain McScript output + } else { + warnUnrecognizedComponent(type); + } + } +} + +void System::displayDeviceType(int device_type_, uint32_t indent) { + string indent_str(indent, ' '); + cout << indent_str << "Device Type = "; + + switch ( device_type_ ) { + case 0: + cout << "ITRS high performance device type" << endl; + break; + case 1: + cout << "ITRS low standby power device type" << endl; + break; + case 2: + cout << "ITRS low operating power device type" << endl; + break; + case 3: + cout << "LP-DRAM device type" << endl; + break; + case 4: + cout << "COMM-DRAM device type" << endl; + break; + default: + cout << indent_str << "Unknown!" << endl; + exit(0); + } +} + +void System::displayInterconnectType(int interconnect_type_, uint32_t indent) { + string indent_str(indent, ' '); + cout << indent_str << "Interconnect metal projection = "; + + switch ( interconnect_type_ ) { + case 0: + cout << "aggressive interconnect technology projection" << endl; + break; + case 1: + cout << "conservative interconnect technology projection" << endl; + break; + default: + cout << indent_str << "Unknown!" << endl; + exit(0); + } +} + +// TODO: Migrate this down to the McPATComponent::displayData function +void System::displayData(uint32_t indent, int plevel) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + if (plevel < 5) { + cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE << ") results (current print level is " + << plevel + << ", please increase print level to see the details in " + << "components) " << endl; + } else { + cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE << ") results (current print level is 5)" + << endl; + } + + cout << "*****************************************************************" + << "************************" << endl; + cout << indent_str << "Technology " << core_tech_node << " nm" << endl; + if (longer_channel_device) + cout << indent_str << "Using Long Channel Devices When Appropriate" << endl; + displayInterconnectType(interconnect_projection_type, indent); + cout << indent_str << "Target Clock Rate (MHz) " << target_core_clockrate / 1e6 << endl; + cout << endl; + + cout << "*****************************************************************" + << "************************" << endl; + + McPATComponent::displayData(indent, plevel); +} + +void System::set_proc_param() { + // TODO: Consider creating a SystemParams class that tracks system-wide + // parameters like these + longer_channel_device = false; + core_tech_node = -1; + temperature = -1; + interconnect_projection_type = -1; + device_type = -1; + physical_address_width = -1; + + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("core_tech_node", core_tech_node); + ASSIGN_INT_IF("target_core_clockrate", target_core_clockrate); + ASSIGN_INT_IF("temperature", temperature); + ASSIGN_INT_IF("device_type", device_type); + ASSIGN_INT_IF("longer_channel_device", longer_channel_device); + ASSIGN_INT_IF("interconnect_projection_type", + interconnect_projection_type); + ASSIGN_INT_IF("machine_bits", data_path_width); + ASSIGN_INT_IF("virtual_address_width", virtual_address_width); + ASSIGN_INT_IF("physical_address_width", physical_address_width); + ASSIGN_INT_IF("virtual_memory_page_size", virtual_memory_page_size); + ASSIGN_INT_IF("wire_is_mat_type", interface_ip.wire_is_mat_type); + ASSIGN_INT_IF("wire_os_mat_type", interface_ip.wire_os_mat_type); + ASSIGN_INT_IF("delay_wt", interface_ip.delay_wt); + ASSIGN_INT_IF("area_wt", interface_ip.area_wt); + ASSIGN_INT_IF("dynamic_power_wt", interface_ip.dynamic_power_wt); + ASSIGN_INT_IF("leakage_power_wt", interface_ip.leakage_power_wt); + ASSIGN_INT_IF("cycle_time_wt", interface_ip.cycle_time_wt); + ASSIGN_INT_IF("delay_dev", interface_ip.delay_dev); + ASSIGN_INT_IF("area_dev", interface_ip.area_dev); + ASSIGN_INT_IF("dynamic_power_dev", interface_ip.dynamic_power_dev); + ASSIGN_INT_IF("leakage_power_dev", interface_ip.leakage_power_dev); + ASSIGN_INT_IF("cycle_time_dev", interface_ip.cycle_time_dev); + ASSIGN_INT_IF("ed", interface_ip.ed); + ASSIGN_INT_IF("burst_len", interface_ip.burst_len); + ASSIGN_INT_IF("int_prefetch_w", interface_ip.int_prefetch_w); + ASSIGN_INT_IF("page_sz_bits", interface_ip.page_sz_bits); + ASSIGN_ENUM_IF("rpters_in_htree", interface_ip.rpters_in_htree, bool); + ASSIGN_INT_IF("ver_htree_wires_over_array", + interface_ip.ver_htree_wires_over_array); + ASSIGN_INT_IF("broadcast_addr_din_over_ver_htrees", + interface_ip.broadcast_addr_din_over_ver_htrees); + ASSIGN_INT_IF("nuca", interface_ip.nuca); + ASSIGN_INT_IF("nuca_bank_count", interface_ip.nuca_bank_count); + ASSIGN_ENUM_IF("force_cache_config", + interface_ip.force_cache_config, bool); + ASSIGN_ENUM_IF("wt", interface_ip.wt, Wire_type); + ASSIGN_INT_IF("force_wiretype", interface_ip.force_wiretype); + ASSIGN_INT_IF("print_detail", interface_ip.print_detail); + ASSIGN_ENUM_IF("add_ecc_b_", interface_ip.add_ecc_b_, bool); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + target_core_clockrate *= 1e6; + interconnect_projection_type = + (interconnect_projection_type == 0) ? 0 : 1; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("total_cycles", total_cycles); + + else { + warnUnrecognizedStat(node_name); + } + } + + if (temperature < 0) { + errorUnspecifiedParam("temperature"); + } + + if (core_tech_node < 0) { + errorUnspecifiedParam("core_tech_node"); + } + + if (interconnect_projection_type < 0) { + errorUnspecifiedParam("interconnect_projection_type"); + } + + if (device_type < 0) { + errorUnspecifiedParam("device_type"); + } + + if (physical_address_width <= 0) { + errorNonPositiveParam("physical_address_width"); + } + + if (data_path_width <= 0) { + errorNonPositiveParam("machine_bits"); + } + + if (total_cycles <= 0) { + fprintf(stderr, "WARNING: total_cycles <= 0 in system component, ", + "power numbers will be funky...\n"); + } + + clockRate = target_core_clockrate; + execution_time = total_cycles / (target_core_clockrate); + + /* Basic parameters*/ + interface_ip.data_arr_ram_cell_tech_type = device_type; + interface_ip.data_arr_peri_global_tech_type = device_type; + interface_ip.tag_arr_ram_cell_tech_type = device_type; + interface_ip.tag_arr_peri_global_tech_type = device_type; + + interface_ip.ic_proj_type = interconnect_projection_type; + interface_ip.temp = temperature; + interface_ip.F_sz_nm = core_tech_node; + interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000; + interface_ip.is_main_mem = false; + + // These are there just to make CACTI's error_checking() happy. + // They are either not actually used or overwritten by each component. + interface_ip.cache_sz = MIN_BUFFER_SIZE; + interface_ip.nbanks = 1; + interface_ip.out_w = 0; + interface_ip.line_sz = 1; + interface_ip.assoc = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_search_ports = 1; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + + + //This section of code does not have real meaning; it is just to ensure + //all data will have initial value to prevent errors. + //They will be overridden during each components initialization + interface_ip.specific_tag = 1; + interface_ip.tag_w = 64; + interface_ip.access_mode = 2; + + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; +} + +System::~System() { + // TODO: Delete children... do this in McPATComponent +}; diff --git a/ext/mcpat/processor.h b/ext/mcpat/system.h similarity index 68% rename from ext/mcpat/processor.h rename to ext/mcpat/system.h index 5a7a2f7f5..d2e263720 100644 --- a/ext/mcpat/processor.h +++ b/ext/mcpat/system.h @@ -1,7 +1,7 @@ /***************************************************************************** * McPAT * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,19 +25,23 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Joel Hestness + * Yasuko Eckert * ***************************************************************************/ -#ifndef PROCESSOR_H_ -#define PROCESSOR_H_ -#include +#ifndef SYSTEM_H_ +#define SYSTEM_H_ -#include "XML_Parse.h" #include "arbiter.h" #include "area.h" #include "array.h" #include "basic_components.h" +#include "bus_interconnect.h" +#include "cachecontroller.h" +#include "cacheunit.h" #include "core.h" #include "decoder.h" #include "iocontrollers.h" @@ -45,35 +49,23 @@ #include "noc.h" #include "parameter.h" #include "router.h" -#include "sharedcache.h" -class Processor : public Component -{ - public: - ParseXML *XML; - vector cores; - vector l2array; - vector l3array; - vector l1dirarray; - vector l2dirarray; - vector nocs; - MemoryController * mc; - NIUController * niu; - PCIeController * pcie; - FlashController * flashcontroller; +class System : public McPATComponent { +public: InputParameter interface_ip; - ProcParam procdynp; - //wire globalInterconnect; - //clock_network globalClock; - Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers; - int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; - Processor(ParseXML *XML_interface); - void compute(); + + int device_type; + double core_tech_node; + int interconnect_projection_type; + int temperature; + + System(XMLNode* _xml_data); void set_proc_param(); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + // TODO: make this recursively compute energy on subcomponents + void displayData(uint32_t indent = 0, int plevel = 100); void displayDeviceType(int device_type_, uint32_t indent = 0); void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); - ~Processor(); + ~System(); }; -#endif /* PROCESSOR_H_ */ +#endif /* SYSTEM_H_ */ diff --git a/ext/mcpat/technology_xeon_core.cc b/ext/mcpat/technology_xeon_core.cc deleted file mode 100644 index 4e60edc1b..000000000 --- a/ext/mcpat/technology_xeon_core.cc +++ /dev/null @@ -1,2772 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - - -#include "basic_circuit.h" - -#include "parameter.h" - -double wire_resistance(double resistivity, double wire_width, double wire_thickness, - double barrier_thickness, double dishing_thickness, double alpha_scatter) -{ - double resistance; - resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness)); - return(resistance); -} - -double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing, - double ild_thickness, double miller_value, double horiz_dielectric_constant, - double vert_dielectric_constant, double fringe_cap) -{ - double vertical_cap, sidewall_cap, total_cap; - vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness; - sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing; - total_cap = vertical_cap + sidewall_cap + fringe_cap; - return(total_cap); -} - - -void init_tech_params(double technology, bool is_tag) -{ - int iter, tech, tech_lo, tech_hi; - double curr_alpha, curr_vpp; - double wire_width, wire_thickness, wire_spacing, - fringe_cap, pmos_to_nmos_sizing_r; -// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant; - double barrier_thickness, dishing_thickness, alpha_scatter; - double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell; - - uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - uint32_t peri_global_tech_type = (is_tag) ? g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; - - technology = technology * 1000.0; // in the unit of nm - - // initialize parameters - g_tp.reset(); - double gmp_to_gmn_multiplier_periph_global = 0; - - double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, - curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, - curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, - curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; - double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data - curr_asp_ratio_cell_cam; - double SENSE_AMP_D, SENSE_AMP_P; // J - double area_cell_dram = 0; - double asp_ratio_cell_dram = 0; - double area_cell_sram = 0; - double asp_ratio_cell_sram = 0; - double area_cell_cam = 0; - double asp_ratio_cell_cam = 0; - double mobility_eff_periph_global = 0; - double Vdsat_periph_global = 0; - double nmos_effective_resistance_multiplier; - double width_dram_access_transistor; - - double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date - double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn - double curr_chip_layout_overhead = 0; - double curr_macro_layout_overhead = 0; - double curr_sckt_co_eff = 0; - - if (technology < 91 && technology > 89) - { - tech_lo = 90; - tech_hi = 90; - } - else if (technology < 66 && technology > 64) - { - tech_lo = 65; - tech_hi = 65; - } - else if (technology < 46 && technology > 44) - { - tech_lo = 45; - tech_hi = 45; - } - else if (technology < 33 && technology > 31) - { - tech_lo = 32; - tech_hi = 32; - } - else if (technology < 23 && technology > 21) - { - tech_lo = 22; - tech_hi = 22; - if (ram_cell_tech_type == 3) - { - cout<<"current version does not support eDRAM technologies at 22nm"< 15) -// { -// tech_lo = 16; -// tech_hi = 16; -// } - else if (technology < 90 && technology > 65) - { - tech_lo = 90; - tech_hi = 65; - } - else if (technology < 65 && technology > 45) - { - tech_lo = 65; - tech_hi = 45; - } - else if (technology < 45 && technology > 32) - { - tech_lo = 45; - tech_hi = 32; - } - else if (technology < 32 && technology > 22) - { - tech_lo = 32; - tech_hi = 22; - } -// else if (technology < 22 && technology > 16) -// { -// tech_lo = 22; -// tech_hi = 16; -// } - else - { - cout<<"Invalid technology nodes"<F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1; - curr_core_tx_density = 1.25*0.7*0.7; - curr_sckt_co_eff = 1.1539; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - - - } - - if (tech == 65) - { //65nm technology-node. Corresponds to year 2007 in ITRS - //ITRS HP device type -// SENSE_AMP_D = .2e-9; // s -// SENSE_AMP_P = 5.7e-15; // J -// vdd[0] = 1.1; -// Lphy[0] = 0.025; -// Lelec[0] = 0.019; -// t_ox[0] = 1.1e-3; -// v_th[0] = .19491; -// c_ox[0] = 1.88e-14; -// mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6); -// Vdsat[0] = 7.71e-2; -// c_g_ideal[0] = 4.69e-16; -// c_fringe[0] = 0.077e-15; -// c_junc[0] = 1e-15; -// I_on_n[0] = 1197.2e-6; -// I_on_p[0] = 870.8e-6; -// nmos_effective_resistance_multiplier = 1.50; -// n_to_p_eff_curr_drv_ratio[0] = 2.41; -// gmp_to_gmn_multiplier[0] = 1.38; -// Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; -// Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; -// long_channel_leakage_reduction[0] = 1/3.74; -// //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first -// //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. -// I_off_n[0][0] = 1.96e-7; -// I_off_n[0][10] = 2.29e-7; -// I_off_n[0][20] = 2.66e-7; -// I_off_n[0][30] = 3.05e-7; -// I_off_n[0][40] = 3.49e-7; -// I_off_n[0][50] = 3.95e-7; -// I_off_n[0][60] = 4.45e-7; -// I_off_n[0][70] = 4.97e-7; -// I_off_n[0][80] = 5.48e-7; -// I_off_n[0][90] = 5.94e-7; -// I_off_n[0][100] = 6.3e-7; -// I_g_on_n[0][0] = 4.09e-8;//A/micron -// I_g_on_n[0][10] = 4.09e-8; -// I_g_on_n[0][20] = 4.09e-8; -// I_g_on_n[0][30] = 4.09e-8; -// I_g_on_n[0][40] = 4.09e-8; -// I_g_on_n[0][50] = 4.09e-8; -// I_g_on_n[0][60] = 4.09e-8; -// I_g_on_n[0][70] = 4.09e-8; -// I_g_on_n[0][80] = 4.09e-8; -// I_g_on_n[0][90] = 4.09e-8; -// I_g_on_n[0][100] = 4.09e-8; - - SENSE_AMP_D = .2e-9; // s - SENSE_AMP_P = 5.7e-15; // J - vdd[0] = 1.25; - Lphy[0] = 0.025; - Lelec[0] = 0.019; - t_ox[0] = 1.1e-3; - v_th[0] = .12491; - c_ox[0] = 1.88e-14; - mobility_eff[0] = 409.31 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 9.08e-2; - c_g_ideal[0] = 4.72e-16; - c_fringe[0] = 0.08e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 1486.4e-6; - I_on_p[0] = 1131.5e-6; - nmos_effective_resistance_multiplier = 1.57; - n_to_p_eff_curr_drv_ratio[0] = 2; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1.0/4.97; - //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first - //Ioff(Lgate normal)/Ioff(Lgate long)= 4.97@Vdd=1.25; (3.74@Vdd=1.1), however, Intel paper suggest the reduction factor is 3. - I_off_n[0][0] = 8.62e-7; - I_off_n[0][10] = 9.08e-7; - I_off_n[0][20] = 9.55e-7; - I_off_n[0][30] = 1.00e-6; - I_off_n[0][40] = 1.05e-6; - I_off_n[0][50] = 1.09e-6; - I_off_n[0][60] = 1.14e-6; - I_off_n[0][70] = 1.18e-6; - I_off_n[0][80] = 1.23e-6; - I_off_n[0][90] = 1.27e-6; - I_off_n[0][100] = 1.31e-6; - - - I_g_on_n[0][0] = 7.02e-8;//A/micron - I_g_on_n[0][10] = 7.02e-8; - I_g_on_n[0][20] = 7.02e-8; - I_g_on_n[0][30] = 7.02e-8; - I_g_on_n[0][40] = 7.02e-8; - I_g_on_n[0][50] = 7.02e-8; - I_g_on_n[0][60] = 7.02e-8; - I_g_on_n[0][70] = 7.02e-8; - I_g_on_n[0][80] = 7.02e-8; - I_g_on_n[0][90] = 7.02e-8; - I_g_on_n[0][100] = 7.02e-8; - - //ITRS LSTP device type - vdd[1] = 1.2; - Lphy[1] = 0.045; - Lelec[1] = 0.0298; - t_ox[1] = 1.9e-3; - v_th[1] = 0.52354; - c_ox[1] = 1.36e-14; - mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 0.128; - c_g_ideal[1] = 6.14e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 519.2e-6; - I_on_p[1] = 266e-6; - nmos_effective_resistance_multiplier = 1.96; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.82; - I_off_n[1][0] = 9.12e-12; - I_off_n[1][10] = 1.49e-11; - I_off_n[1][20] = 2.36e-11; - I_off_n[1][30] = 3.64e-11; - I_off_n[1][40] = 5.48e-11; - I_off_n[1][50] = 8.05e-11; - I_off_n[1][60] = 1.15e-10; - I_off_n[1][70] = 1.59e-10; - I_off_n[1][80] = 2.1e-10; - I_off_n[1][90] = 2.62e-10; - I_off_n[1][100] = 3.21e-10; - - I_g_on_n[1][0] = 1.09e-10;//A/micron - I_g_on_n[1][10] = 1.09e-10; - I_g_on_n[1][20] = 1.09e-10; - I_g_on_n[1][30] = 1.09e-10; - I_g_on_n[1][40] = 1.09e-10; - I_g_on_n[1][50] = 1.09e-10; - I_g_on_n[1][60] = 1.09e-10; - I_g_on_n[1][70] = 1.09e-10; - I_g_on_n[1][80] = 1.09e-10; - I_g_on_n[1][90] = 1.09e-10; - I_g_on_n[1][100] = 1.09e-10; - - //ITRS LOP device type - vdd[2] = 0.8; - Lphy[2] = 0.032; - Lelec[2] = 0.0216; - t_ox[2] = 1.2e-3; - v_th[2] = 0.28512; - c_ox[2] = 1.87e-14; - mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 0.292; - c_g_ideal[2] = 6e-16; - c_fringe[2] = 0.08e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 573.1e-6; - I_on_p[2] = 340.6e-6; - nmos_effective_resistance_multiplier = 1.82; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/2.05; - I_off_n[2][0] = 4.9e-9; - I_off_n[2][10] = 6.49e-9; - I_off_n[2][20] = 8.45e-9; - I_off_n[2][30] = 1.08e-8; - I_off_n[2][40] = 1.37e-8; - I_off_n[2][50] = 1.71e-8; - I_off_n[2][60] = 2.09e-8; - I_off_n[2][70] = 2.48e-8; - I_off_n[2][80] = 2.84e-8; - I_off_n[2][90] = 3.13e-8; - I_off_n[2][100] = 3.42e-8; - - I_g_on_n[2][0] = 9.61e-9;//A/micron - I_g_on_n[2][10] = 9.61e-9; - I_g_on_n[2][20] = 9.61e-9; - I_g_on_n[2][30] = 9.61e-9; - I_g_on_n[2][40] = 9.61e-9; - I_g_on_n[2][50] = 9.61e-9; - I_g_on_n[2][60] = 9.61e-9; - I_g_on_n[2][70] = 9.61e-9; - I_g_on_n[2][80] = 9.61e-9; - I_g_on_n[2][90] = 9.61e-9; - I_g_on_n[2][100] = 9.61e-9; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.2; - Lphy[3] = 0.12; - Lelec[3] = 0.0756; - curr_v_th_dram_access_transistor = 0.43806; - width_dram_access_transistor = 0.09; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 0.11; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.6; - t_ox[3] = 2.2e-3; - v_th[3] = 0.43806; - c_ox[3] = 1.22e-14; - mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.43806; - c_g_ideal[3] = 1.46e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 399.8e-6; - I_on_p[3] = 243.4e-6; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.23e-11; - I_off_n[3][10] = 3.46e-11; - I_off_n[3][20] = 5.24e-11; - I_off_n[3][30] = 7.75e-11; - I_off_n[3][40] = 1.12e-10; - I_off_n[3][50] = 1.58e-10; - I_off_n[3][60] = 2.18e-10; - I_off_n[3][70] = 2.88e-10; - I_off_n[3][80] = 3.63e-10; - I_off_n[3][90] = 4.41e-10; - I_off_n[3][100] = 5.36e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.3; - Lphy[3] = 0.065; - Lelec[3] = 0.0426; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.065; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.065*0.065; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 3.3; - t_ox[3] = 5e-3; - v_th[3] = 1.0; - c_ox[3] = 6.16e-15; - mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.385; - c_g_ideal[3] = 4e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 1031e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 2.39; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.80e-14; - I_off_n[3][10] = 3.64e-14; - I_off_n[3][20] = 7.03e-14; - I_off_n[3][30] = 1.31e-13; - I_off_n[3][40] = 2.35e-13; - I_off_n[3][50] = 4.09e-13; - I_off_n[3][60] = 6.89e-13; - I_off_n[3][70] = 1.13e-12; - I_off_n[3][80] = 1.78e-12; - I_off_n[3][90] = 2.71e-12; - I_off_n[3][100] = 3.99e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7; - curr_core_tx_density = 1.25*0.7; - curr_sckt_co_eff = 1.1359; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if (tech == 45) - { //45nm technology-node. Corresponds to year 2010 in ITRS - //ITRS HP device type - SENSE_AMP_D = .04e-9; // s - SENSE_AMP_P = 2.7e-15; // J - vdd[0] = 1.0; - Lphy[0] = 0.018; - Lelec[0] = 0.01345; - t_ox[0] = 0.65e-3; - v_th[0] = .18035; - c_ox[0] = 3.77e-14; - mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 9.38E-2; - c_g_ideal[0] = 6.78e-16; - c_fringe[0] = 0.05e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2046.6e-6; - //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of - //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm - I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI - nmos_effective_resistance_multiplier = 1.51; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 - I_off_n[0][0] = 2.8e-7; - I_off_n[0][10] = 3.28e-7; - I_off_n[0][20] = 3.81e-7; - I_off_n[0][30] = 4.39e-7; - I_off_n[0][40] = 5.02e-7; - I_off_n[0][50] = 5.69e-7; - I_off_n[0][60] = 6.42e-7; - I_off_n[0][70] = 7.2e-7; - I_off_n[0][80] = 8.03e-7; - I_off_n[0][90] = 8.91e-7; - I_off_n[0][100] = 9.84e-7; - - I_g_on_n[0][0] = 3.59e-8;//A/micron - I_g_on_n[0][10] = 3.59e-8; - I_g_on_n[0][20] = 3.59e-8; - I_g_on_n[0][30] = 3.59e-8; - I_g_on_n[0][40] = 3.59e-8; - I_g_on_n[0][50] = 3.59e-8; - I_g_on_n[0][60] = 3.59e-8; - I_g_on_n[0][70] = 3.59e-8; - I_g_on_n[0][80] = 3.59e-8; - I_g_on_n[0][90] = 3.59e-8; - I_g_on_n[0][100] = 3.59e-8; - - //ITRS LSTP device type - vdd[1] = 1.1; - Lphy[1] = 0.028; - Lelec[1] = 0.0212; - t_ox[1] = 1.4e-3; - v_th[1] = 0.50245; - c_ox[1] = 2.01e-14; - mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 9.12e-2; - c_g_ideal[1] = 5.18e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 666.2e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.08; - I_off_n[1][0] = 1.01e-11; - I_off_n[1][10] = 1.65e-11; - I_off_n[1][20] = 2.62e-11; - I_off_n[1][30] = 4.06e-11; - I_off_n[1][40] = 6.12e-11; - I_off_n[1][50] = 9.02e-11; - I_off_n[1][60] = 1.3e-10; - I_off_n[1][70] = 1.83e-10; - I_off_n[1][80] = 2.51e-10; - I_off_n[1][90] = 3.29e-10; - I_off_n[1][100] = 4.1e-10; - - I_g_on_n[1][0] = 9.47e-12;//A/micron - I_g_on_n[1][10] = 9.47e-12; - I_g_on_n[1][20] = 9.47e-12; - I_g_on_n[1][30] = 9.47e-12; - I_g_on_n[1][40] = 9.47e-12; - I_g_on_n[1][50] = 9.47e-12; - I_g_on_n[1][60] = 9.47e-12; - I_g_on_n[1][70] = 9.47e-12; - I_g_on_n[1][80] = 9.47e-12; - I_g_on_n[1][90] = 9.47e-12; - I_g_on_n[1][100] = 9.47e-12; - - //ITRS LOP device type - vdd[2] = 0.7; - Lphy[2] = 0.022; - Lelec[2] = 0.016; - t_ox[2] = 0.9e-3; - v_th[2] = 0.22599; - c_ox[2] = 2.82e-14;//F/micron2 - mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 5.71e-2; - c_g_ideal[2] = 6.2e-16; - c_fringe[2] = 0.073e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 748.9e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.76; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.92; - I_off_n[2][0] = 4.03e-9; - I_off_n[2][10] = 5.02e-9; - I_off_n[2][20] = 6.18e-9; - I_off_n[2][30] = 7.51e-9; - I_off_n[2][40] = 9.04e-9; - I_off_n[2][50] = 1.08e-8; - I_off_n[2][60] = 1.27e-8; - I_off_n[2][70] = 1.47e-8; - I_off_n[2][80] = 1.66e-8; - I_off_n[2][90] = 1.84e-8; - I_off_n[2][100] = 2.03e-8; - - I_g_on_n[2][0] = 3.24e-8;//A/micron - I_g_on_n[2][10] = 4.01e-8; - I_g_on_n[2][20] = 4.90e-8; - I_g_on_n[2][30] = 5.92e-8; - I_g_on_n[2][40] = 7.08e-8; - I_g_on_n[2][50] = 8.38e-8; - I_g_on_n[2][60] = 9.82e-8; - I_g_on_n[2][70] = 1.14e-7; - I_g_on_n[2][80] = 1.29e-7; - I_g_on_n[2][90] = 1.43e-7; - I_g_on_n[2][100] = 1.54e-7; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.078; - Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44559; - width_dram_access_transistor = 0.079; - curr_I_on_dram_cell = 36e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2.1e-3; - v_th[3] = 0.44559; - c_ox[3] = 1.41e-14; - mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.181; - c_g_ideal[3] = 1.10e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 456e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.54e-11; - I_off_n[3][10] = 3.94e-11; - I_off_n[3][20] = 5.95e-11; - I_off_n[3][30] = 8.79e-11; - I_off_n[3][40] = 1.27e-10; - I_off_n[3][50] = 1.79e-10; - I_off_n[3][60] = 2.47e-10; - I_off_n[3][70] = 3.31e-10; - I_off_n[3][80] = 4.26e-10; - I_off_n[3][90] = 5.27e-10; - I_off_n[3][100] = 6.46e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.045; - Lelec[3] = 0.0298; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.045; - curr_I_on_dram_cell = 20e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.045*0.045; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.7; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.98e-15; - mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.147; - c_g_ideal[3] = 3.59e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 999.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.31e-14; - I_off_n[3][10] = 2.68e-14; - I_off_n[3][20] = 5.25e-14; - I_off_n[3][30] = 9.88e-14; - I_off_n[3][40] = 1.79e-13; - I_off_n[3][50] = 3.15e-13; - I_off_n[3][60] = 5.36e-13; - I_off_n[3][70] = 8.86e-13; - I_off_n[3][80] = 1.42e-12; - I_off_n[3][90] = 2.20e-12; - I_off_n[3][100] = 3.29e-12; - } - - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7; - curr_core_tx_density = 1.25; - curr_sckt_co_eff = 1.1387; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if (tech == 32) - { - SENSE_AMP_D = .03e-9; // s - SENSE_AMP_P = 2.16e-15; // J - //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm - //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for - //HP and LSTP. - vdd[0] = 0.9; - Lphy[0] = 0.013; - Lelec[0] = 0.01013; - t_ox[0] = 0.5e-3; - v_th[0] = 0.21835; - c_ox[0] = 4.11e-14; - mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 5.09E-2; - c_g_ideal[0] = 5.34e-16; - c_fringe[0] = 0.04e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2211.7e-6; - I_on_p[0] = I_on_n[0] / 2; - nmos_effective_resistance_multiplier = 1.49; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.706; - //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), - //whichever comes first - I_off_n[0][0] = 1.52e-7; - I_off_n[0][10] = 1.55e-7; - I_off_n[0][20] = 1.59e-7; - I_off_n[0][30] = 1.68e-7; - I_off_n[0][40] = 1.90e-7; - I_off_n[0][50] = 2.69e-7; - I_off_n[0][60] = 5.32e-7; - I_off_n[0][70] = 1.02e-6; - I_off_n[0][80] = 1.62e-6; - I_off_n[0][90] = 2.73e-6; - I_off_n[0][100] = 6.1e-6; - - I_g_on_n[0][0] = 6.55e-8;//A/micron - I_g_on_n[0][10] = 6.55e-8; - I_g_on_n[0][20] = 6.55e-8; - I_g_on_n[0][30] = 6.55e-8; - I_g_on_n[0][40] = 6.55e-8; - I_g_on_n[0][50] = 6.55e-8; - I_g_on_n[0][60] = 6.55e-8; - I_g_on_n[0][70] = 6.55e-8; - I_g_on_n[0][80] = 6.55e-8; - I_g_on_n[0][90] = 6.55e-8; - I_g_on_n[0][100] = 6.55e-8; - -// 32 DG -// I_g_on_n[0][0] = 2.71e-9;//A/micron -// I_g_on_n[0][10] = 2.71e-9; -// I_g_on_n[0][20] = 2.71e-9; -// I_g_on_n[0][30] = 2.71e-9; -// I_g_on_n[0][40] = 2.71e-9; -// I_g_on_n[0][50] = 2.71e-9; -// I_g_on_n[0][60] = 2.71e-9; -// I_g_on_n[0][70] = 2.71e-9; -// I_g_on_n[0][80] = 2.71e-9; -// I_g_on_n[0][90] = 2.71e-9; -// I_g_on_n[0][100] = 2.71e-9; - - //LSTP device type - vdd[1] = 1; - Lphy[1] = 0.020; - Lelec[1] = 0.0173; - t_ox[1] = 1.2e-3; - v_th[1] = 0.513; - c_ox[1] = 2.29e-14; - mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 8.64e-2; - c_g_ideal[1] = 4.58e-16; - c_fringe[1] = 0.053e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 683.6e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/1.93; - I_off_n[1][0] = 2.06e-11; - I_off_n[1][10] = 3.30e-11; - I_off_n[1][20] = 5.15e-11; - I_off_n[1][30] = 7.83e-11; - I_off_n[1][40] = 1.16e-10; - I_off_n[1][50] = 1.69e-10; - I_off_n[1][60] = 2.40e-10; - I_off_n[1][70] = 3.34e-10; - I_off_n[1][80] = 4.54e-10; - I_off_n[1][90] = 5.96e-10; - I_off_n[1][100] = 7.44e-10; - - I_g_on_n[1][0] = 3.73e-11;//A/micron - I_g_on_n[1][10] = 3.73e-11; - I_g_on_n[1][20] = 3.73e-11; - I_g_on_n[1][30] = 3.73e-11; - I_g_on_n[1][40] = 3.73e-11; - I_g_on_n[1][50] = 3.73e-11; - I_g_on_n[1][60] = 3.73e-11; - I_g_on_n[1][70] = 3.73e-11; - I_g_on_n[1][80] = 3.73e-11; - I_g_on_n[1][90] = 3.73e-11; - I_g_on_n[1][100] = 3.73e-11; - - - //LOP device type - vdd[2] = 0.6; - Lphy[2] = 0.016; - Lelec[2] = 0.01232; - t_ox[2] = 0.9e-3; - v_th[2] = 0.24227; - c_ox[2] = 2.84e-14; - mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 4.64e-2; - c_g_ideal[2] = 4.54e-16; - c_fringe[2] = 0.057e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 827.8e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.89; - I_off_n[2][0] = 5.94e-8; - I_off_n[2][10] = 7.23e-8; - I_off_n[2][20] = 8.7e-8; - I_off_n[2][30] = 1.04e-7; - I_off_n[2][40] = 1.22e-7; - I_off_n[2][50] = 1.43e-7; - I_off_n[2][60] = 1.65e-7; - I_off_n[2][70] = 1.90e-7; - I_off_n[2][80] = 2.15e-7; - I_off_n[2][90] = 2.39e-7; - I_off_n[2][100] = 2.63e-7; - - I_g_on_n[2][0] = 2.93e-9;//A/micron - I_g_on_n[2][10] = 2.93e-9; - I_g_on_n[2][20] = 2.93e-9; - I_g_on_n[2][30] = 2.93e-9; - I_g_on_n[2][40] = 2.93e-9; - I_g_on_n[2][50] = 2.93e-9; - I_g_on_n[2][60] = 2.93e-9; - I_g_on_n[2][70] = 2.93e-9; - I_g_on_n[2][80] = 2.93e-9; - I_g_on_n[2][90] = 2.93e-9; - I_g_on_n[2][100] = 2.93e-9; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.056; - Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44129; - width_dram_access_transistor = 0.056; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2e-3; - v_th[3] = 0.44467; - c_ox[3] = 1.48e-14; - mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.174; - c_g_ideal[3] = 7.45e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1055.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.57e-11; - I_off_n[3][10] = 5.51e-11; - I_off_n[3][20] = 8.27e-11; - I_off_n[3][30] = 1.21e-10; - I_off_n[3][40] = 1.74e-10; - I_off_n[3][50] = 2.45e-10; - I_off_n[3][60] = 3.38e-10; - I_off_n[3][70] = 4.53e-10; - I_off_n[3][80] = 5.87e-10; - I_off_n[3][90] = 7.29e-10; - I_off_n[3][100] = 8.87e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.032; - Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.032; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.032*0.032; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.6; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.99e-15; - mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.129; - c_g_ideal[3] = 2.56e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1024.5e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.63e-14; - I_off_n[3][10] = 7.18e-14; - I_off_n[3][20] = 1.36e-13; - I_off_n[3][30] = 2.49e-13; - I_off_n[3][40] = 4.41e-13; - I_off_n[3][50] = 7.55e-13; - I_off_n[3][60] = 1.26e-12; - I_off_n[3][70] = 2.03e-12; - I_off_n[3][80] = 3.19e-12; - I_off_n[3][90] = 4.87e-12; - I_off_n[3][100] = 7.16e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7; - curr_sckt_co_eff = 1.1111; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if(tech == 22){ - //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm - //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP. - //22 nm HP - vdd[0] = 0.8; - Lphy[0] = 0.009;//Lphy is the physical gate-length. - Lelec[0] = 0.00468;//Lelec is the electrical gate-length. - t_ox[0] = 0.55e-3;//micron - v_th[0] = 0.1395;//V - c_ox[0] = 3.63e-14;//F/micron2 - mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 2.33e-2; //V/micron - c_g_ideal[0] = 3.27e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron - c_junc[0] = 0;//F/micron2 - I_on_n[0] = 2626.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.45; - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.274; - I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there. - I_off_n[0][10] = 1.55e-7/1.5*1.2; - I_off_n[0][20] = 1.59e-7/1.5*1.2; - I_off_n[0][30] = 1.68e-7/1.5*1.2; - I_off_n[0][40] = 1.90e-7/1.5*1.2; - I_off_n[0][50] = 2.69e-7/1.5*1.2; - I_off_n[0][60] = 5.32e-7/1.5*1.2; - I_off_n[0][70] = 1.02e-6/1.5*1.2; - I_off_n[0][80] = 1.62e-6/1.5*1.2; - I_off_n[0][90] = 2.73e-6/1.5*1.2; - I_off_n[0][100] = 6.1e-6/1.5*1.2; - //for 22nm DG HP - I_g_on_n[0][0] = 1.81e-9;//A/micron - I_g_on_n[0][10] = 1.81e-9; - I_g_on_n[0][20] = 1.81e-9; - I_g_on_n[0][30] = 1.81e-9; - I_g_on_n[0][40] = 1.81e-9; - I_g_on_n[0][50] = 1.81e-9; - I_g_on_n[0][60] = 1.81e-9; - I_g_on_n[0][70] = 1.81e-9; - I_g_on_n[0][80] = 1.81e-9; - I_g_on_n[0][90] = 1.81e-9; - I_g_on_n[0][100] = 1.81e-9; - - //22 nm LSTP DG - vdd[1] = 0.8; - Lphy[1] = 0.014; - Lelec[1] = 0.008;//Lelec is the electrical gate-length. - t_ox[1] = 1.1e-3;//micron - v_th[1] = 0.40126;//V - c_ox[1] = 2.30e-14;//F/micron2 - mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[1] = 6.64e-2; //V/micron - c_g_ideal[1] = 3.22e-16;//F/micron - c_fringe[1] = 0.08e-15; - c_junc[1] = 0;//F/micron2 - I_on_n[1] = 727.6e-6;//A/micron - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron - long_channel_leakage_reduction[1] = 1/1.89; - I_off_n[1][0] = 2.43e-11; - I_off_n[1][10] = 4.85e-11; - I_off_n[1][20] = 9.68e-11; - I_off_n[1][30] = 1.94e-10; - I_off_n[1][40] = 3.87e-10; - I_off_n[1][50] = 7.73e-10; - I_off_n[1][60] = 3.55e-10; - I_off_n[1][70] = 3.09e-9; - I_off_n[1][80] = 6.19e-9; - I_off_n[1][90] = 1.24e-8; - I_off_n[1][100]= 2.48e-8; - - I_g_on_n[1][0] = 4.51e-10;//A/micron - I_g_on_n[1][10] = 4.51e-10; - I_g_on_n[1][20] = 4.51e-10; - I_g_on_n[1][30] = 4.51e-10; - I_g_on_n[1][40] = 4.51e-10; - I_g_on_n[1][50] = 4.51e-10; - I_g_on_n[1][60] = 4.51e-10; - I_g_on_n[1][70] = 4.51e-10; - I_g_on_n[1][80] = 4.51e-10; - I_g_on_n[1][90] = 4.51e-10; - I_g_on_n[1][100] = 4.51e-10; - - //22 nm LOP - vdd[2] = 0.6; - Lphy[2] = 0.011; - Lelec[2] = 0.00604;//Lelec is the electrical gate-length. - t_ox[2] = 0.8e-3;//micron - v_th[2] = 0.2315;//V - c_ox[2] = 2.87e-14;//F/micron2 - mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[2] = 1.81e-2; //V/micron - c_g_ideal[2] = 3.16e-16;//F/micron - c_fringe[2] = 0.08e-15; - c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab - I_on_n[2] = 916.1e-6;//A/micron - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron - long_channel_leakage_reduction[2] = 1/2.38; - - I_off_n[2][0] = 1.31e-8; - I_off_n[2][10] = 2.60e-8; - I_off_n[2][20] = 5.14e-8; - I_off_n[2][30] = 1.02e-7; - I_off_n[2][40] = 2.02e-7; - I_off_n[2][50] = 3.99e-7; - I_off_n[2][60] = 7.91e-7; - I_off_n[2][70] = 1.09e-6; - I_off_n[2][80] = 2.09e-6; - I_off_n[2][90] = 4.04e-6; - I_off_n[2][100]= 4.48e-6; - - I_g_on_n[2][0] = 2.74e-9;//A/micron - I_g_on_n[2][10] = 2.74e-9; - I_g_on_n[2][20] = 2.74e-9; - I_g_on_n[2][30] = 2.74e-9; - I_g_on_n[2][40] = 2.74e-9; - I_g_on_n[2][50] = 2.74e-9; - I_g_on_n[2][60] = 2.74e-9; - I_g_on_n[2][70] = 2.74e-9; - I_g_on_n[2][80] = 2.74e-9; - I_g_on_n[2][90] = 2.74e-9; - I_g_on_n[2][100] = 2.74e-9; - - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. - //parameters - curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In - //2005 ITRS, the value was about twice the value in 2007 ITRS - Lphy[3] = 0.022;//micron - Lelec[3] = 0.0181;//micron. - curr_v_th_dram_access_transistor = 1;//V - width_dram_access_transistor = 0.022;//micron - curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always - //kept constant. In reality this could perhaps be lower - curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. - curr_asp_ratio_cell_dram = 0.667; - curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus - //kept constant. - - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. - curr_vpp = 2.3;//vpp. V - t_ox[3] = 3.5e-3;//micron - v_th[3] = 1.0;//V - c_ox[3] = 9.06e-15;//F/micron2 - mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs - Vdsat[3] = 0.0972; //V/micron - c_g_ideal[3] = 1.99e-16;//F/micron - c_fringe[3] = 0.053e-15;//F/micron - c_junc[3] = 1e-15;//F/micron2 - I_on_n[3] = 910.5e-6;//A/micron - I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm. - // - n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.1e-13; //A/micron - I_off_n[3][10] = 2.11e-13; - I_off_n[3][20] = 3.88e-13; - I_off_n[3][30] = 6.9e-13; - I_off_n[3][40] = 1.19e-12; - I_off_n[3][50] = 1.98e-12; - I_off_n[3][60] = 3.22e-12; - I_off_n[3][70] = 5.09e-12; - I_off_n[3][80] = 7.85e-12; - I_off_n[3][90] = 1.18e-11; - I_off_n[3][100] = 1.72e-11; - - } - else - { - //some error handler - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if(tech == 16){ - //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm - //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. - //16 nm HP - vdd[0] = 0.7; - Lphy[0] = 0.006;//Lphy is the physical gate-length. - Lelec[0] = 0.00315;//Lelec is the electrical gate-length. - t_ox[0] = 0.5e-3;//micron - v_th[0] = 0.1489;//V - c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR - mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet - c_g_ideal[0] = 2.30e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 - c_junc[0] = 0;//F/micron2 MASTAR result dynamic - I_on_n[0] = 2768.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/2.655; - I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07; - I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07; - I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07; - I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07; - I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07; - I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07; - I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07; - I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07; - I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07; - I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07; - I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07; - //for 16nm DG HP - I_g_on_n[0][0] = 1.07e-9;//A/micron - I_g_on_n[0][10] = 1.07e-9; - I_g_on_n[0][20] = 1.07e-9; - I_g_on_n[0][30] = 1.07e-9; - I_g_on_n[0][40] = 1.07e-9; - I_g_on_n[0][50] = 1.07e-9; - I_g_on_n[0][60] = 1.07e-9; - I_g_on_n[0][70] = 1.07e-9; - I_g_on_n[0][80] = 1.07e-9; - I_g_on_n[0][90] = 1.07e-9; - I_g_on_n[0][100] = 1.07e-9; - -// //16 nm LSTP DG -// vdd[1] = 0.8; -// Lphy[1] = 0.014; -// Lelec[1] = 0.008;//Lelec is the electrical gate-length. -// t_ox[1] = 1.1e-3;//micron -// v_th[1] = 0.40126;//V -// c_ox[1] = 2.30e-14;//F/micron2 -// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs -// Vdsat[1] = 6.64e-2; //V/micron -// c_g_ideal[1] = 3.22e-16;//F/micron -// c_fringe[1] = 0.008e-15; -// c_junc[1] = 0;//F/micron2 -// I_on_n[1] = 727.6e-6;//A/micron -// I_on_p[1] = I_on_n[1] / 2; -// nmos_effective_resistance_multiplier = 1.99; -// n_to_p_eff_curr_drv_ratio[1] = 2; -// gmp_to_gmn_multiplier[1] = 0.99; -// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron -// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron -// I_off_n[1][0] = 2.43e-11; -// I_off_n[1][10] = 4.85e-11; -// I_off_n[1][20] = 9.68e-11; -// I_off_n[1][30] = 1.94e-10; -// I_off_n[1][40] = 3.87e-10; -// I_off_n[1][50] = 7.73e-10; -// I_off_n[1][60] = 3.55e-10; -// I_off_n[1][70] = 3.09e-9; -// I_off_n[1][80] = 6.19e-9; -// I_off_n[1][90] = 1.24e-8; -// I_off_n[1][100]= 2.48e-8; -// -// // for 22nm LSTP HP -// I_g_on_n[1][0] = 4.51e-10;//A/micron -// I_g_on_n[1][10] = 4.51e-10; -// I_g_on_n[1][20] = 4.51e-10; -// I_g_on_n[1][30] = 4.51e-10; -// I_g_on_n[1][40] = 4.51e-10; -// I_g_on_n[1][50] = 4.51e-10; -// I_g_on_n[1][60] = 4.51e-10; -// I_g_on_n[1][70] = 4.51e-10; -// I_g_on_n[1][80] = 4.51e-10; -// I_g_on_n[1][90] = 4.51e-10; -// I_g_on_n[1][100] = 4.51e-10; - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. - //parameters - curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In - //2005 ITRS, the value was about twice the value in 2007 ITRS - Lphy[3] = 0.022;//micron - Lelec[3] = 0.0181;//micron. - curr_v_th_dram_access_transistor = 1;//V - width_dram_access_transistor = 0.022;//micron - curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always - //kept constant. In reality this could perhaps be lower - curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. - curr_asp_ratio_cell_dram = 0.667; - curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus - //kept constant. - - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. - curr_vpp = 2.3;//vpp. V - t_ox[3] = 3.5e-3;//micron - v_th[3] = 1.0;//V - c_ox[3] = 9.06e-15;//F/micron2 - mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs - Vdsat[3] = 0.0972; //V/micron - c_g_ideal[3] = 1.99e-16;//F/micron - c_fringe[3] = 0.053e-15;//F/micron - c_junc[3] = 1e-15;//F/micron2 - I_on_n[3] = 910.5e-6;//A/micron - I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm. - // - n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.1e-13; //A/micron - I_off_n[3][10] = 2.11e-13; - I_off_n[3][20] = 3.88e-13; - I_off_n[3][30] = 6.9e-13; - I_off_n[3][40] = 1.19e-12; - I_off_n[3][50] = 1.98e-12; - I_off_n[3][60] = 3.22e-12; - I_off_n[3][70] = 5.09e-12; - I_off_n[3][80] = 7.85e-12; - I_off_n[3][90] = 1.18e-11; - I_off_n[3][100] = 1.72e-11; - - } - else - { - //some error handler - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - - g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type]; - g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; - g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; - g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; - g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; - g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; - g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; - g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; - g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; - g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; - g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; - g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; - g_tp.peri_global.n_to_p_eff_curr_drv_ratio - += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; - g_tp.peri_global.long_channel_leakage_reduction - += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; - g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; - - g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; - g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; - g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; - g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; - g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; - g_tp.vpp += curr_alpha * curr_vpp; - g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; - g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; - g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - - g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; - g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; - g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; - area_cell_dram += curr_alpha * curr_area_cell_dram; - asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; - - g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; - g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; - g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; - area_cell_sram += curr_alpha * curr_area_cell_sram; - asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; - - g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng - g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; - g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; - area_cell_cam += curr_alpha * curr_area_cell_cam; - asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; - - //Sense amplifier latch Gm calculation - mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; - Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; - - //Empirical undifferetiated core/FU coefficient - g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff; - g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; - g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; - g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; - g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; - } - - - //Currently we are not modeling the resistance/capacitance of poly anywhere. - //Continuous function (or date have been processed) does not need linear interpolation - g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process - g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process - g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; - g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; - g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um; - g_tp.cell_h_def = 50 * g_ip->F_sz_um; - g_tp.w_poly_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; - g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; - - g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; - g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; - g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process - g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process - g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process - g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process - g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; - g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_; - - if (ram_cell_tech_type == comm_dram) - { - g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; - g_tp.h_dec = 8; // in the unit of memory cell height - } - else - { - g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; - g_tp.h_dec = 4; // in the unit of memory cell height - } - - g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; - g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; - g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; - - g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; - g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; - //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; - - g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; - - double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; - double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; - g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; - - g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); - g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; - g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); - g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; - g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng - g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; - - g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; - g_tp.sram.Vbitpre = vdd[ram_cell_tech_type]; - g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - - - double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; - - for (iter=0; iter<=1; ++iter) - { - // linear interpolation - if (iter == 0) - { - tech = tech_lo; - if (tech_lo == tech_hi) - { - curr_alpha = 1; - } - else - { - curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi); - } - } - else - { - tech = tech_hi; - if (tech_lo == tech_hi) - { - break; - } - else - { - curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi); - } - } - - if (tech == 90) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.4; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.01;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.48;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap);//F/micron. - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.48;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.7; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.96; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.008; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.48; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.48; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.1; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.09; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); - wire_r_per_micron[1][3] = 12 / 0.09; - } - else if (tech == 65) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 2.7; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.405; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.303; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.7; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.405; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.303; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.8; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.81; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.303; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.006; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.405; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.734; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.405; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.734; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.77; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.734; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.065; - wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); - wire_r_per_micron[1][3] = 12 / 0.065; - } - else if (tech == 45) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.315; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.958; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.315; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.958; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.63; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.958; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.004; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.315; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.46; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.315; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.46; - vert_dielectric_constant[1][1] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.55; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.46; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.045; - wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); - wire_r_per_micron[1][3] = 12 / 0.045; - } - else if (tech == 32) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.21; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.664; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.21; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.664; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.42; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.664; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.21; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.214; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - aspect_ratio[1][1] = 2.0; - wire_width = wire_pitch[1][1] / 2; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.21; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.214; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.385; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.214; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.032;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron - wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron - } - else if (tech == 22) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.15; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.414; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.15; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.414; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.3; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.414; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.15; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.104; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.15; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.104; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.275; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.104; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.022;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron - wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - } - - else if (tech == 16) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.108; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.202; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - aspect_ratio[0][1] = 3.0; - wire_width = wire_pitch[0][1] / 2; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.108; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.202; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.216; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.202; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.002; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.108; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 1.998; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.108; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 1.998; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.198; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 1.998; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.016;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron - wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - } - g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - - g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - - g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - - g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2; - - g_tp.sense_delay += curr_alpha *SENSE_AMP_D; - g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P; -// g_tp.horiz_dielectric_constant += horiz_dielectric_constant; -// g_tp.vert_dielectric_constant += vert_dielectric_constant; -// g_tp.aspect_ratio += aspect_ratio; -// g_tp.miller_value += miller_value; -// g_tp.ild_thickness += ild_thickness; - - } - g_tp.fringe_cap = fringe_cap; - - double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1); - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); - double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0); - double tf = rd * c_load; - g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); - double KLOAD = 1; - c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); - tf = rd * c_load; - g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); -} - diff --git a/ext/mcpat/xmlParser.cc b/ext/mcpat/xmlParser.cc index 5ac45edae..97532d506 100644 --- a/ext/mcpat/xmlParser.cc +++ b/ext/mcpat/xmlParser.cc @@ -75,6 +75,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (c) 2002, Business-Insight + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * Business-Insight * All rights reserved. * @@ -91,7 +92,7 @@ //#endif #define WIN32_LEAN_AND_MEAN #include // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files - // to have "MessageBoxA" to display error messages for openFilHelper +// to have "MessageBoxA" to display error messages for openFilHelper #endif #include @@ -101,37 +102,49 @@ #include #include -XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); } -void freeXMLString(XMLSTR t){if(t)free(t);} +XMLCSTR XMLNode::getVersion() { + return _CXML("v2.39"); +} +void freeXMLString(XMLSTR t) { + if (t)free(t); +} static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8; static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1; -inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; } +inline int mmin( const int t1, const int t2 ) { + return t1 < t2 ? t1 : t2; +} // You can modify the initialization of the variable "XMLClearTags" below // to change the clearTags that are currently recognized by the library. // The number on the second columns is the length of the string inside the // first column. The "") }, - { _CXML("") }, - { _CXML("") }, - { _CXML("
")    ,5,  _CXML("
") }, +typedef struct { + XMLCSTR lpszOpen; + int openTagLen; + XMLCSTR lpszClose; +} ALLXMLClearTag; +static ALLXMLClearTag XMLClearTags[] = { + { _CXML("") }, + { _CXML("") }, + { _CXML("") }, + { _CXML("
")    , 5,  _CXML("
") }, // { _CXML("")}, - { NULL ,0, NULL } + { NULL , 0, NULL } }; // You can modify the initialization of the variable "XMLEntities" below // to change the character entities that are currently recognized by the library. // The number on the second columns is the length of the string inside the // first column. Additionally, the syntaxes " " and " " are recognized. -typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity; -static XMLCharacterEntity XMLEntities[] = -{ +typedef struct { + XMLCSTR s; + int l; + XMLCHAR c; +} XMLCharacterEntity; +static XMLCharacterEntity XMLEntities[] = { { _CXML("&" ), 5, _CXML('&' )}, { _CXML("<" ), 4, _CXML('<' )}, { _CXML(">" ), 4, _CXML('>' )}, @@ -147,32 +160,51 @@ static XMLCharacterEntity XMLEntities[] = // The following function parses the XML errors into a user friendly string. // You can edit this to change the output language of the library to something else. -XMLCSTR XMLNode::getError(XMLError xerror) -{ - switch (xerror) - { - case eXMLErrorNone: return _CXML("No error"); - case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag"); - case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found"); - case eXMLErrorEmpty: return _CXML("Error: No XML data"); - case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name"); - case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name"); - case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag"); - case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end"); - case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found"); - case eXMLErrorNoElements: return _CXML("Error: No elements found"); - case eXMLErrorFileNotFound: return _CXML("Error: File not found"); - case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found"); - case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity"); - case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); - case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars"); - case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing"); - case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file"); +XMLCSTR XMLNode::getError(XMLError xerror) { + switch (xerror) { + case eXMLErrorNone: + return _CXML("No error"); + case eXMLErrorMissingEndTag: + return _CXML("Warning: Unmatched end tag"); + case eXMLErrorNoXMLTagFound: + return _CXML("Warning: No XML tag found"); + case eXMLErrorEmpty: + return _CXML("Error: No XML data"); + case eXMLErrorMissingTagName: + return _CXML("Error: Missing start tag name"); + case eXMLErrorMissingEndTagName: + return _CXML("Error: Missing end tag name"); + case eXMLErrorUnmatchedEndTag: + return _CXML("Error: Unmatched end tag"); + case eXMLErrorUnmatchedEndClearTag: + return _CXML("Error: Unmatched clear tag end"); + case eXMLErrorUnexpectedToken: + return _CXML("Error: Unexpected token found"); + case eXMLErrorNoElements: + return _CXML("Error: No elements found"); + case eXMLErrorFileNotFound: + return _CXML("Error: File not found"); + case eXMLErrorFirstTagNotFound: + return _CXML("Error: First Tag not found"); + case eXMLErrorUnknownCharacterEntity: + return _CXML("Error: Unknown character entity"); + case eXMLErrorCharacterCodeAbove255: + return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); + case eXMLErrorCharConversionError: + return _CXML("Error: unable to convert between WideChar and MultiByte chars"); + case eXMLErrorCannotOpenWriteFile: + return _CXML("Error: unable to open file for writing"); + case eXMLErrorCannotWriteFile: + return _CXML("Error: cannot write into file"); - case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4"); - case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated"); - case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character"); - case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small"); + case eXMLErrorBase64DataSizeIsNotMultipleOf4: + return _CXML("Warning: Base64-string length is not a multiple of 4"); + case eXMLErrorBase64DecodeTruncatedData: + return _CXML("Warning: Base64-string is truncated"); + case eXMLErrorBase64DecodeIllegalCharacter: + return _CXML("Error: Base64-string contains an illegal character"); + case eXMLErrorBase64DecodeBufferTooSmall: + return _CXML("Error: Base64 decode output buffer is too small"); }; return _CXML("Unknown"); } @@ -187,168 +219,244 @@ XMLCSTR XMLNode::getError(XMLError xerror) // If you plan to "port" the library to a new system/compiler, all you have to do is // to edit the following lines. #ifdef XML_NO_WIDE_CHAR -char myIsTextWideChar(const void *b, int len) { return FALSE; } +char myIsTextWideChar(const void *b, int len) { + return FALSE; +} #else - #if defined (UNDER_CE) || !defined(_XMLWINDOWS) - char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode - { +#if defined (UNDER_CE) || !defined(_XMLWINDOWS) +// inspired by the Wine API: RtlIsTextUnicode +char myIsTextWideChar(const void *b, int len) { #ifdef sun - // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. - if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; + // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. + if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; #endif - const wchar_t *s=(const wchar_t*)b; + const wchar_t *s = (const wchar_t*)b; - // buffer too small: - if (len<(int)sizeof(wchar_t)) return FALSE; + // buffer too small: + if (len < (int)sizeof(wchar_t)) return FALSE; - // odd length test - if (len&1) return FALSE; + // odd length test + if (len&1) return FALSE; - /* only checks the first 256 characters */ - len=mmin(256,len/sizeof(wchar_t)); + /* only checks the first 256 characters */ + len = mmin(256, len / sizeof(wchar_t)); - // Check for the special byte order: - if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; - if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE + // Check for the special byte order: + if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; + if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE - // checks for ASCII characters in the UNICODE stream - int i,stats=0; - for (i=0; ilen/2) return TRUE; + // checks for ASCII characters in the UNICODE stream + int i, stats=0; + for (i=0; ilen/2) return TRUE; - // Check for UNICODE NULL chars - for (i=0; i - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } - #else - // for gcc - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } - #endif - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) - { - char *filenameAscii=myWideCharToMultiByte(filename); - FILE *f; - if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb"); - else f=fopen(filenameAscii,"wb"); - free(filenameAscii); - return f; - } - #else - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } - static inline int xstrlen(XMLCSTR c) { return strlen(c); } - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } - #endif - static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);} +#ifdef XML_NO_WIDE_CHAR +char *myWideCharToMultiByte(const wchar_t *s) { + return NULL; +} +#else +char *myWideCharToMultiByte(const wchar_t *s) { + const wchar_t *ss = s; + int i = (int)wcsrtombs(NULL, &ss, 0, NULL); + if (i < 0) return NULL; + char *d = (char *)malloc(i + 1); + wcsrtombs(d, &s, i, NULL); + d[i] = 0; + return d; +} +#endif +#ifdef _XMLWIDECHAR +wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) { + const char *ss = s; + int i = (int)mbsrtowcs(NULL, &ss, 0, NULL); + if (i < 0) return NULL; + wchar_t *d = (wchar_t *)malloc((i + 1) * sizeof(wchar_t)); + mbsrtowcs(d, &s, i, NULL); + d[i] = 0; + return d; +} +int xstrlen(XMLCSTR c) { + return wcslen(c); +} +#ifdef sun +// for CC +#include +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wsncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wsncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return wscasecmp(c1, c2); +} +#else +// for gcc +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wcsncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wcsncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return wcscasecmp(c1, c2); +} +#endif +static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { + return (XMLSTR)wcsstr(c1, c2); +} +static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { + return (XMLSTR)wcscpy(c1, c2); +} +static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) { + char *filenameAscii = myWideCharToMultiByte(filename); + FILE *f; + if (mode[0] == _CXML('r')) f = fopen(filenameAscii, "rb"); + else f = fopen(filenameAscii, "wb"); + free(filenameAscii); + return f; +} +#else +static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) { + return fopen(filename, mode); +} +static inline int xstrlen(XMLCSTR c) { + return strlen(c); +} +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return strncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return strncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return strcasecmp(c1, c2); +} +static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { + return (XMLSTR)strstr(c1, c2); +} +static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { + return (XMLSTR)strcpy(c1, c2); +} +#endif +static inline int _strnicmp(const char *c1, const char *c2, int l) { + return strncasecmp(c1, c2, l); +} #endif @@ -359,35 +467,86 @@ char myIsTextWideChar(const void *b, int len) { return FALSE; } // There are only here as "convenience" functions for the user. // If you don't need them, you can delete them without any trouble. #ifdef _XMLWIDECHAR - #ifdef _XMLWINDOWS - // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 - char xmltob(XMLCSTR t,int v){ if (t&&(*t)) return (char)_wtoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #else - #ifdef sun - // for CC - #include - char xmltob(XMLCSTR t,int v){ if (t) return (char)wstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; } - #else - // for gcc - char xmltob(XMLCSTR t,int v){ if (t) return (char)wcstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; } - #endif - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #endif +#ifdef _XMLWINDOWS +// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 +char xmltob(XMLCSTR t, int v) { + if (t && (*t)) return (char)_wtoi(t); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t && (*t)) return _wtoi(t); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t && (*t)) return _wtol(t); + return v; +} +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ + return v; +} #else - char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; } +#ifdef sun +// for CC +#include +char xmltob(XMLCSTR t, int v) { + if (t) return (char)wstol(t, NULL, 10); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t) return (int)wstol(t, NULL, 10); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t) return wstol(t, NULL, 10); + return v; +} +#else +// for gcc +char xmltob(XMLCSTR t, int v) { + if (t) return (char)wcstol(t, NULL, 10); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t) return (int)wcstol(t, NULL, 10); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t) return wcstol(t, NULL, 10); + return v; +} #endif -XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t) return t; return v; } -XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; } +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ + return v; +} +#endif +#else +char xmltob(XMLCSTR t, char v) { + if (t && (*t)) return (char)atoi(t); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t && (*t)) return atoi(t); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t && (*t)) return atol(t); + return v; +} +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) return atof(t); + return v; +} +#endif +XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v) { + if (t) return t; + return v; +} +XMLCHAR xmltoc(XMLCSTR t, XMLCHAR v) { + if (t && (*t)) return *t; + return v; +} ///////////////////////////////////////////////////////////////////////// // the "openFileHelper" function // @@ -395,42 +554,47 @@ XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; } // Since each application has its own way to report and deal with errors, you should modify & rewrite // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs. -XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) -{ +XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) { // guess the value of the global parameter "characterEncoding" // (the guess is based on the first 200 bytes of the file). - FILE *f=xfopen(filename,_CXML("rb")); - if (f) - { + FILE *f = xfopen(filename, _CXML("rb")); + if (f) { char bb[205]; - int l=(int)fread(bb,1,200,f); - setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText); + int l = (int)fread(bb, 1, 200, f); + setGlobalOptions(guessCharEncoding(bb, l), guessWideCharChars, + dropWhiteSpace, removeCommentsInMiddleOfText); fclose(f); } // parse the file XMLResults pResults; - XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults); + XMLNode xnode = XMLNode::parseFile(filename, tag, &pResults); // display error message (if any) - if (pResults.error != eXMLErrorNone) - { + if (pResults.error != eXMLErrorNone) { // create message - char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML(""); - if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; } + char message[2000], *s1 = (char*)"", *s3 = (char*)""; + XMLCSTR s2 = _CXML(""); + if (pResults.error == eXMLErrorFirstTagNotFound) { + s1 = (char*)"First Tag should be '"; + s2 = tag; + s3 = (char*)"'.\n"; + } sprintf(message, #ifdef _XMLWIDECHAR - "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" + "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" #else - "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" + "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" #endif - ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3); + , filename, XMLNode::getError(pResults.error), pResults.nLine, + pResults.nColumn, s1, s2, s3); // display message #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_) - MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST); + MessageBoxA(NULL, message, "XML Parsing error", MB_OK | MB_ICONERROR | + MB_TOPMOST); #else - printf("%s",message); + printf("%s", message); #endif exit(255); } @@ -450,106 +614,101 @@ XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) // This table is used as lookup-table to know the length of a character (in byte) based on the // content of the first byte of the character. // (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ). -static const char XML_utf8ByteTable[256] = -{ +static const char XML_utf8ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte - 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 End of ASCII range + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x80 0x80 to 0xc1 invalid + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xb0 + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0 0xc2 to 0xdf 2 byte + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,// 0xe0 0xe0 to 0xef 3 byte + 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid }; -static const char XML_legacyByteTable[256] = -{ - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +static const char XML_legacyByteTable[256] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; -static const char XML_sjisByteTable[256] = -{ +static const char XML_sjisByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x80 0x81 to 0x9F 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xb0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xc0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 0xe0 to 0xef 2 bytes + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 }; -static const char XML_gb2312ByteTable[256] = -{ +static const char XML_gb2312ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x80 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x90 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xa0 0xa1 to 0xf7 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xb0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 }; -static const char XML_gbk_big5_ByteTable[256] = -{ +static const char XML_gbk_big5_ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x80 0x81 to 0xfe 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x90 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xa0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xb0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 // 0xf0 }; -static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8" +// the default is "characterEncoding=XMLNode::encoding_UTF8" +static const char *XML_ByteTable = (const char *)XML_utf8ByteTable; #endif XMLNode XMLNode::emptyXMLNode; -XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL}; -XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL}; +XMLClear XMLNode::emptyXMLClear = { NULL, NULL, NULL}; +XMLAttribute XMLNode::emptyXMLAttribute = { NULL, NULL}; // Enumeration used to decipher what type a token is -typedef enum XMLTokenTypeTag -{ +typedef enum XMLTokenTypeTag { eTokenText = 0, eTokenQuotedText, eTokenTagStart, /* "<" */ @@ -563,8 +722,7 @@ typedef enum XMLTokenTypeTag } XMLTokenType; // Main structure used for parsing XML -typedef struct XML -{ +typedef struct XML { XMLCSTR lpXML; XMLCSTR lpszText; int nIndex,nIndexMissigEndTag; @@ -576,15 +734,13 @@ typedef struct XML int nFirst; } XML; -typedef struct -{ +typedef struct { ALLXMLClearTag *pClr; XMLCSTR pStr; } NextToken; // Enumeration used when parsing attributes -typedef enum Attrib -{ +typedef enum Attrib { eAttribName = 0, eAttribEquals, eAttribValue @@ -592,118 +748,126 @@ typedef enum Attrib // Enumeration used when parsing elements to dictate whether we are currently // inside a tag -typedef enum Status -{ +typedef enum Status { eInsideTag = 0, eOutsideTag } Status; -XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const -{ +XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const { if (!d) return eXMLErrorNone; - FILE *f=xfopen(filename,_CXML("wb")); + FILE *f = xfopen(filename, _CXML("wb")); if (!f) return eXMLErrorCannotOpenWriteFile; #ifdef _XMLWIDECHAR - unsigned char h[2]={ 0xFF, 0xFE }; - if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile; - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (!fwrite(L"\n",sizeof(wchar_t)*40,1,f)) + unsigned char h[2] = { 0xFF, 0xFE }; + if (!fwrite(h, 2, 1, f)) return eXMLErrorCannotWriteFile; + if ((!isDeclaration()) && ((d->lpszName) || + (!getChildNode().isDeclaration()))) { + if (!fwrite(L"\n", + sizeof(wchar_t)*40, 1, f)) return eXMLErrorCannotWriteFile; } #else - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (characterEncoding==char_encoding_UTF8) - { + if ((!isDeclaration()) && ((d->lpszName) || + (!getChildNode().isDeclaration()))) { + if (characterEncoding == char_encoding_UTF8) { // header so that windows recognize the file as UTF-8: - unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile; - encoding="utf-8"; - } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS"; + unsigned char h[3] = {0xEF, 0xBB, 0xBF}; + if (!fwrite(h, 3, 1, f)) return eXMLErrorCannotWriteFile; + encoding = "utf-8"; + } else if (characterEncoding == char_encoding_ShiftJIS) + encoding = "SHIFT-JIS"; - if (!encoding) encoding="ISO-8859-1"; - if (fprintf(f,"\n",encoding)<0) return eXMLErrorCannotWriteFile; - } else - { - if (characterEncoding==char_encoding_UTF8) - { - unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile; + if (!encoding) encoding = "ISO-8859-1"; + if (fprintf(f, "\n", encoding) + < 0) + return eXMLErrorCannotWriteFile; + } else { + if (characterEncoding == char_encoding_UTF8) { + unsigned char h[3] = {0xEF, 0xBB, 0xBF}; + if (!fwrite(h, 3, 1, f)) return eXMLErrorCannotWriteFile; } } #endif int i; - XMLSTR t=createXMLString(nFormat,&i); - if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile; - if (fclose(f)!=0) return eXMLErrorCannotWriteFile; + XMLSTR t = createXMLString(nFormat, &i); + if (!fwrite(t, sizeof(XMLCHAR)*i, 1, f)) return eXMLErrorCannotWriteFile; + if (fclose(f) != 0) return eXMLErrorCannotWriteFile; free(t); return eXMLErrorNone; } // Duplicate a given string. -XMLSTR stringDup(XMLCSTR lpszData, int cbData) -{ - if (lpszData==NULL) return NULL; +XMLSTR stringDup(XMLCSTR lpszData, int cbData) { + if (lpszData == NULL) return NULL; XMLSTR lpszNew; - if (cbData==-1) cbData=(int)xstrlen(lpszData); - lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR)); - if (lpszNew) - { + if (cbData == -1) cbData = (int)xstrlen(lpszData); + lpszNew = (XMLSTR)malloc((cbData + 1) * sizeof(XMLCHAR)); + if (lpszNew) { memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); lpszNew[cbData] = (XMLCHAR)NULL; } return lpszNew; } -XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source) -{ - XMLSTR dd=dest; +XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest, XMLCSTR source) { + XMLSTR dd = dest; XMLCHAR ch; XMLCharacterEntity *entity; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; } + while ((ch = *source)) { + entity = XMLEntities; + do { + if (ch == entity->c) { + xstrcpy(dest, entity->s); + dest += entity->l; + source++; + goto out_of_loop1; + } entity++; - } while(entity->s); + } while (entity->s); #ifdef _XMLWIDECHAR - *(dest++)=*(source++); + *(dest++) = *(source++); #else - switch(XML_ByteTable[(unsigned char)ch]) - { - case 4: *(dest++)=*(source++); - case 3: *(dest++)=*(source++); - case 2: *(dest++)=*(source++); - case 1: *(dest++)=*(source++); + switch (XML_ByteTable[(unsigned char)ch]) { + case 4: + *(dest++) = *(source++); + case 3: + *(dest++) = *(source++); + case 2: + *(dest++) = *(source++); + case 1: + *(dest++) = *(source++); } #endif out_of_loop1: ; } - *dest=0; + *dest = 0; return dd; } // private (used while rendering): -int ToXMLStringTool::lengthXMLString(XMLCSTR source) -{ - int r=0; +int ToXMLStringTool::lengthXMLString(XMLCSTR source) { + int r = 0; XMLCharacterEntity *entity; XMLCHAR ch; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; } + while ((ch = *source)) { + entity = XMLEntities; + do { + if (ch == entity->c) { + r += entity->l; + source++; + goto out_of_loop1; + } entity++; - } while(entity->s); + } while (entity->s); #ifdef _XMLWIDECHAR - r++; source++; + r++; + source++; #else - ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch; + ch = XML_ByteTable[(unsigned char)ch]; + r += ch; + source += ch; #endif out_of_loop1: ; @@ -711,18 +875,25 @@ out_of_loop1: return r; } -ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); } -void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } -XMLSTR ToXMLStringTool::toXML(XMLCSTR source) -{ - int l=lengthXMLString(source)+1; - if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); } - return toXMLUnSafe(buf,source); +ToXMLStringTool::~ToXMLStringTool() { + freeBuffer(); +} +void ToXMLStringTool::freeBuffer() { + if (buf) free(buf); + buf = NULL; + buflen = 0; +} +XMLSTR ToXMLStringTool::toXML(XMLCSTR source) { + int l = lengthXMLString(source) + 1; + if (l > buflen) { + buflen = l; + buf = (XMLSTR)realloc(buf, l * sizeof(XMLCHAR)); + } + return toXMLUnSafe(buf, source); } // private: -XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) -{ +XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) { // This function is the opposite of the function "toXMLString". It decodes the escape // sequences &, ", ', <, > and replace them by the characters // &,",',<,>. This function is used internally by the XML Parser. All the calls to @@ -732,108 +903,134 @@ XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) // out: new allocated string converted from xml if (!s) return NULL; - int ll=0,j; + int ll = 0, j; XMLSTR d; - XMLCSTR ss=s; + XMLCSTR ss = s; XMLCharacterEntity *entity; - while ((lo>0)&&(*s)) - { - if (*s==_CXML('&')) - { - if ((lo>2)&&(s[1]==_CXML('#'))) - { - s+=2; lo-=2; - if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; } - while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++; - if (*s!=_CXML(';')) - { - pXML->error=eXMLErrorUnknownCharacterEntity; + while ((lo > 0) && (*s)) { + if (*s == _CXML('&')) { + if ((lo > 2) && (s[1] == _CXML('#'))) { + s += 2; + lo -= 2; + if ((*s == _CXML('X')) || (*s == _CXML('x'))) { + s++; + lo--; + } + while ((*s) && (*s != _CXML(';')) && ((lo--) > 0)) { + s++; + } + if (*s != _CXML(';')) { + pXML->error = eXMLErrorUnknownCharacterEntity; return NULL; } - s++; lo--; - } else - { - entity=XMLEntities; - do - { - if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; } + s++; + lo--; + } else { + entity = XMLEntities; + do { + if ((lo >= entity->l) && + (xstrnicmp(s, entity->s, entity->l) == 0)) { + s += entity->l; + lo -= entity->l; + break; + } entity++; - } while(entity->s); - if (!entity->s) - { - pXML->error=eXMLErrorUnknownCharacterEntity; + } while (entity->s); + if (!entity->s) { + pXML->error = eXMLErrorUnknownCharacterEntity; return NULL; } } - } else - { + } else { #ifdef _XMLWIDECHAR - s++; lo--; + s++; + lo--; #else - j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1; + j = XML_ByteTable[(unsigned char)*s]; + s += j; + lo -= j; + ll += j - 1; #endif } ll++; } - d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR)); - s=d; - while (ll-->0) - { - if (*ss==_CXML('&')) - { - if (ss[1]==_CXML('#')) - { - ss+=2; j=0; - if ((*ss==_CXML('X'))||(*ss==_CXML('x'))) - { + d = (XMLSTR)malloc((ll + 1) * sizeof(XMLCHAR)); + s = d; + while (ll-- > 0) { + if (*ss == _CXML('&')) { + if (ss[1] == _CXML('#')) { + ss += 2; + j = 0; + if ((*ss == _CXML('X')) || (*ss == _CXML('x'))) { ss++; - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0'); - else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10; - else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10; - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} + while (*ss != _CXML(';')) { + if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) { + j = (j << 4) + *ss - _CXML('0'); + } else if ((*ss >= _CXML('A')) && (*ss <= _CXML('F'))) { + j = (j << 4) + *ss - _CXML('A') + 10; + } else if ((*ss >= _CXML('a')) && (*ss <= _CXML('f'))) { + j = (j << 4) + *ss - _CXML('a') + 10; + } else { + free((void*)s); + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } ss++; } - } else - { - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0'); - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} + } else { + while (*ss != _CXML(';')) { + if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) { + j = (j * 10) + *ss - _CXML('0'); + } else { + free((void*)s); + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } ss++; } } #ifndef _XMLWIDECHAR - if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;} + if (j > 255) { + free((void*)s); + pXML->error = eXMLErrorCharacterCodeAbove255; + return NULL; + } #endif - (*d++)=(XMLCHAR)j; ss++; - } else - { - entity=XMLEntities; - do - { - if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; } + (*d++) = (XMLCHAR)j; + ss++; + } else { + entity = XMLEntities; + do { + if (xstrnicmp(ss, entity->s, entity->l) == 0) { + *(d++) = entity->c; + ss += entity->l; + break; + } entity++; - } while(entity->s); + } while (entity->s); } - } else - { + } else { #ifdef _XMLWIDECHAR - *(d++)=*(ss++); + *(d++) = *(ss++); #else - switch(XML_ByteTable[(unsigned char)*ss]) - { - case 4: *(d++)=*(ss++); ll--; - case 3: *(d++)=*(ss++); ll--; - case 2: *(d++)=*(ss++); ll--; - case 1: *(d++)=*(ss++); + switch (XML_ByteTable[(unsigned char)*ss]) { + case 4: + *(d++) = *(ss++); + ll--; + case 3: + *(d++) = *(ss++); + ll--; + case 2: + *(d++) = *(ss++); + ll--; + case 1: + *(d++) = *(ss++); } #endif } } - *d=0; + *d = 0; return (XMLSTR)s; } @@ -846,66 +1043,64 @@ char myTagCompare(XMLCSTR cclose, XMLCSTR copen) // return 1 if different { if (!cclose) return 1; - int l=(int)xstrlen(cclose); - if (xstrnicmp(cclose, copen, l)!=0) return 1; - const XMLCHAR c=copen[l]; - if (XML_isSPACECHAR(c)|| - (c==_CXML('/' ))|| - (c==_CXML('<' ))|| - (c==_CXML('>' ))|| - (c==_CXML('=' ))) return 0; + int l = (int)xstrlen(cclose); + if (xstrnicmp(cclose, copen, l) != 0) return 1; + const XMLCHAR c = copen[l]; + if (XML_isSPACECHAR(c) || + (c == _CXML('/' )) || + (c == _CXML('<' )) || + (c == _CXML('>' )) || + (c == _CXML('=' ))) return 0; return 1; } // Obtain the next character from the string. -static inline XMLCHAR getNextChar(XML *pXML) -{ +static inline XMLCHAR getNextChar(XML *pXML) { XMLCHAR ch = pXML->lpXML[pXML->nIndex]; #ifdef _XMLWIDECHAR - if (ch!=0) pXML->nIndex++; + if (ch != 0) pXML->nIndex++; #else - pXML->nIndex+=XML_ByteTable[(unsigned char)ch]; + pXML->nIndex += XML_ByteTable[(unsigned char)ch]; #endif return ch; } // Find the next token in a string. // pcbToken contains the number of characters that have been read. -static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) -{ +static NextToken GetNextToken(XML *pXML, int *pcbToken, + enum XMLTokenTypeTag *pType) { NextToken result; XMLCHAR ch; XMLCHAR chTemp; - int indexStart,nFoundMatch,nIsText=FALSE; - result.pClr=NULL; // prevent warning + int indexStart, nFoundMatch, nIsText = FALSE; + result.pClr = NULL; // prevent warning // Find next non-white space character - do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch); + do { + indexStart = pXML->nIndex; + ch = getNextChar(pXML); + } while XML_isSPACECHAR(ch); - if (ch) - { + if (ch) { // Cache the current string pointer result.pStr = &pXML->lpXML[indexStart]; // First check whether the token is in the clear tag list (meaning it // does not need formatting). - ALLXMLClearTag *ctag=XMLClearTags; - do - { - if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0) - { - result.pClr=ctag; - pXML->nIndex+=ctag->openTagLen-1; - *pType=eTokenClear; + ALLXMLClearTag *ctag = XMLClearTags; + do { + if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen) == 0) { + result.pClr = ctag; + pXML->nIndex += ctag->openTagLen - 1; + *pType = eTokenClear; return result; } ctag++; - } while(ctag->lpszOpen); + } while (ctag->lpszOpen); // If we didn't find a clear tag then check for standard tokens - switch(ch) - { - // Check for quotes + switch (ch) { + // Check for quotes case _CXML('\''): case _CXML('\"'): // Type of token @@ -916,17 +1111,20 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT nFoundMatch = FALSE; // Search through the string to find a matching quote - while((ch = getNextChar(pXML))) - { - if (ch==chTemp) { nFoundMatch = TRUE; break; } - if (ch==_CXML('<')) break; + while ((ch = getNextChar(pXML))) { + if (ch == chTemp) { + nFoundMatch = TRUE; + break; + } + if (ch == _CXML('<')) { + break; + } } // If we failed to find a matching quote - if (nFoundMatch == FALSE) - { - pXML->nIndex=indexStart+1; - nIsText=TRUE; + if (nFoundMatch == FALSE) { + pXML->nIndex = indexStart + 1; + nIsText = TRUE; break; } @@ -935,17 +1133,17 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT break; - // Equals (used with attribute values) + // Equals (used with attribute values) case _CXML('='): *pType = eTokenEquals; break; - // Close tag + // Close tag case _CXML('>'): *pType = eTokenCloseTag; break; - // Check for tag start and tag end + // Check for tag start and tag end case _CXML('<'): // Peek at the next character to see if we have an end tag 'lpXML[pXML->nIndex]; // If we have a tag end... - if (chTemp == _CXML('/')) - { + if (chTemp == _CXML('/')) { // Set the type and ensure we point at the next character getNextChar(pXML); *pType = eTokenTagEnd; } // If we have an XML declaration tag - else if (chTemp == _CXML('?')) - { + else if (chTemp == _CXML('?')) { // Set the type and ensure we point at the next character getNextChar(pXML); @@ -970,21 +1166,19 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT } // Otherwise we must have a start tag - else - { + else { *pType = eTokenTagStart; } break; - // Check to see if we have a short hand type end tag ('/>'). + // Check to see if we have a short hand type end tag ('/>'). case _CXML('/'): // Peek at the next character to see if we have a short end tag '/>' chTemp = pXML->lpXML[pXML->nIndex]; // If we have a short hand end tag... - if (chTemp == _CXML('>')) - { + if (chTemp == _CXML('>')) { // Set the type and ensure we point at the next character getNextChar(pXML); *pType = eTokenShortHandClose; @@ -994,65 +1188,69 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT // If we haven't found a short hand closing tag then drop into the // text process - // Other characters + // Other characters default: nIsText = TRUE; } // If this is a TEXT node - if (nIsText) - { + if (nIsText) { // Indicate we are dealing with text *pType = eTokenText; - while((ch = getNextChar(pXML))) - { - if XML_isSPACECHAR(ch) - { - indexStart++; break; + while ((ch = getNextChar(pXML))) { + if XML_isSPACECHAR(ch) { + indexStart++; + break; - } else if (ch==_CXML('/')) - { + } else if (ch == _CXML('/')) { // If we find a slash then this maybe text or a short hand end tag // Peek at the next character to see it we have short hand end tag - ch=pXML->lpXML[pXML->nIndex]; + ch = pXML->lpXML[pXML->nIndex]; // If we found a short hand end tag then we need to exit the loop - if (ch==_CXML('>')) { pXML->nIndex--; break; } + if (ch == _CXML('>')) { + pXML->nIndex--; + break; + } - } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('='))) - { - pXML->nIndex--; break; + } else if ((ch == _CXML('<')) || (ch == _CXML('>')) || + (ch == _CXML('='))) { + pXML->nIndex--; + break; } } } - *pcbToken = pXML->nIndex-indexStart; - } else - { + *pcbToken = pXML->nIndex - indexStart; + } else { // If we failed to obtain a valid character *pcbToken = 0; *pType = eTokenError; - result.pStr=NULL; + result.pStr = NULL; } return result; } -XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) -{ - if (!d) { free(lpszName); return NULL; } - if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName); - d->lpszName=lpszName; +XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) { + if (!d) { + free(lpszName); + return NULL; + } + if (d->lpszName && (lpszName != d->lpszName)) free((void*)d->lpszName); + d->lpszName = lpszName; return lpszName; } // private: -XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; } -XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) -{ - d=(XMLNodeData*)malloc(sizeof(XMLNodeData)); - d->ref_count=1; +XMLNode::XMLNode(struct XMLNodeDataTag *p) { + d = p; + (p->ref_count)++; +} +XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) { + d = (XMLNodeData*)malloc(sizeof(XMLNodeData)); + d->ref_count = 1; - d->lpszName=NULL; - d->nChild= 0; + d->lpszName = NULL; + d->nChild = 0; d->nText = 0; d->nClear = 0; d->nAttribute = 0; @@ -1060,25 +1258,35 @@ XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) d->isDeclaration = isDeclaration; d->pParent = pParent; - d->pChild= NULL; - d->pText= NULL; - d->pClear= NULL; - d->pAttribute= NULL; - d->pOrder= NULL; + d->pChild = NULL; + d->pText = NULL; + d->pClear = NULL; + d->pAttribute = NULL; + d->pOrder = NULL; updateName_WOSD(lpszName); } -XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); } -XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); } +XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { + return XMLNode(NULL, lpszName, isDeclaration); +} +XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { + return XMLNode(NULL, stringDup(lpszName), isDeclaration); +} #define MEMORYINCREASE 50 -static inline void myFree(void *p) { if (p) free(p); } -static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) -{ - if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); } - if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem); +static inline void myFree(void *p) { + if (p) free(p); +} +static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) { + if (p == NULL) { + if (memInc) return malloc(memInc*sizeofElem); + return malloc(sizeofElem); + } + if ((memInc == 0) || ((newsize % memInc) == 0)) { + p = realloc(p, (newsize + memInc) * sizeofElem); + } // if (!p) // { // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220); @@ -1087,20 +1295,23 @@ static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) } // private: -XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) -{ - if (index<0) return -1; - int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i; +XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, + XMLElementType xxtype) { + if (index < 0) return -1; + int i = 0, j = (int)((index << 2) + xxtype), *o = d->pOrder; + while (o[i] != j) i++; + return i; } // private: // update "order" information when deleting a content of a XMLNode -int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) -{ - int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t); - memmove(o+i, o+i+1, (n-i)*sizeof(int)); - for (;inChild + d->nText + d->nClear; + int *o = d->pOrder; + int i = findPosition(d, index, t); + memmove(o + i, o + i + 1, (n - i)*sizeof(int)); + for (; i < n; i++) + if ((o[i]&3) == (int)t) o[i] -= 4; // We should normally do: // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int)); // but we skip reallocation because it's too time consuming. @@ -1108,51 +1319,67 @@ int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) return i; } -void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype) -{ +void *XMLNode::addToOrder(int memoryIncrease, int *_pos, int nc, void *p, + int size, XMLElementType xtype) { // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") // out: *_pos is the index inside p - p=myRealloc(p,(nc+1),memoryIncrease,size); - int n=d->nChild+d->nText+d->nClear; - d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int)); - int pos=*_pos,*o=d->pOrder; + p = myRealloc(p, (nc + 1), memoryIncrease, size); + int n = d->nChild + d->nText + d->nClear; + d->pOrder = (int*)myRealloc(d->pOrder, n + 1, memoryIncrease * 3, + sizeof(int)); + int pos = *_pos, *o = d->pOrder; - if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } + if ((pos < 0) || (pos >= n)) { + *_pos = nc; + o[n] = (int)((nc << 2) + xtype); + return p; + } - int i=pos; - memmove(o+i+1, o+i, (n-i)*sizeof(int)); + int i = pos; + memmove(o + i + 1, o + i, (n - i)*sizeof(int)); - while ((pos>2; - memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size); + *_pos = pos = o[pos] >> 2; + memmove(((char*)p) + (pos + 1)*size, ((char*)p) + pos*size, (nc - pos)*size); return p; } // Add a child node to the given element. -XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos) -{ +XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, + char isDeclaration, int pos) { if (!lpszName) return emptyXMLNode; - d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); - d->pChild[pos].d=NULL; - d->pChild[pos]=XMLNode(d,lpszName,isDeclaration); + d->pChild = (XMLNode*)addToOrder(memoryIncrease, &pos, d->nChild, + d->pChild, sizeof(XMLNode), eNodeChild); + d->pChild[pos].d = NULL; + d->pChild[pos] = XMLNode(d, lpszName, isDeclaration); d->nChild++; return d->pChild[pos]; } // Add an attribute to an element. -XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev) -{ +XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease, XMLSTR lpszName, + XMLSTR lpszValuev) { if (!lpszName) return &emptyXMLAttribute; - if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; } - int nc=d->nAttribute; - d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute)); - XMLAttribute *pAttr=d->pAttribute+nc; + if (!d) { + myFree(lpszName); + myFree(lpszValuev); + return &emptyXMLAttribute; + } + int nc = d->nAttribute; + d->pAttribute = (XMLAttribute*)myRealloc(d->pAttribute, (nc + 1), + memoryIncrease, + sizeof(XMLAttribute)); + XMLAttribute *pAttr = d->pAttribute + nc; pAttr->lpszName = lpszName; pAttr->lpszValue = lpszValuev; d->nAttribute++; @@ -1160,26 +1387,35 @@ XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XML } // Add text to the element. -XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) -{ +XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) { if (!lpszValue) return NULL; - if (!d) { myFree(lpszValue); return NULL; } - d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText); - d->pText[pos]=lpszValue; + if (!d) { + myFree(lpszValue); + return NULL; + } + d->pText = (XMLCSTR*)addToOrder(memoryIncrease, &pos, d->nText, d->pText, + sizeof(XMLSTR), eNodeText); + d->pText[pos] = lpszValue; d->nText++; return lpszValue; } // Add clear (unformatted) text to the element. -XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) -{ +XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, + XMLCSTR lpszOpen, XMLCSTR lpszClose, + int pos) { if (!lpszValue) return &emptyXMLClear; - if (!d) { myFree(lpszValue); return &emptyXMLClear; } - d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear); - XMLClear *pNewClear=d->pClear+pos; + if (!d) { + myFree(lpszValue); + return &emptyXMLClear; + } + d->pClear = (XMLClear *)addToOrder(memoryIncrease, &pos, d->nClear, + d->pClear, sizeof(XMLClear), + eNodeClear); + XMLClear *pNewClear = d->pClear + pos; pNewClear->lpszValue = lpszValue; - if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen; - if (!lpszClose) lpszClose=XMLClearTags->lpszClose; + if (!lpszOpen) lpszOpen = XMLClearTags->lpszOpen; + if (!lpszClose) lpszClose = XMLClearTags->lpszClose; pNewClear->lpszOpenTag = lpszOpen; pNewClear->lpszCloseTag = lpszClose; d->nClear++; @@ -1188,41 +1424,44 @@ XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR l // private: // Parse a clear (unformatted) type node. -char XMLNode::parseClearTag(void *px, void *_pClear) -{ - XML *pXML=(XML *)px; - ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear); - int cbTemp=0; - XMLCSTR lpszTemp=NULL; - XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex]; - static XMLCSTR docTypeEnd=_CXML("]>"); +char XMLNode::parseClearTag(void *px, void *_pClear) { + XML *pXML = (XML *)px; + ALLXMLClearTag pClear = *((ALLXMLClearTag*)_pClear); + int cbTemp = 0; + XMLCSTR lpszTemp = NULL; + XMLCSTR lpXML = &pXML->lpXML[pXML->nIndex]; + static XMLCSTR docTypeEnd = _CXML("]>"); // Find the closing tag // Seems the ')) { lpszTemp=pCh; break; } + if (pClear.lpszOpen == XMLClearTags[1].lpszOpen) { + XMLCSTR pCh = lpXML; + while (*pCh) { + if (*pCh == _CXML('<')) { + pClear.lpszClose = docTypeEnd; + lpszTemp = xstrstr(lpXML, docTypeEnd); + break; + } else if (*pCh == _CXML('>')) { + lpszTemp = pCh; + break; + } #ifdef _XMLWIDECHAR pCh++; #else - pCh+=XML_ByteTable[(unsigned char)(*pCh)]; + pCh += XML_ByteTable[(unsigned char)(*pCh)]; #endif } - } else lpszTemp=xstrstr(lpXML, pClear.lpszClose); + } else lpszTemp = xstrstr(lpXML, pClear.lpszClose); - if (lpszTemp) - { + if (lpszTemp) { // Cache the size and increment the index cbTemp = (int)(lpszTemp - lpXML); - pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose); + pXML->nIndex += cbTemp + (int)xstrlen(pClear.lpszClose); // Add the clear node to the current element - addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1); + addClear_priv(MEMORYINCREASE, stringDup(lpXML, cbTemp), + pClear.lpszOpen, pClear.lpszClose, -1); return 0; } @@ -1231,63 +1470,81 @@ char XMLNode::parseClearTag(void *px, void *_pClear) return 1; } -void XMLNode::exactMemory(XMLNodeData *d) -{ - if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int)); - if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode)); - if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute)); - if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR)); - if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear)); +void XMLNode::exactMemory(XMLNodeData *d) { + if (d->pOrder) { + d->pOrder = (int*)realloc(d->pOrder, (d->nChild + d->nText + d->nClear) + * sizeof(int)); + } + if (d->pChild) { + d->pChild = (XMLNode*)realloc(d->pChild, d->nChild * sizeof(XMLNode)); + } + if (d->pAttribute) { + d->pAttribute = (XMLAttribute*)realloc(d->pAttribute, d->nAttribute * + sizeof(XMLAttribute)); + } + if (d->pText) { + d->pText = (XMLCSTR*)realloc(d->pText, d->nText * sizeof(XMLSTR)); + } + if (d->pClear) { + d->pClear = (XMLClear *)realloc(d->pClear, d->nClear * sizeof(XMLClear)); + } } -char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) -{ - XML *pXML=(XML *)pa; - XMLCSTR lpszText=pXML->lpszText; +char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) { + XML *pXML = (XML *)pa; + XMLCSTR lpszText = pXML->lpszText; if (!lpszText) return 0; - if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++; + if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText) && + (lpszText != tokenPStr)) lpszText++; int cbText = (int)(tokenPStr - lpszText); - if (!cbText) { pXML->lpszText=NULL; return 0; } - if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; } - if (!cbText) { pXML->lpszText=NULL; return 0; } - XMLSTR lpt=fromXMLString(lpszText,cbText,pXML); + if (!cbText) { + pXML->lpszText = NULL; + return 0; + } + if (dropWhiteSpace) { + cbText--; + while ((cbText) && XML_isSPACECHAR(lpszText[cbText])) cbText--; + cbText++; + } + if (!cbText) { + pXML->lpszText = NULL; + return 0; + } + XMLSTR lpt = fromXMLString(lpszText, cbText, pXML); if (!lpt) return 1; - pXML->lpszText=NULL; - if (removeCommentsInMiddleOfText && d->nText && d->nClear) - { + pXML->lpszText = NULL; + if (removeCommentsInMiddleOfText && d->nText && d->nClear) { // if the previous insertion was a comment () AND // if the previous previous insertion was a text then, delete the comment and append the text - int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder; - if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText)) - { - int i=o[n]>>2; - if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen) - { + int n = d->nChild + d->nText + d->nClear - 1, *o = d->pOrder; + if (((o[n]&3) == eNodeClear) && ((o[n-1]&3) == eNodeText)) { + int i = o[n] >> 2; + if (d->pClear[i].lpszOpenTag == XMLClearTags[2].lpszOpen) { deleteClear(i); - i=o[n-1]>>2; - n=xstrlen(d->pText[i]); - int n2=xstrlen(lpt)+1; - d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR)); + i = o[n-1] >> 2; + n = xstrlen(d->pText[i]); + int n2 = xstrlen(lpt) + 1; + d->pText[i] = (XMLSTR)realloc((void*)d->pText[i], (n + n2) * + sizeof(XMLCHAR)); if (!d->pText[i]) return 1; - memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR)); + memcpy((void*)(d->pText[i] + n), lpt, n2*sizeof(XMLCHAR)); free(lpt); return 0; } } } - addText_priv(MEMORYINCREASE,lpt,-1); + addText_priv(MEMORYINCREASE, lpt, -1); return 0; } // private: // Recursively parse an XML element. -int XMLNode::ParseXMLElement(void *pa) -{ - XML *pXML=(XML *)pa; +int XMLNode::ParseXMLElement(void *pa) { + XML *pXML = (XML *)pa; int cbToken; enum XMLTokenTypeTag xtype; NextToken token; - XMLCSTR lpszTemp=NULL; - int cbTemp=0; + XMLCSTR lpszTemp = NULL; + int cbTemp = 0; char nDeclaration; XMLNode pNew; enum Status status; // inside or outside a tag @@ -1296,36 +1553,30 @@ int XMLNode::ParseXMLElement(void *pa) assert(pXML); // If this is the first call to the function - if (pXML->nFirst) - { + if (pXML->nFirst) { // Assume we are outside of a tag definition pXML->nFirst = FALSE; status = eOutsideTag; - } else - { + } else { // If this is not the first call then we should only be called when inside a tag. status = eInsideTag; } // Iterate through the tokens in the document - for(;;) - { + for (;;) { // Obtain the next token token = GetNextToken(pXML, &cbToken, &xtype); - if (xtype != eTokenError) - { + if (xtype != eTokenError) { // Check the current status - switch(status) - { + switch (status) { - // If we are outside of a tag definition + // If we are outside of a tag definition case eOutsideTag: // Check what type of token we obtained - switch(xtype) - { - // If we have found text or quoted text + switch (xtype) { + // If we have found text or quoted text case eTokenText: case eTokenCloseTag: /* '>' */ case eTokenShortHandClose: /* '/>' */ @@ -1333,7 +1584,7 @@ int XMLNode::ParseXMLElement(void *pa) case eTokenEquals: break; - // If we found a start tag '<' and declarations 'error = eXMLErrorMissingTagName; return FALSE; } @@ -1359,8 +1609,7 @@ int XMLNode::ParseXMLElement(void *pa) #ifdef APPROXIMATE_PARSING if (d->lpszName && - myTagCompare(d->lpszName, token.pStr) == 0) - { + myTagCompare(d->lpszName, token.pStr) == 0) { // Indicate to the caller that it needs to create a // new element. pXML->lpNewElement = token.pStr; @@ -1372,30 +1621,28 @@ int XMLNode::ParseXMLElement(void *pa) // If the name of the new element differs from the name of // the current element we need to add the new element to // the current one and recurse - pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1); + pNew = addChild_priv(MEMORYINCREASE, + stringDup(token.pStr, cbToken), + nDeclaration, -1); - while (!pNew.isEmpty()) - { + while (!pNew.isEmpty()) { // Callself to process the new node. If we return // FALSE this means we dont have any more // processing to do... if (!pNew.ParseXMLElement(pXML)) return FALSE; - else - { + else { // If the call to recurse this function // evented in a end tag specified in XML then // we need to unwind the calls to this // function until we find the appropriate node // (the element name and end tag name must // match) - if (pXML->cbEndTag) - { + if (pXML->cbEndTag) { // If we are back at the root node then we // have an unmatched end tag - if (!d->lpszName) - { - pXML->error=eXMLErrorUnmatchedEndTag; + if (!d->lpszName) { + pXML->error = eXMLErrorUnmatchedEndTag; return FALSE; } @@ -1403,55 +1650,56 @@ int XMLNode::ParseXMLElement(void *pa) // element then we only need to unwind // once more... - if (myTagCompare(d->lpszName, pXML->lpEndTag)==0) - { + if (myTagCompare(d->lpszName, + pXML->lpEndTag) == 0) { pXML->cbEndTag = 0; } return TRUE; - } else - if (pXML->cbNewElement) - { - // If the call indicated a new element is to - // be created on THIS element. + } else if (pXML->cbNewElement) { + // If the call indicated a new element is to + // be created on THIS element. - // If the name of this element matches the - // name of the element we need to create - // then we need to return to the caller - // and let it process the element. + // If the name of this element matches the + // name of the element we need to create + // then we need to return to the caller + // and let it process the element. - if (myTagCompare(d->lpszName, pXML->lpNewElement)==0) - { - return TRUE; - } - - // Add the new element and recurse - pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1); - pXML->cbNewElement = 0; + if (myTagCompare(d->lpszName, + pXML->lpNewElement) == 0) { + return TRUE; } - else - { - // If we didn't have a new element to create - pNew = emptyXMLNode; - } + // Add the new element and recurse + pNew = + addChild_priv(MEMORYINCREASE, + stringDup(pXML-> + lpNewElement, + pXML-> + cbNewElement), + 0, -1); + pXML->cbNewElement = 0; + } else { + // If we didn't have a new element to create + pNew = emptyXMLNode; + + } } } } break; - // If we found an end tag + // If we found an end tag case eTokenTagEnd: // If we have node text then add this to the element - if (maybeAddTxT(pXML,token.pStr)) return FALSE; + if (maybeAddTxT(pXML, token.pStr)) return FALSE; // Find the name of the end tag token = GetNextToken(pXML, &cbTemp, &xtype); // The end tag should be text - if (xtype != eTokenText) - { + if (xtype != eTokenText) { pXML->error = eXMLErrorMissingEndTagName; return FALSE; } @@ -1459,12 +1707,11 @@ int XMLNode::ParseXMLElement(void *pa) // After the end tag we should find a closing tag token = GetNextToken(pXML, &cbToken, &xtype); - if (xtype != eTokenCloseTag) - { + if (xtype != eTokenCloseTag) { pXML->error = eXMLErrorMissingEndTagName; return FALSE; } - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; // We need to return to the previous caller. If the name // of the tag cannot be found we need to keep returning to @@ -1472,14 +1719,14 @@ int XMLNode::ParseXMLElement(void *pa) if (myTagCompare(d->lpszName, lpszTemp) != 0) #ifdef STRICT_PARSING { - pXML->error=eXMLErrorUnmatchedEndTag; - pXML->nIndexMissigEndTag=pXML->nIndex; + pXML->error = eXMLErrorUnmatchedEndTag; + pXML->nIndexMissigEndTag = pXML->nIndex; return FALSE; } #else { - pXML->error=eXMLErrorMissingEndTag; - pXML->nIndexMissigEndTag=pXML->nIndex; + pXML->error = eXMLErrorMissingEndTag; + pXML->nIndexMissigEndTag = pXML->nIndex; pXML->lpEndTag = lpszTemp; pXML->cbEndTag = cbTemp; } @@ -1489,12 +1736,12 @@ int XMLNode::ParseXMLElement(void *pa) exactMemory(d); return TRUE; - // If we found a clear (unformatted) token + // If we found a clear (unformatted) token case eTokenClear: // If we have node text then add this to the element - if (maybeAddTxT(pXML,token.pStr)) return FALSE; + if (maybeAddTxT(pXML, token.pStr)) return FALSE; if (parseClearTag(pXML, token.pClr)) return FALSE; - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; break; default: @@ -1502,21 +1749,19 @@ int XMLNode::ParseXMLElement(void *pa) } break; - // If we are inside a tag definition we need to search for attributes + // If we are inside a tag definition we need to search for attributes case eInsideTag: // Check what part of the attribute (name, equals, value) we // are looking for. - switch(attrib) - { - // If we are looking for a new attribute + switch (attrib) { + // If we are looking for a new attribute case eAttribName: // Check what the current token type is - switch(xtype) - { - // If the current type is text... - // Eg. 'attribute' + switch (xtype) { + // If the current type is text... + // Eg. 'attribute' case eTokenText: // Cache the token then indicate that we are next to // look for the equals @@ -1525,22 +1770,22 @@ int XMLNode::ParseXMLElement(void *pa) attrib = eAttribEquals; break; - // If we found a closing tag... - // Eg. '>' + // If we found a closing tag... + // Eg. '>' case eTokenCloseTag: // We are now outside the tag status = eOutsideTag; - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; break; - // If we found a short hand '/>' closing tag then we can - // return to the caller + // If we found a short hand '/>' closing tag then we can + // return to the caller case eTokenShortHandClose: exactMemory(d); - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; return TRUE; - // Errors... + // Errors... case eTokenQuotedText: /* '"SomeText"' */ case eTokenTagStart: /* '<' */ case eTokenTagEnd: /* 'error = eXMLErrorUnexpectedToken; return FALSE; - default: break; + default: + break; } break; - // If we are looking for an equals + // If we are looking for an equals case eAttribEquals: // Check what the current token type is - switch(xtype) - { - // If the current type is text... - // Eg. 'Attribute AnotherAttribute' + switch (xtype) { + // If the current type is text... + // Eg. 'Attribute AnotherAttribute' case eTokenText: // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL); + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), NULL); // Cache the token then indicate. We are next to // look for the equals attribute lpszTemp = token.pStr; cbTemp = cbToken; break; - // If we found a closing tag 'Attribute >' or a short hand - // closing tag 'Attribute />' + // If we found a closing tag 'Attribute >' or a short hand + // closing tag 'Attribute />' case eTokenShortHandClose: case eTokenCloseTag: // If we are a declaration element 'lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; if (d->isDeclaration && - (lpszTemp[cbTemp-1]) == _CXML('?')) - { + (lpszTemp[cbTemp-1]) == _CXML('?')) { cbTemp--; - if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose; + if (d->pParent && d->pParent->pParent) { + xtype = eTokenShortHandClose; + } } - if (cbTemp) - { + if (cbTemp) { // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL); + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), NULL); } // If this is the end of the tag then return to the caller - if (xtype == eTokenShortHandClose) - { + if (xtype == eTokenShortHandClose) { exactMemory(d); return TRUE; } @@ -1601,15 +1847,15 @@ int XMLNode::ParseXMLElement(void *pa) status = eOutsideTag; break; - // If we found the equals token... - // Eg. 'Attribute =' + // If we found the equals token... + // Eg. 'Attribute =' case eTokenEquals: // Indicate that we next need to search for the value // for the attribute attrib = eAttribValue; break; - // Errors... + // Errors... case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/ case eTokenTagStart: /* 'Attribute <' */ case eTokenTagEnd: /* 'Attribute error = eXMLErrorUnexpectedToken; return FALSE; - default: break; + default: + break; } break; - // If we are looking for an attribute value + // If we are looking for an attribute value case eAttribValue: // Check what the current token type is - switch(xtype) - { - // If the current type is text or quoted text... - // Eg. 'Attribute = "Value"' or 'Attribute = Value' or - // 'Attribute = 'Value''. + switch (xtype) { + // If the current type is text or quoted text... + // Eg. 'Attribute = "Value"' or 'Attribute = Value' or + // 'Attribute = 'Value''. case eTokenText: case eTokenQuotedText: // If we are a declaration element 'isDeclaration && - (token.pStr[cbToken-1]) == _CXML('?')) - { + (token.pStr[cbToken-1]) == _CXML('?')) { cbToken--; } - if (cbTemp) - { + if (cbTemp) { // Add the valued attribute to the list - if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; } - XMLSTR attrVal=(XMLSTR)token.pStr; - if (attrVal) - { - attrVal=fromXMLString(attrVal,cbToken,pXML); + if (xtype == eTokenQuotedText) { + token.pStr++; + cbToken -= 2; + } + XMLSTR attrVal = (XMLSTR)token.pStr; + if (attrVal) { + attrVal = fromXMLString(attrVal, cbToken, pXML); if (!attrVal) return FALSE; } - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal); + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), + attrVal); } // Indicate we are searching for a new attribute attrib = eAttribName; break; - // Errors... + // Errors... case eTokenTagStart: /* 'Attr = <' */ case eTokenTagEnd: /* 'Attr = ' */ @@ -1667,109 +1915,104 @@ int XMLNode::ParseXMLElement(void *pa) pXML->error = eXMLErrorUnexpectedToken; return FALSE; break; - default: break; + default: + break; } } } } // If we failed to obtain the next token - else - { - if ((!d->isDeclaration)&&(d->pParent)) - { + else { + if ((!d->isDeclaration) && (d->pParent)) { #ifdef STRICT_PARSING - pXML->error=eXMLErrorUnmatchedEndTag; + pXML->error = eXMLErrorUnmatchedEndTag; #else - pXML->error=eXMLErrorMissingEndTag; + pXML->error = eXMLErrorMissingEndTag; #endif - pXML->nIndexMissigEndTag=pXML->nIndex; + pXML->nIndexMissigEndTag = pXML->nIndex; } - maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex); + maybeAddTxT(pXML, pXML->lpXML + pXML->nIndex); return FALSE; } } } // Count the number of lines and columns in an XML string. -static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults) -{ +static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, + XMLResults *pResults) { XMLCHAR ch; assert(lpXML); assert(pResults); - struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; + struct XML xml = { lpXML, lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, + TRUE }; pResults->nLine = 1; pResults->nColumn = 1; - while (xml.nIndexnColumn++; - else - { + else { pResults->nLine++; - pResults->nColumn=1; + pResults->nColumn = 1; } } } // Parse XML and return the root element. -XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) -{ - if (!lpszXML) - { - if (pResults) - { - pResults->error=eXMLErrorNoElements; - pResults->nLine=0; - pResults->nColumn=0; +XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, + XMLResults *pResults) { + if (!lpszXML) { + if (pResults) { + pResults->error = eXMLErrorNoElements; + pResults->nLine = 0; + pResults->nColumn = 0; } return emptyXMLNode; } - XMLNode xnode(NULL,NULL,FALSE); - struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; + XMLNode xnode(NULL, NULL, FALSE); + struct XML xml = { lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, + TRUE }; // Create header element xnode.ParseXMLElement(&xml); enum XMLError error = xml.error; - if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound; - if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node + if (!xnode.nChildNode()) error = eXMLErrorNoXMLTagFound; + if ((xnode.nChildNode() == 1) && (xnode.nElement() == 1)) { + xnode = xnode.getChildNode(); // skip the empty node + } // If no error occurred - if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound)) - { - XMLCSTR name=xnode.getName(); - if (tag&&(*tag)&&((!name)||(xstricmp(name,tag)))) - { - xnode=xnode.getChildNode(tag); - if (xnode.isEmpty()) - { - if (pResults) - { - pResults->error=eXMLErrorFirstTagNotFound; - pResults->nLine=0; - pResults->nColumn=0; + if ((error == eXMLErrorNone) || (error == eXMLErrorMissingEndTag) || + (error == eXMLErrorNoXMLTagFound)) { + XMLCSTR name = xnode.getName(); + if (tag && (*tag) && ((!name) || (xstricmp(name, tag)))) { + xnode = xnode.getChildNode(tag); + if (xnode.isEmpty()) { + if (pResults) { + pResults->error = eXMLErrorFirstTagNotFound; + pResults->nLine = 0; + pResults->nColumn = 0; } return emptyXMLNode; } } - } else - { + } else { // Cleanup: this will destroy all the nodes xnode = emptyXMLNode; } // If we have been given somewhere to place results - if (pResults) - { + if (pResults) { pResults->error = error; // If we have an error - if (error!=eXMLErrorNone) - { - if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag; + if (error != eXMLErrorNone) { + if (error == eXMLErrorMissingEndTag) { + xml.nIndex = xml.nIndexMissigEndTag; + } // Find which line and column it starts on. CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults); } @@ -1777,72 +2020,95 @@ XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) return xnode; } -XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) -{ - if (pResults) { pResults->nLine=0; pResults->nColumn=0; } - FILE *f=xfopen(filename,_CXML("rb")); - if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; } - fseek(f,0,SEEK_END); - int l=ftell(f),headerSz=0; - if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; } - fseek(f,0,SEEK_SET); - unsigned char *buf=(unsigned char*)malloc(l+4); - l=fread(buf,1,l,f); +XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) { + if (pResults) { + pResults->nLine = 0; + pResults->nColumn = 0; + } + FILE *f = xfopen(filename, _CXML("rb")); + if (f == NULL) { + if (pResults) pResults->error = eXMLErrorFileNotFound; + return emptyXMLNode; + } + fseek(f, 0, SEEK_END); + int l = ftell(f), headerSz = 0; + if (!l) { + if (pResults) pResults->error = eXMLErrorEmpty; + fclose(f); + return emptyXMLNode; + } + fseek(f, 0, SEEK_SET); + unsigned char *buf = (unsigned char*)malloc(l + 4); + l = fread(buf, 1, l, f); fclose(f); - buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0; + buf[l] = 0; + buf[l+1] = 0; + buf[l+2] = 0; + buf[l+3] = 0; #ifdef _XMLWIDECHAR - if (guessWideCharChars) - { - if (!myIsTextWideChar(buf,l)) - { - XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy; - if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; } - XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce); - free(buf); buf=(unsigned char*)b2; headerSz=0; - } else - { - if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; - if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; + if (guessWideCharChars) { + if (!myIsTextWideChar(buf, l)) { + XMLNode::XMLCharEncoding ce = XMLNode::char_encoding_legacy; + if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) { + headerSz = 3; + ce = XMLNode::char_encoding_UTF8; + } + XMLSTR b2 = myMultiByteToWideChar((const char*)(buf + headerSz), ce); + free(buf); + buf = (unsigned char*)b2; + headerSz = 0; + } else { + if ((buf[0] == 0xef) && (buf[1] == 0xff)) headerSz = 2; + if ((buf[0] == 0xff) && (buf[1] == 0xfe)) headerSz = 2; } } #else - if (guessWideCharChars) - { - if (myIsTextWideChar(buf,l)) - { - if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; - if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; - char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz)); - free(buf); buf=(unsigned char*)b2; headerSz=0; - } else - { - if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; + if (guessWideCharChars) { + if (myIsTextWideChar(buf, l)) { + if ((buf[0] == 0xef) && (buf[1] == 0xff)) headerSz = 2; + if ((buf[0] == 0xff) && (buf[1] == 0xfe)) headerSz = 2; + char *b2 = myWideCharToMultiByte((const wchar_t*)(buf + headerSz)); + free(buf); + buf = (unsigned char*)b2; + headerSz = 0; + } else { + if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) { + headerSz = 3; + } } } #endif - if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; } - XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults); + if (!buf) { + if (pResults) pResults->error = eXMLErrorCharConversionError; + return emptyXMLNode; + } + XMLNode x = parseString((XMLSTR)(buf + headerSz), tag, pResults); free(buf); return x; } -static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; } +static inline void charmemset(XMLSTR dest, XMLCHAR c, int l) { + while (l--) *(dest++) = c; +} // private: // Creates an user friendly XML string from a given element with // appropriate white space and carriage returns. // // This recurses through all subnodes then adds contents of the nodes to the // string. -int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat) -{ +int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, + int nFormat) { int nResult = 0; - int cb=nFormat<0?0:nFormat; + int cb = nFormat < 0 ? 0 : nFormat; int cbElement; - int nChildFormat=-1; - int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear; - int i,j; - if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2; + int nChildFormat = -1; + int nElementI = pEntry->nChild + pEntry->nText + pEntry->nClear; + int i, j; + if ((nFormat >= 0) && (nElementI == 1) && (pEntry->nText == 1) && + (!pEntry->isDeclaration)) { + nFormat = -2; + } assert(pEntry); @@ -1851,47 +2117,43 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // If the element has no name then assume this is the head node. cbElement = (int)LENSTR(pEntry->lpszName); - if (cbElement) - { + if (cbElement) { // "isDeclaration) lpszMarker[nResult++]=_CXML('?'); + lpszMarker[nResult++] = _CXML('<'); + if (pEntry->isDeclaration) lpszMarker[nResult++] = _CXML('?'); xstrcpy(&lpszMarker[nResult], pEntry->lpszName); - nResult+=cbElement; - lpszMarker[nResult++]=_CXML(' '); + nResult += cbElement; + lpszMarker[nResult++] = _CXML(' '); - } else - { - nResult+=cbElement+2+cb; + } else { + nResult += cbElement + 2 + cb; if (pEntry->isDeclaration) nResult++; } // Enumerate attributes and add them to the string - XMLAttribute *pAttr=pEntry->pAttribute; - for (i=0; inAttribute; i++) - { + XMLAttribute *pAttr = pEntry->pAttribute; + for (i = 0; i < pEntry->nAttribute; i++) { // "Attrib cb = (int)LENSTR(pAttr->lpszName); - if (cb) - { + if (cb) { if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName); nResult += cb; // "Attrib=Value " - if (pAttr->lpszValue) - { - cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); - if (lpszMarker) - { - lpszMarker[nResult]=_CXML('='); - lpszMarker[nResult+1]=_CXML('"'); - if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue); - lpszMarker[nResult+cb+2]=_CXML('"'); + if (pAttr->lpszValue) { + cb = (int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); + if (lpszMarker) { + lpszMarker[nResult] = _CXML('='); + lpszMarker[nResult+1] = _CXML('"'); + if (cb) { + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2], + pAttr->lpszValue); + } + lpszMarker[nResult+cb+2] = _CXML('"'); } - nResult+=cb+3; + nResult += cb + 3; } if (lpszMarker) lpszMarker[nResult] = _CXML(' '); nResult++; @@ -1899,27 +2161,22 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma pAttr++; } - if (pEntry->isDeclaration) - { - if (lpszMarker) - { - lpszMarker[nResult-1]=_CXML('?'); - lpszMarker[nResult]=_CXML('>'); + if (pEntry->isDeclaration) { + if (lpszMarker) { + lpszMarker[nResult-1] = _CXML('?'); + lpszMarker[nResult] = _CXML('>'); } nResult++; - if (nFormat!=-1) - { - if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); + if (nFormat != -1) { + if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); nResult++; } } else // If there are child nodes we need to terminate the start tag - if (nElementI) - { - if (lpszMarker) lpszMarker[nResult-1]=_CXML('>'); - if (nFormat>=0) - { - if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); + if (nElementI) { + if (lpszMarker) lpszMarker[nResult-1] = _CXML('>'); + if (nFormat >= 0) { + if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); nResult++; } } else nResult--; @@ -1927,145 +2184,137 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // Calculate the child format for when we recurse. This is used to // determine the number of spaces used for prefixes. - if (nFormat!=-1) - { - if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1; - else nChildFormat=nFormat; + if (nFormat != -1) { + if (cbElement && (!pEntry->isDeclaration)) nChildFormat = nFormat + 1; + else nChildFormat = nFormat; } // Enumerate through remaining children - for (i=0; ipOrder[i]; - switch((XMLElementType)(j&3)) - { - // Text nodes - case eNodeText: - { - // "Text" - XMLCSTR pChild=pEntry->pText[j>>2]; - cb = (int)ToXMLStringTool::lengthXMLString(pChild); - if (cb) - { - if (nFormat>=0) - { - if (lpszMarker) - { - charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1); - ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild); - lpszMarker[nResult+nFormat+1+cb]=_CXML('\n'); - } - nResult+=cb+nFormat+2; - } else - { - if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild); - nResult += cb; + for (i = 0; i < nElementI; i++) { + j = pEntry->pOrder[i]; + switch ((XMLElementType)(j&3)) { + // Text nodes + case eNodeText: { + // "Text" + XMLCSTR pChild = pEntry->pText[j>>2]; + cb = (int)ToXMLStringTool::lengthXMLString(pChild); + if (cb) { + if (nFormat >= 0) { + if (lpszMarker) { + charmemset(&lpszMarker[nResult], INDENTCHAR, + nFormat + 1); + ToXMLStringTool::toXMLUnSafe( + &lpszMarker[nResult+nFormat+1], pChild); + lpszMarker[nResult+nFormat+1+cb] = _CXML('\n'); } + nResult += cb + nFormat + 2; + } else { + if (lpszMarker) { + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], + pChild); + } + nResult += cb; } - break; } + break; + } // Clear type nodes - case eNodeClear: - { - XMLClear *pChild=pEntry->pClear+(j>>2); - // "OpenTag" - cb = (int)LENSTR(pChild->lpszOpenTag); - if (cb) - { - if (nFormat!=-1) - { - if (lpszMarker) - { - charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1); - xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag); - } - nResult+=cb+nFormat+1; + case eNodeClear: { + XMLClear *pChild = pEntry->pClear + (j >> 2); + // "OpenTag" + cb = (int)LENSTR(pChild->lpszOpenTag); + if (cb) { + if (nFormat != -1) { + if (lpszMarker) { + charmemset(&lpszMarker[nResult], INDENTCHAR, + nFormat + 1); + xstrcpy(&lpszMarker[nResult+nFormat+1], + pChild->lpszOpenTag); } - else - { - if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); - nResult += cb; + nResult += cb + nFormat + 1; + } else { + if (lpszMarker) { + xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); } - } - - // "OpenTag Value" - cb = (int)LENSTR(pChild->lpszValue); - if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue); nResult += cb; } - - // "OpenTag Value CloseTag" - cb = (int)LENSTR(pChild->lpszCloseTag); - if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag); - nResult += cb; - } - - if (nFormat!=-1) - { - if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); - nResult++; - } - break; } + // "OpenTag Value" + cb = (int)LENSTR(pChild->lpszValue); + if (cb) { + if (lpszMarker) { + xstrcpy(&lpszMarker[nResult], pChild->lpszValue); + } + nResult += cb; + } + + // "OpenTag Value CloseTag" + cb = (int)LENSTR(pChild->lpszCloseTag); + if (cb) { + if (lpszMarker) { + xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag); + } + nResult += cb; + } + + if (nFormat != -1) { + if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); + nResult++; + } + break; + } + // Element nodes - case eNodeChild: - { - // Recursively add child nodes - nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat); - break; - } - default: break; + case eNodeChild: { + // Recursively add child nodes + nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, + lpszMarker ? lpszMarker + nResult : 0, + nChildFormat); + break; + } + default: + break; } } - if ((cbElement)&&(!pEntry->isDeclaration)) - { + if ((cbElement) && (!pEntry->isDeclaration)) { // If we have child entries we need to use long XML notation for // closing the element - "blah blah blah" - if (nElementI) - { + if (nElementI) { // "\0" - if (lpszMarker) - { - if (nFormat >=0) - { - charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat); - nResult+=nFormat; + if (lpszMarker) { + if (nFormat >= 0) { + charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat); + nResult += nFormat; } - lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/'); + lpszMarker[nResult] = _CXML('<'); + lpszMarker[nResult+1] = _CXML('/'); nResult += 2; xstrcpy(&lpszMarker[nResult], pEntry->lpszName); nResult += cbElement; - lpszMarker[nResult]=_CXML('>'); + lpszMarker[nResult] = _CXML('>'); if (nFormat == -1) nResult++; - else - { - lpszMarker[nResult+1]=_CXML('\n'); - nResult+=2; + else { + lpszMarker[nResult+1] = _CXML('\n'); + nResult += 2; } - } else - { - if (nFormat>=0) nResult+=cbElement+4+nFormat; - else if (nFormat==-1) nResult+=cbElement+3; - else nResult+=cbElement+4; + } else { + if (nFormat >= 0) nResult += cbElement + 4 + nFormat; + else if (nFormat == -1) nResult += cbElement + 3; + else nResult += cbElement + 4; } - } else - { + } else { // If there are no children we can use shorthand XML notation - // "" // "/>\0" - if (lpszMarker) - { - lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>'); - if (nFormat != -1) lpszMarker[nResult+2]=_CXML('\n'); + if (lpszMarker) { + lpszMarker[nResult] = _CXML('/'); + lpszMarker[nResult+1] = _CXML('>'); + if (nFormat != -1) lpszMarker[nResult+2] = _CXML('\n'); } nResult += nFormat == -1 ? 2 : 3; } @@ -2085,342 +2334,401 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // NULL terminator. // @return XMLSTR - Allocated XML string, you must free // this with free(). -XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const -{ - if (!d) { if (pnSize) *pnSize=0; return NULL; } +XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const { + if (!d) { + if (pnSize) *pnSize = 0; + return NULL; + } XMLSTR lpszResult = NULL; int cbStr; // Recursively Calculate the size of the XML string - if (!dropWhiteSpace) nFormat=0; + if (!dropWhiteSpace) nFormat = 0; nFormat = nFormat ? 0 : -1; cbStr = CreateXMLStringR(d, 0, nFormat); // Alllocate memory for the XML string + the NULL terminator and // create the recursively XML string. - lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR)); + lpszResult = (XMLSTR)malloc((cbStr + 1) * sizeof(XMLCHAR)); CreateXMLStringR(d, lpszResult, nFormat); - lpszResult[cbStr]=_CXML('\0'); + lpszResult[cbStr] = _CXML('\0'); if (pnSize) *pnSize = cbStr; return lpszResult; } -int XMLNode::detachFromParent(XMLNodeData *d) -{ - XMLNode *pa=d->pParent->pChild; - int i=0; - while (((void*)(pa[i].d))!=((void*)d)) i++; +int XMLNode::detachFromParent(XMLNodeData *d) { + XMLNode *pa = d->pParent->pChild; + int i = 0; + while (((void*)(pa[i].d)) != ((void*)d)) i++; d->pParent->nChild--; - if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode)); - else { free(pa); d->pParent->pChild=NULL; } - return removeOrderElement(d->pParent,eNodeChild,i); + if (d->pParent->nChild) { + memmove(pa + i, pa + i + 1, (d->pParent->nChild - i)*sizeof(XMLNode)); + } else { + free(pa); + d->pParent->pChild = NULL; + } + return removeOrderElement(d->pParent, eNodeChild, i); } -XMLNode::~XMLNode() -{ +XMLNode::~XMLNode() { if (!d) return; d->ref_count--; emptyTheNode(0); } -void XMLNode::deleteNodeContent() -{ +void XMLNode::deleteNodeContent() { if (!d) return; - if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; } + if (d->pParent) { + detachFromParent(d); + d->pParent = NULL; + d->ref_count--; + } emptyTheNode(1); } -void XMLNode::emptyTheNode(char force) -{ - XMLNodeData *dd=d; // warning: must stay this way! - if ((dd->ref_count==0)||force) - { +void XMLNode::emptyTheNode(char force) { + XMLNodeData *dd = d; // warning: must stay this way! + if ((dd->ref_count == 0) || force) { if (d->pParent) detachFromParent(d); int i; XMLNode *pc; - for(i=0; inChild; i++) - { - pc=dd->pChild+i; - pc->d->pParent=NULL; + for (i = 0; i < dd->nChild; i++) { + pc = dd->pChild + i; + pc->d->pParent = NULL; pc->d->ref_count--; pc->emptyTheNode(force); } myFree(dd->pChild); - for(i=0; inText; i++) free((void*)dd->pText[i]); + for (i = 0; i < dd->nText; i++) free((void*)dd->pText[i]); myFree(dd->pText); - for(i=0; inClear; i++) free((void*)dd->pClear[i].lpszValue); + for (i = 0; i < dd->nClear; i++) free((void*)dd->pClear[i].lpszValue); myFree(dd->pClear); - for(i=0; inAttribute; i++) - { + for (i = 0; i < dd->nAttribute; i++) { free((void*)dd->pAttribute[i].lpszName); - if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue); + if (dd->pAttribute[i].lpszValue) { + free((void*)dd->pAttribute[i].lpszValue); + } } myFree(dd->pAttribute); myFree(dd->pOrder); myFree((void*)dd->lpszName); - dd->nChild=0; dd->nText=0; dd->nClear=0; dd->nAttribute=0; - dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL; - dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL; + dd->nChild = 0; + dd->nText = 0; + dd->nClear = 0; + dd->nAttribute = 0; + dd->pChild = NULL; + dd->pText = NULL; + dd->pClear = NULL; + dd->pAttribute = NULL; + dd->pOrder = NULL; + dd->lpszName = NULL; + dd->pParent = NULL; } - if (dd->ref_count==0) - { + if (dd->ref_count == 0) { free(dd); - d=NULL; + d = NULL; } } -XMLNode& XMLNode::operator=( const XMLNode& A ) -{ +XMLNode& XMLNode::operator=( const XMLNode & A ) { // shallow copy - if (this != &A) - { - if (d) { d->ref_count--; emptyTheNode(0); } - d=A.d; + if (this != &A) { + if (d) { + d->ref_count--; + emptyTheNode(0); + } + d = A.d; if (d) (d->ref_count) ++ ; } return *this; } -XMLNode::XMLNode(const XMLNode &A) -{ +XMLNode::XMLNode(const XMLNode &A) { // shallow copy - d=A.d; + d = A.d; if (d) (d->ref_count)++ ; } -XMLNode XMLNode::deepCopy() const -{ +XMLNode XMLNode::deepCopy() const { if (!d) return XMLNode::emptyXMLNode; - XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration); - XMLNodeData *p=x.d; - int n=d->nAttribute; - if (n) - { - p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute)); - while (n--) - { - p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName); - p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue); + XMLNode x(NULL, stringDup(d->lpszName), d->isDeclaration); + XMLNodeData *p = x.d; + int n = d->nAttribute; + if (n) { + p->nAttribute = n; + p->pAttribute = (XMLAttribute*)malloc(n * sizeof(XMLAttribute)); + while (n--) { + p->pAttribute[n].lpszName = stringDup(d->pAttribute[n].lpszName); + p->pAttribute[n].lpszValue = stringDup(d->pAttribute[n].lpszValue); } } - if (d->pOrder) - { - n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n); + if (d->pOrder) { + n = (d->nChild + d->nText + d->nClear) * sizeof(int); + p->pOrder = (int*)malloc(n); + memcpy(p->pOrder, d->pOrder, n); } - n=d->nText; - if (n) - { - p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR)); - while(n--) p->pText[n]=stringDup(d->pText[n]); + n = d->nText; + if (n) { + p->nText = n; + p->pText = (XMLCSTR*)malloc(n * sizeof(XMLCSTR)); + while (n--) p->pText[n] = stringDup(d->pText[n]); } - n=d->nClear; - if (n) - { - p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear)); - while (n--) - { - p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag; - p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag; - p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue); + n = d->nClear; + if (n) { + p->nClear = n; + p->pClear = (XMLClear*)malloc(n * sizeof(XMLClear)); + while (n--) { + p->pClear[n].lpszCloseTag = d->pClear[n].lpszCloseTag; + p->pClear[n].lpszOpenTag = d->pClear[n].lpszOpenTag; + p->pClear[n].lpszValue = stringDup(d->pClear[n].lpszValue); } } - n=d->nChild; - if (n) - { - p->nChild=n; p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode)); - while (n--) - { - p->pChild[n].d=NULL; - p->pChild[n]=d->pChild[n].deepCopy(); - p->pChild[n].d->pParent=p; + n = d->nChild; + if (n) { + p->nChild = n; + p->pChild = (XMLNode*)malloc(n * sizeof(XMLNode)); + while (n--) { + p->pChild[n].d = NULL; + p->pChild[n] = d->pChild[n].deepCopy(); + p->pChild[n].d->pParent = p; } } return x; } -XMLNode XMLNode::addChild(XMLNode childNode, int pos) -{ - XMLNodeData *dc=childNode.d; - if ((!dc)||(!d)) return childNode; - if (!dc->lpszName) - { +XMLNode XMLNode::addChild(XMLNode childNode, int pos) { + XMLNodeData *dc = childNode.d; + if ((!dc) || (!d)) return childNode; + if (!dc->lpszName) { // this is a root node: todo: correct fix - int j=pos; - while (dc->nChild) - { - addChild(dc->pChild[0],j); - if (pos>=0) j++; + int j = pos; + while (dc->nChild) { + addChild(dc->pChild[0], j); + if (pos >= 0) j++; } return childNode; } - if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++; - dc->pParent=d; + if (dc->pParent) { + if ((detachFromParent(dc) <= pos) && (dc->pParent == d)) pos--; + } else dc->ref_count++; + dc->pParent = d; // int nc=d->nChild; // d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); - d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); - d->pChild[pos].d=dc; + d->pChild = (XMLNode*)addToOrder(0, &pos, d->nChild, d->pChild, + sizeof(XMLNode), eNodeChild); + d->pChild[pos].d = dc; d->nChild++; return childNode; } -void XMLNode::deleteAttribute(int i) -{ - if ((!d)||(i<0)||(i>=d->nAttribute)) return; +void XMLNode::deleteAttribute(int i) { + if ((!d) || (i < 0) || (i >= d->nAttribute)) return; d->nAttribute--; - XMLAttribute *p=d->pAttribute+i; + XMLAttribute *p = d->pAttribute + i; free((void*)p->lpszName); if (p->lpszValue) free((void*)p->lpszValue); - if (d->nAttribute) memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; } + if (d->nAttribute) { + memmove(p, p + 1, (d->nAttribute - i)*sizeof(XMLAttribute)); + } + else { + free(p); + d->pAttribute = NULL; + } } -void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); } -void XMLNode::deleteAttribute(XMLCSTR lpszName) -{ - int j=0; - getAttribute(lpszName,&j); - if (j) deleteAttribute(j-1); +void XMLNode::deleteAttribute(XMLAttribute *a) { + if (a) deleteAttribute(a->lpszName); +} +void XMLNode::deleteAttribute(XMLCSTR lpszName) { + int j = 0; + getAttribute(lpszName, &j); + if (j) deleteAttribute(j - 1); } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return NULL; } - if (i>=d->nAttribute) - { - if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName, int i) { + if (!d) { + if (lpszNewValue) free(lpszNewValue); + if (lpszNewName) free(lpszNewName); return NULL; } - XMLAttribute *p=d->pAttribute+i; - if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue); - p->lpszValue=lpszNewValue; - if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; }; + if (i >= d->nAttribute) { + if (lpszNewName) return addAttribute_WOSD(lpszNewName, lpszNewValue); + return NULL; + } + XMLAttribute *p = d->pAttribute + i; + if (p->lpszValue && p->lpszValue != lpszNewValue) { + free((void*)p->lpszValue); + } + p->lpszValue = lpszNewValue; + if (lpszNewName && p->lpszName != lpszNewName) { + free((void*)p->lpszName); + p->lpszName = lpszNewName; + }; return p; } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) -{ - if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName); - return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue); +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, + XMLAttribute *oldAttribute) { + if (oldAttribute) { + return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue, + (XMLSTR)newAttribute->lpszName, + oldAttribute->lpszName); + } + return addAttribute_WOSD((XMLSTR)newAttribute->lpszName, + (XMLSTR)newAttribute->lpszValue); } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName) -{ - int j=0; - getAttribute(lpszOldName,&j); - if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1); - else - { - if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); - else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue); +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName, + XMLCSTR lpszOldName) { + int j = 0; + getAttribute(lpszOldName, &j); + if (j) return updateAttribute_WOSD(lpszNewValue, lpszNewName, j - 1); + else { + if (lpszNewName) { + return addAttribute_WOSD(lpszNewName, lpszNewValue); + } else { + return addAttribute_WOSD(stringDup(lpszOldName), lpszNewValue); + } } } -int XMLNode::indexText(XMLCSTR lpszValue) const -{ +int XMLNode::indexText(XMLCSTR lpszValue) const { if (!d) return -1; - int i,l=d->nText; - if (!lpszValue) { if (l) return 0; return -1; } - XMLCSTR *p=d->pText; - for (i=0; inText; + if (!lpszValue) { + if (l) return 0; + return -1; + } + XMLCSTR *p = d->pText; + for (i = 0; i < l; i++) if (lpszValue == p[i]) return i; return -1; } -void XMLNode::deleteText(int i) -{ - if ((!d)||(i<0)||(i>=d->nText)) return; +void XMLNode::deleteText(int i) { + if ((!d) || (i < 0) || (i >= d->nText)) return; d->nText--; - XMLCSTR *p=d->pText+i; + XMLCSTR *p = d->pText + i; free((void*)*p); - if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; } - removeOrderElement(d,eNodeText,i); + if (d->nText) memmove(p, p + 1, (d->nText - i)*sizeof(XMLCSTR)); + else { + free(p); + d->pText = NULL; + } + removeOrderElement(d, eNodeText, i); } -void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); } +void XMLNode::deleteText(XMLCSTR lpszValue) { + deleteText(indexText(lpszValue)); +} -XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } - if (i>=d->nText) return addText_WOSD(lpszNewValue); - XMLCSTR *p=d->pText+i; - if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; } +XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) { + if (!d) { + if (lpszNewValue) free(lpszNewValue); + return NULL; + } + if (i >= d->nText) return addText_WOSD(lpszNewValue); + XMLCSTR *p = d->pText + i; + if (*p != lpszNewValue) { + free((void*)*p); + *p = lpszNewValue; + } return lpszNewValue; } -XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } - int i=indexText(lpszOldValue); - if (i>=0) return updateText_WOSD(lpszNewValue,i); +XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) { + if (!d) { + if (lpszNewValue) free(lpszNewValue); + return NULL; + } + int i = indexText(lpszOldValue); + if (i >= 0) return updateText_WOSD(lpszNewValue, i); return addText_WOSD(lpszNewValue); } -void XMLNode::deleteClear(int i) -{ - if ((!d)||(i<0)||(i>=d->nClear)) return; +void XMLNode::deleteClear(int i) { + if ((!d) || (i < 0) || (i >= d->nClear)) return; d->nClear--; - XMLClear *p=d->pClear+i; + XMLClear *p = d->pClear + i; free((void*)p->lpszValue); - if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; } - removeOrderElement(d,eNodeClear,i); + if (d->nClear) memmove(p, p + 1, (d->nClear - i)*sizeof(XMLClear)); + else { + free(p); + d->pClear = NULL; + } + removeOrderElement(d, eNodeClear, i); } -int XMLNode::indexClear(XMLCSTR lpszValue) const -{ +int XMLNode::indexClear(XMLCSTR lpszValue) const { if (!d) return -1; - int i,l=d->nClear; - if (!lpszValue) { if (l) return 0; return -1; } - XMLClear *p=d->pClear; - for (i=0; inClear; + if (!lpszValue) { + if (l) return 0; + return -1; + } + XMLClear *p = d->pClear; + for (i = 0; i < l; i++) if (lpszValue == p[i].lpszValue) return i; return -1; } -void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); } -void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); } +void XMLNode::deleteClear(XMLCSTR lpszValue) { + deleteClear(indexClear(lpszValue)); +} +void XMLNode::deleteClear(XMLClear *a) { + if (a) deleteClear(a->lpszValue); +} -XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) -{ - if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } - if (i>=d->nClear) return addClear_WOSD(lpszNewContent); - XMLClear *p=d->pClear+i; - if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; } +XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) { + if (!d) { + if (lpszNewContent) free(lpszNewContent); + return NULL; + } + if (i >= d->nClear) return addClear_WOSD(lpszNewContent); + XMLClear *p = d->pClear + i; + if (lpszNewContent != p->lpszValue) { + free((void*)p->lpszValue); + p->lpszValue = lpszNewContent; + } return p; } -XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue) -{ - if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } - int i=indexClear(lpszOldValue); - if (i>=0) return updateClear_WOSD(lpszNewContent,i); +XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, + XMLCSTR lpszOldValue) { + if (!d) { + if (lpszNewContent) free(lpszNewContent); + return NULL; + } + int i = indexClear(lpszOldValue); + if (i >= 0) return updateClear_WOSD(lpszNewContent, i); return addClear_WOSD(lpszNewContent); } -XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP) -{ - if (oldP) return updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue); +XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP, XMLClear *oldP) { + if (oldP) { + return updateClear_WOSD((XMLSTR)newP->lpszValue, + (XMLSTR)oldP->lpszValue); + } return NULL; } -int XMLNode::nChildNode(XMLCSTR name) const -{ +int XMLNode::nChildNode(XMLCSTR name) const { if (!d) return 0; - int i,j=0,n=d->nChild; - XMLNode *pc=d->pChild; - for (i=0; id->lpszName, name)==0) j++; + int i, j = 0, n = d->nChild; + XMLNode *pc = d->pChild; + for (i = 0; i < n; i++) { + if (xstricmp(pc->d->lpszName, name) == 0) j++; pc++; } return j; } -XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const -{ +XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const { if (!d) return emptyXMLNode; - int i=0,n=d->nChild; - if (j) i=*j; - XMLNode *pc=d->pChild+i; - for (; id->lpszName, name)) - { - if (j) *j=i+1; + int i = 0, n = d->nChild; + if (j) i = *j; + XMLNode *pc = d->pChild + i; + for (; i < n; i++) { + if (!xstricmp(pc->d->lpszName, name)) { + if (j) *j = i + 1; return *pc; } pc++; @@ -2428,117 +2736,149 @@ XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const return emptyXMLNode; } -XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const -{ +XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const { if (!d) return emptyXMLNode; - if (j>=0) - { - int i=0; - while (j-->0) getChildNode(name,&i); - return getChildNode(name,&i); + if (j >= 0) { + int i = 0; + while (j-- > 0) getChildNode(name, &i); + return getChildNode(name, &i); } - int i=d->nChild; - while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break; - if (i<0) return emptyXMLNode; + int i = d->nChild; + while (i--) if (!xstricmp(name, d->pChild[i].d->lpszName)) break; + if (i < 0) return emptyXMLNode; return getChildNode(i); } -XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep) -{ - XMLSTR path=stringDup(_path); - XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep); +XMLNode* XMLNode::getChildNodePtr(XMLCSTR name, int *j) const { + if (!d) return &emptyXMLNode; + int i = 0, n = d->nChild; + int foundIndex = 0; + XMLNode *pc = d->pChild + i; + for (; i < n; i++) { + if (!xstricmp(pc->d->lpszName, name)) { + if (*j == foundIndex) return pc; + foundIndex++; + } + pc++; + } + return &emptyXMLNode; +} + +XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, + XMLCHAR sep) { + XMLSTR path = stringDup(_path); + XMLNode x = getChildNodeByPathNonConst(path, createMissing, sep); if (path) free(path); return x; } -XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep) -{ - if ((!path)||(!(*path))) return *this; - XMLNode xn,xbase=*this; - XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0; - tend1=xstrstr(path,sepString); - while(tend1) - { - *tend1=0; - xn=xbase.getChildNode(path); - if (xn.isEmpty()) - { - if (createIfMissing) xn=xbase.addChild(path); - else { *tend1=sep; return XMLNode::emptyXMLNode; } +XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, + char createIfMissing, XMLCHAR sep) { + if ((!path) || (!(*path))) return *this; + XMLNode xn, xbase = *this; + XMLCHAR *tend1, sepString[2]; + sepString[0] = sep; + sepString[1] = 0; + tend1 = xstrstr(path, sepString); + while (tend1) { + *tend1 = 0; + xn = xbase.getChildNode(path); + if (xn.isEmpty()) { + if (createIfMissing) xn = xbase.addChild(path); + else { + *tend1 = sep; + return XMLNode::emptyXMLNode; + } } - *tend1=sep; - xbase=xn; - path=tend1+1; - tend1=xstrstr(path,sepString); + *tend1 = sep; + xbase = xn; + path = tend1 + 1; + tend1 = xstrstr(path, sepString); } - xn=xbase.getChildNode(path); - if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path); + xn = xbase.getChildNode(path); + if (xn.isEmpty() && createIfMissing) xn = xbase.addChild(path); return xn; } -XMLElementPosition XMLNode::positionOfText (int i) const { if (i>=d->nText ) i=d->nText-1; return findPosition(d,i,eNodeText ); } -XMLElementPosition XMLNode::positionOfClear (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); } -XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); } -XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); } -XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); } -XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); } -XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const -{ - if ((!d)||(!x.d)) return -1; - XMLNodeData *dd=x.d; - XMLNode *pc=d->pChild; - int i=d->nChild; - while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild); +XMLElementPosition XMLNode::positionOfText (int i) const { + if (i >= d->nText ) i = d->nText - 1; + return findPosition(d, i, eNodeText ); +} +XMLElementPosition XMLNode::positionOfClear (int i) const { + if (i >= d->nClear) i = d->nClear - 1; + return findPosition(d, i, eNodeClear); +} +XMLElementPosition XMLNode::positionOfChildNode(int i) const { + if (i >= d->nChild) i = d->nChild - 1; + return findPosition(d, i, eNodeChild); +} +XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { + return positionOfText (indexText (lpszValue)); +} +XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { + return positionOfClear(indexClear(lpszValue)); +} +XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { + if (a) return positionOfClear(a->lpszValue); + return positionOfClear(); +} +XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const { + if ((!d) || (!x.d)) return -1; + XMLNodeData *dd = x.d; + XMLNode *pc = d->pChild; + int i = d->nChild; + while (i--) if (pc[i].d == dd) return findPosition(d, i, eNodeChild); return -1; } -XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const -{ +XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const { if (!name) return positionOfChildNode(count); - int j=0; - do { getChildNode(name,&j); if (j<0) return -1; } while (count--); - return findPosition(d,j-1,eNodeChild); + int j = 0; + do { + getChildNode(name, &j); + if (j < 0) return -1; + } while (count--); + return findPosition(d, j - 1, eNodeChild); } -XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const -{ - int i=0,j; - if (k) i=*k; - XMLNode x; - XMLCSTR t; - do - { - x=getChildNode(name,&i); - if (!x.isEmpty()) - { - if (attributeValue) - { - j=0; - do - { - t=x.getAttribute(attributeName,&j); - if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; } - } while (t); - } else - { - if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; } - } - } - } while (!x.isEmpty()); - return emptyXMLNode; +XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name, XMLCSTR attributeName, + XMLCSTR attributeValue, + int *k) const { + int i = 0, j; + if (k) i = *k; + XMLNode x; + XMLCSTR t; + do { + x = getChildNode(name, &i); + if (!x.isEmpty()) { + if (attributeValue) { + j = 0; + do { + t = x.getAttribute(attributeName, &j); + if (t && (xstricmp(attributeValue, t) == 0)) { + if (k) *k = i; + return x; + } + } while (t); + } else { + if (x.isAttributeSet(attributeName)) { + if (k) *k = i; + return x; + } + } + } + } while (!x.isEmpty()); + return emptyXMLNode; } // Find an attribute on an node. -XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const -{ +XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const { if (!d) return NULL; - int i=0,n=d->nAttribute; - if (j) i=*j; - XMLAttribute *pAttr=d->pAttribute+i; - for (; ilpszName, lpszAttrib)==0) - { - if (j) *j=i+1; + int i = 0, n = d->nAttribute; + if (j) i = *j; + XMLAttribute *pAttr = d->pAttribute + i; + for (; i < n; i++) { + if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) { + if (j) *j = i + 1; return pAttr->lpszValue; } pAttr++; @@ -2546,15 +2886,12 @@ XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const return NULL; } -char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const -{ +char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const { if (!d) return FALSE; - int i,n=d->nAttribute; - XMLAttribute *pAttr=d->pAttribute; - for (i=0; ilpszName, lpszAttrib)==0) - { + int i, n = d->nAttribute; + XMLAttribute *pAttr = d->pAttribute; + for (i = 0; i < n; i++) { + if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) { return TRUE; } pAttr++; @@ -2562,159 +2899,283 @@ char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const return FALSE; } -XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const -{ +XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const { if (!d) return NULL; - int i=0; - while (j-->0) getAttribute(name,&i); - return getAttribute(name,&i); + int i = 0; + while (j-- > 0) getAttribute(name, &i); + return getAttribute(name, &i); } -XMLNodeContents XMLNode::enumContents(int i) const -{ +XMLNodeContents XMLNode::enumContents(int i) const { XMLNodeContents c; - if (!d) { c.etype=eNodeNULL; return c; } - if (inAttribute) - { - c.etype=eNodeAttribute; - c.attrib=d->pAttribute[i]; + if (!d) { + c.etype = eNodeNULL; return c; } - i-=d->nAttribute; - c.etype=(XMLElementType)(d->pOrder[i]&3); - i=(d->pOrder[i])>>2; - switch (c.etype) - { - case eNodeChild: c.child = d->pChild[i]; break; - case eNodeText: c.text = d->pText[i]; break; - case eNodeClear: c.clear = d->pClear[i]; break; - default: break; + if (i < d->nAttribute) { + c.etype = eNodeAttribute; + c.attrib = d->pAttribute[i]; + return c; + } + i -= d->nAttribute; + c.etype = (XMLElementType)(d->pOrder[i] & 3); + i = (d->pOrder[i]) >> 2; + switch (c.etype) { + case eNodeChild: + c.child = d->pChild[i]; + break; + case eNodeText: + c.text = d->pText[i]; + break; + case eNodeClear: + c.clear = d->pClear[i]; + break; + default: + break; } return c; } -XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName; } -int XMLNode::nText() const { if (!d) return 0; return d->nText; } -int XMLNode::nChildNode() const { if (!d) return 0; return d->nChild; } -int XMLNode::nAttribute() const { if (!d) return 0; return d->nAttribute; } -int XMLNode::nClear() const { if (!d) return 0; return d->nClear; } -int XMLNode::nElement() const { if (!d) return 0; return d->nAttribute+d->nChild+d->nText+d->nClear; } -XMLClear XMLNode::getClear (int i) const { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; } -XMLAttribute XMLNode::getAttribute (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; } -XMLCSTR XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; } -XMLCSTR XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; } -XMLCSTR XMLNode::getText (int i) const { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; } -XMLNode XMLNode::getChildNode (int i) const { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; } -XMLNode XMLNode::getParentNode ( ) const { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); } -char XMLNode::isDeclaration ( ) const { if (!d) return 0; return d->isDeclaration; } -char XMLNode::isEmpty ( ) const { return (d==NULL); } -XMLNode XMLNode::emptyNode ( ) { return XMLNode::emptyXMLNode; } +XMLCSTR XMLNode::getName() const { + if (!d) return NULL; + return d->lpszName; +} +int XMLNode::nText() const { + if (!d) return 0; + return d->nText; +} +int XMLNode::nChildNode() const { + if (!d) return 0; + return d->nChild; +} +int XMLNode::nAttribute() const { + if (!d) return 0; + return d->nAttribute; +} +int XMLNode::nClear() const { + if (!d) return 0; + return d->nClear; +} +int XMLNode::nElement() const { + if (!d) return 0; + return d->nAttribute + d->nChild + d->nText + d->nClear; +} +XMLClear XMLNode::getClear (int i) const { + if ((!d) || (i >= d->nClear )) return emptyXMLClear; + return d->pClear[i]; +} +XMLAttribute XMLNode::getAttribute (int i) const { + if ((!d) || (i >= d->nAttribute)) return emptyXMLAttribute; + return d->pAttribute[i]; +} +XMLCSTR XMLNode::getAttributeName (int i) const { + if ((!d) || (i >= d->nAttribute)) return NULL; + return d->pAttribute[i].lpszName; +} +XMLCSTR XMLNode::getAttributeValue(int i) const { + if ((!d) || (i >= d->nAttribute)) return NULL; + return d->pAttribute[i].lpszValue; +} +XMLCSTR XMLNode::getText (int i) const { + if ((!d) || (i >= d->nText )) return NULL; + return d->pText[i]; +} +XMLNode XMLNode::getChildNode (int i) const { + if ((!d) || (i >= d->nChild )) return emptyXMLNode; + return d->pChild[i]; +} +XMLNode XMLNode::getParentNode ( ) const { + if ((!d) || (!d->pParent )) return emptyXMLNode; + return XMLNode(d->pParent); +} +char XMLNode::isDeclaration ( ) const { + if (!d) return 0; + return d->isDeclaration; +} +char XMLNode::isEmpty ( ) const { + return (d == NULL); +} +XMLNode XMLNode::emptyNode ( ) { + return XMLNode::emptyXMLNode; +} -XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, XMLElementPosition pos) - { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); } -XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos) - { return addChild_priv(0,lpszName,isDeclaration,pos); } -XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) - { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); } -XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) - { return addAttribute_priv(0,lpszName,lpszValuev); } -XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) - { return addText_priv(0,stringDup(lpszValue),pos); } -XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) - { return addText_priv(0,lpszValue,pos); } -XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) - { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); } -XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) - { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); } -XMLCSTR XMLNode::updateName(XMLCSTR lpszName) - { return updateName_WOSD(stringDup(lpszName)); } -XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) - { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i) - { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName) - { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); } -XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) - { return updateText_WOSD(stringDup(lpszNewValue),i); } -XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) - { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); } -XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) - { return updateClear_WOSD(stringDup(lpszNewContent),i); } -XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) - { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); } -XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP) - { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); } +XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, + XMLElementPosition pos) { + return addChild_priv(0, stringDup(lpszName), isDeclaration, pos); +} +XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, + XMLElementPosition pos) { + return addChild_priv(0, lpszName, isDeclaration, pos); +} +XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) { + return addAttribute_priv(0, stringDup(lpszName), stringDup(lpszValue)); +} +XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) { + return addAttribute_priv(0, lpszName, lpszValuev); +} +XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) { + return addText_priv(0, stringDup(lpszValue), pos); +} +XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) { + return addText_priv(0, lpszValue, pos); +} +XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, + XMLCSTR lpszClose, XMLElementPosition pos) { + return addClear_priv(0, stringDup(lpszValue), lpszOpen, lpszClose, pos); +} +XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, + XMLCSTR lpszClose, XMLElementPosition pos) { + return addClear_priv(0, lpszValue, lpszOpen, lpszClose, pos); +} +XMLCSTR XMLNode::updateName(XMLCSTR lpszName) { + return updateName_WOSD(stringDup(lpszName)); +} +XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, + XMLAttribute *oldAttribute) { + return updateAttribute_WOSD(stringDup(newAttribute->lpszValue), + stringDup(newAttribute->lpszName), + oldAttribute->lpszName); +} +XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, int i) { + return updateAttribute_WOSD(stringDup(lpszNewValue), + stringDup(lpszNewName), i); +} +XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, + XMLCSTR lpszOldName) { + return updateAttribute_WOSD(stringDup(lpszNewValue), + stringDup(lpszNewName), lpszOldName); +} +XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) { + return updateText_WOSD(stringDup(lpszNewValue), i); +} +XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) { + return updateText_WOSD(stringDup(lpszNewValue), lpszOldValue); +} +XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) { + return updateClear_WOSD(stringDup(lpszNewContent), i); +} +XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) { + return updateClear_WOSD(stringDup(lpszNewValue), lpszOldValue); +} +XMLClear *XMLNode::updateClear(XMLClear *newP, XMLClear *oldP) { + return updateClear_WOSD(stringDup(newP->lpszValue), oldP->lpszValue); +} -char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars, - char _dropWhiteSpace, char _removeCommentsInMiddleOfText) -{ - guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText; +char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, + char _guessWideCharChars, + char _dropWhiteSpace, + char _removeCommentsInMiddleOfText) { + guessWideCharChars = _guessWideCharChars; + dropWhiteSpace = _dropWhiteSpace; + removeCommentsInMiddleOfText = _removeCommentsInMiddleOfText; #ifdef _XMLWIDECHAR - if (_characterEncoding) characterEncoding=_characterEncoding; + if (_characterEncoding) characterEncoding = _characterEncoding; #else - switch(_characterEncoding) - { - case char_encoding_UTF8: characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break; - case char_encoding_legacy: characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break; - case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break; - case char_encoding_GB2312: characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break; + switch (_characterEncoding) { + case char_encoding_UTF8: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_utf8ByteTable; + break; + case char_encoding_legacy: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_legacyByteTable; + break; + case char_encoding_ShiftJIS: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_sjisByteTable; + break; + case char_encoding_GB2312: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_gb2312ByteTable; + break; case char_encoding_Big5: - case char_encoding_GBK: characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break; - default: return 1; + case char_encoding_GBK: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_gbk_big5_ByteTable; + break; + default: + return 1; } #endif return 0; } -XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute) -{ +XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf, int l, + char useXMLEncodingAttribute) { #ifdef _XMLWIDECHAR return (XMLCharEncoding)0; #else - if (l<25) return (XMLCharEncoding)0; - if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0; - unsigned char *b=(unsigned char*)buf; - if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8; + if (l < 25) return (XMLCharEncoding)0; + if (guessWideCharChars && (myIsTextWideChar(buf, l))) { + return (XMLCharEncoding)0; + } + unsigned char *b = (unsigned char*)buf; + if ((b[0] == 0xef) && (b[1] == 0xbb) && (b[2] == 0xbf)) { + return char_encoding_UTF8; + } // Match utf-8 model ? - XMLCharEncoding bestGuess=char_encoding_UTF8; - int i=0; - while (i>18 ]; + *(curr++) = base64EncodeTable[(j>>12)&0x3f]; + *(curr++) = base64EncodeTable[(j>> 6)&0x3f]; + *(curr++) = base64EncodeTable[(j )&0x3f]; + if (formatted) { + if (!k) { + *(curr++) = _CXML('\n'); + k = 18; + } + k--; + } } - eLen=inlen-eLen*3; // 0 - 2. - if (eLen==1) - { - *(curr++)=base64EncodeTable[ inbuf[0]>>2 ]; - *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F]; - *(curr++)=base64Fillchar; - *(curr++)=base64Fillchar; - } else if (eLen==2) - { - j=(inbuf[0]<<8)|inbuf[1]; - *(curr++)=base64EncodeTable[ j>>10 ]; - *(curr++)=base64EncodeTable[(j>> 4)&0x3f]; - *(curr++)=base64EncodeTable[(j<< 2)&0x3f]; - *(curr++)=base64Fillchar; + eLen = inlen - eLen * 3; // 0 - 2. + if (eLen == 1) { + *(curr++) = base64EncodeTable[ inbuf[0] >> 2 ]; + *(curr++) = base64EncodeTable[(inbuf[0] << 4) & 0x3F]; + *(curr++) = base64Fillchar; + *(curr++) = base64Fillchar; + } else if (eLen == 2) { + j = (inbuf[0] << 8) | inbuf[1]; + *(curr++) = base64EncodeTable[ j>>10 ]; + *(curr++) = base64EncodeTable[(j>> 4)&0x3f]; + *(curr++) = base64EncodeTable[(j<< 2)&0x3f]; + *(curr++) = base64Fillchar; } - *(curr++)=0; + *(curr++) = 0; return (XMLSTR)buf; } -unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - int size=0; +unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data, XMLError *xe) { + if (xe) *xe = eXMLErrorNone; + int size = 0; unsigned char c; //skip any extra characters (e.g. newlines or spaces) - while (*data) - { + while (*data) { #ifdef _XMLWIDECHAR - if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } + if (*data > 255) { + if (xe) *xe = eXMLErrorBase64DecodeIllegalCharacter; + return 0; + } #endif - c=base64DecodeTable[(unsigned char)(*data)]; - if (c<97) size++; - else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } + c = base64DecodeTable[(unsigned char)(*data)]; + if (c < 97) size++; + else if (c == 98) { + if (xe) *xe = eXMLErrorBase64DecodeIllegalCharacter; + return 0; + } data++; } - if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4; - if (size==0) return 0; - do { data--; size--; } while(*data==base64Fillchar); size++; - return (unsigned int)((size*3)/4); + if (xe && (size % 4 != 0)) *xe = eXMLErrorBase64DataSizeIsNotMultipleOf4; + if (size == 0) return 0; + do { + data--; + size--; + } while (*data == base64Fillchar); + size++; + return (unsigned int)((size*3) / 4); } -unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - int i=0,p=0; - unsigned char d,c; - for(;;) - { +unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, + int len, XMLError *xe) { + if (xe) *xe = eXMLErrorNone; + int i = 0, p = 0; + unsigned char d, c; + for (;;) { #ifdef _XMLWIDECHAR #define BASE64DECODE_READ_NEXT_CHAR(c) \ @@ -2834,58 +3312,82 @@ unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int #endif BASE64DECODE_READ_NEXT_CHAR(c) - if (c==99) { return 2; } - if (c==96) - { - if (p==(int)len) return 2; - if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; + if (c == 99) { + return 2; + } + if (c == 96) { + if (p == (int)len) return 2; + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; return 1; } BASE64DECODE_READ_NEXT_CHAR(d) - if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) { if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; } - buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3)); + if ((d == 99) || (d == 96)) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall; + return 0; + } + buf[p++] = (unsigned char)((c << 2) | ((d >> 4) & 0x3)); BASE64DECODE_READ_NEXT_CHAR(c) - if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) - { - if (c==96) return 2; - if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; + if (c == 99) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (c == 96) return 2; + if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall; return 0; } - if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf)); + if (c == 96) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + buf[p++] = (unsigned char)(((d << 4) & 0xf0) | ((c >> 2) & 0xf)); BASE64DECODE_READ_NEXT_CHAR(d) - if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) - { - if (d==96) return 2; - if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; + if (d == 99 ) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (d == 96) return 2; + if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall; return 0; } - if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - buf[p++]=(unsigned char)(((c<<6)&0xc0)|d); + if (d == 96) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + buf[p++] = (unsigned char)(((c << 6) & 0xc0) | d); } } #undef BASE64DECODE_READ_NEXT_CHAR -void XMLParserBase64Tool::alloc(int newsize) -{ - if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; } - if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; } +void XMLParserBase64Tool::alloc(int newsize) { + if ((!buf) && (newsize)) { + buf = malloc(newsize); + buflen = newsize; + return; + } + if (newsize > buflen) { + buf = realloc(buf, newsize); + buflen = newsize; + } } -unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - unsigned int len=decodeSize(data,xe); - if (outlen) *outlen=len; +unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) { + if (xe) *xe = eXMLErrorNone; + unsigned int len = decodeSize(data, xe); + if (outlen) *outlen = len; if (!len) return NULL; - alloc(len+1); - if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; } + alloc(len + 1); + if (!decode(data, (unsigned char*)buf, len, xe)) { + return NULL; + } return (unsigned char*)buf; } diff --git a/ext/mcpat/xmlParser.h b/ext/mcpat/xmlParser.h index e29136cb9..dd43694bb 100644 --- a/ext/mcpat/xmlParser.h +++ b/ext/mcpat/xmlParser.h @@ -42,6 +42,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (c) 2002, Business-Insight + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * Business-Insight * All rights reserved. * @@ -160,33 +161,32 @@ #define XMLDLLENTRY #ifndef XML_NO_WIDE_CHAR #include // to have 'wcsrtombs' for ANSI version - // to have 'mbsrtowcs' for WIDECHAR version +// to have 'mbsrtowcs' for WIDECHAR version #endif #endif // Some common types for char set portable code #ifdef _XMLWIDECHAR - #define _CXML(c) L ## c - #define XMLCSTR const wchar_t * - #define XMLSTR wchar_t * - #define XMLCHAR wchar_t +#define _CXML(c) L ## c +#define XMLCSTR const wchar_t * +#define XMLSTR wchar_t * +#define XMLCHAR wchar_t #else - #define _CXML(c) c - #define XMLCSTR const char * - #define XMLSTR char * - #define XMLCHAR char +#define _CXML(c) c +#define XMLCSTR const char * +#define XMLSTR char * +#define XMLCHAR char #endif #ifndef FALSE - #define FALSE 0 +#define FALSE 0 #endif /* FALSE */ #ifndef TRUE - #define TRUE 1 +#define TRUE 1 #endif /* TRUE */ /// Enumeration for XML parse errors. -typedef enum XMLError -{ +typedef enum XMLError { eXMLErrorNone = 0, eXMLErrorMissingEndTag, eXMLErrorNoXMLTagFound, @@ -213,30 +213,32 @@ typedef enum XMLError /// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents -typedef enum XMLElementType -{ - eNodeChild=0, - eNodeAttribute=1, - eNodeText=2, - eNodeClear=3, - eNodeNULL=4 +typedef enum XMLElementType { + eNodeChild = 0, + eNodeAttribute = 1, + eNodeText = 2, + eNodeClear = 3, + eNodeNULL = 4 } XMLElementType; /// Structure used to obtain error details if the parse fails. -typedef struct XMLResults -{ +typedef struct XMLResults { enum XMLError error; - int nLine,nColumn; + int nLine; + int nColumn; } XMLResults; /// Structure for XML clear (unformatted) node (usually comments) typedef struct XMLClear { - XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag; + XMLCSTR lpszValue; + XMLCSTR lpszOpenTag; + XMLCSTR lpszCloseTag; } XMLClear; /// Structure for XML attribute. typedef struct XMLAttribute { - XMLCSTR lpszName; XMLCSTR lpszValue; + XMLCSTR lpszName; + XMLCSTR lpszValue; } XMLAttribute; /// XMLElementPosition are not interchangeable with simple indexes @@ -256,9 +258,8 @@ struct XMLNodeContents; *
  • XMLNode::openFileHelper
  • *
  • XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)
  • * */ -typedef struct XMLDLLENTRY XMLNode -{ - private: +typedef struct XMLDLLENTRY XMLNode { +private: struct XMLNodeDataTag; @@ -267,7 +268,7 @@ typedef struct XMLDLLENTRY XMLNode /// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode XMLNode(struct XMLNodeDataTag *p); - public: +public: static XMLCSTR getVersion();///< Return the XMLParser library version number /** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string. @@ -275,7 +276,8 @@ typedef struct XMLDLLENTRY XMLNode * @{ */ /// Parse an XML string and return the root of a XMLNode tree representing the string. - static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL); + static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL, + XMLResults *pResults = NULL); /**< The "parseString" function parse an XML string and return the root of a XMLNode tree. The "opposite" of this function is * the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the * "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error. @@ -288,7 +290,8 @@ typedef struct XMLDLLENTRY XMLNode */ /// Parse an XML file and return the root of a XMLNode tree representing the file. - static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL); + static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL, + XMLResults *pResults = NULL); /**< The "parseFile" function parse an XML file and return the root of a XMLNode tree. The "opposite" of this function is * the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the * "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error. @@ -301,7 +304,7 @@ typedef struct XMLDLLENTRY XMLNode */ /// Parse an XML file and return the root of a XMLNode tree representing the file. A very crude error checking is made. An attempt to guess the Char Encoding used in the file is made. - static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag=NULL); + static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag = NULL); /**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file. * This function also tries to guess char Encoding (UTF-8, ASCII or SHIT-JIS) based on the first 200 bytes of the file. Since each * application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files @@ -322,7 +325,7 @@ typedef struct XMLDLLENTRY XMLNode static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error /// Create an XML string starting from the current XMLNode. - XMLSTR createXMLString(int nFormat=1, int *pnSize=NULL) const; + XMLSTR createXMLString(int nFormat = 1, int *pnSize = NULL) const; /**< The returned string should be free'd using the "freeXMLString" function. * * If nFormat==0, no formatting is required otherwise this returns an user friendly XML string from a given element @@ -330,8 +333,8 @@ typedef struct XMLDLLENTRY XMLNode /// Save the content of an xmlNode inside a file XMLError writeToFile(XMLCSTR filename, - const char *encoding=NULL, - char nFormat=1) const; + const char *encoding = NULL, + char nFormat = 1) const; /**< If nFormat==0, no formatting is required otherwise this returns an user friendly XML string from a given element with appropriate white spaces and carriage returns. * If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8". * If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS". @@ -349,14 +352,15 @@ typedef struct XMLDLLENTRY XMLNode XMLNode getChildNode(int i=0) const; ///< return ith child node XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name. XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing) + XMLNode* getChildNodePtr(XMLCSTR name, int *j) const; XMLNode getChildNodeWithAttribute(XMLCSTR tagName, XMLCSTR attributeName, XMLCSTR attributeValue=NULL, int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing) XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/'); - ///< return the first child node with specific path + ///< return the first child node with specific path XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/'); - ///< return the first child node with specific path. + ///< return the first child node with specific path. int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name int nChildNode() const; ///< nbr of child node @@ -418,12 +422,12 @@ typedef struct XMLDLLENTRY XMLNode */ XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added + XMLClear *updateClear(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added /** @} */ @@ -482,12 +486,12 @@ typedef struct XMLDLLENTRY XMLNode XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear_WOSD(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added + XMLClear *updateClear_WOSD(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added /** @} */ @@ -508,15 +512,14 @@ typedef struct XMLDLLENTRY XMLNode /** @} */ /// Enumeration for XML character encoding. - typedef enum XMLCharEncoding - { - char_encoding_error=0, - char_encoding_UTF8=1, - char_encoding_legacy=2, - char_encoding_ShiftJIS=3, - char_encoding_GB2312=4, - char_encoding_Big5=5, - char_encoding_GBK=6 // this is actually the same as Big5 + typedef enum XMLCharEncoding { + char_encoding_error = 0, + char_encoding_UTF8 = 1, + char_encoding_legacy = 2, + char_encoding_ShiftJIS = 3, + char_encoding_GB2312 = 4, + char_encoding_Big5 = 5, + char_encoding_GBK = 6 // this is actually the same as Big5 } XMLCharEncoding; /** \addtogroup conversions @@ -589,48 +592,46 @@ typedef struct XMLDLLENTRY XMLNode * If an inconsistency in the encoding is detected, then the return value is "0". */ /** @} */ - private: - // these are functions and structures used internally by the XMLNode class (don't bother about them): +private: + // these are functions and structures used internally by the XMLNode class (don't bother about them): - typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete): - { - XMLCSTR lpszName; // Element name (=NULL if root) - int nChild, // Number of child nodes - nText, // Number of text fields - nClear, // Number of Clear fields (comments) - nAttribute; // Number of attributes - char isDeclaration; // Whether node is an XML declaration - '' - struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root) - XMLNode *pChild; // Array of child nodes - XMLCSTR *pText; // Array of text fields - XMLClear *pClear; // Array of clear fields - XMLAttribute *pAttribute; // Array of attributes - int *pOrder; // order of the child_nodes,text_fields,clear_fields - int ref_count; // for garbage collection (smart pointers) - } XMLNodeData; - XMLNodeData *d; + typedef struct XMLNodeDataTag { // to allow shallow copy and "intelligent/smart" pointers (automatic delete): + XMLCSTR lpszName; // Element name (=NULL if root) + int nChild, // Number of child nodes + nText, // Number of text fields + nClear, // Number of Clear fields (comments) + nAttribute; // Number of attributes + char isDeclaration; // Whether node is an XML declaration - '' + struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root) + XMLNode *pChild; // Array of child nodes + XMLCSTR *pText; // Array of text fields + XMLClear *pClear; // Array of clear fields + XMLAttribute *pAttribute; // Array of attributes + int *pOrder; // order of the child_nodes,text_fields,clear_fields + int ref_count; // for garbage collection (smart pointers) + } XMLNodeData; + XMLNodeData *d; - char parseClearTag(void *px, void *pa); - char maybeAddTxT(void *pa, XMLCSTR tokenPStr); - int ParseXMLElement(void *pXML); - void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype); - int indexText(XMLCSTR lpszValue) const; - int indexClear(XMLCSTR lpszValue) const; - XMLNode addChild_priv(int,XMLSTR,char,int); - XMLAttribute *addAttribute_priv(int,XMLSTR,XMLSTR); - XMLCSTR addText_priv(int,XMLSTR,int); - XMLClear *addClear_priv(int,XMLSTR,XMLCSTR,XMLCSTR,int); - void emptyTheNode(char force); - static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype); - static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat); - static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); - static void exactMemory(XMLNodeData *d); - static int detachFromParent(XMLNodeData *d); + char parseClearTag(void *px, void *pa); + char maybeAddTxT(void *pa, XMLCSTR tokenPStr); + int ParseXMLElement(void *pXML); + void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype); + int indexText(XMLCSTR lpszValue) const; + int indexClear(XMLCSTR lpszValue) const; + XMLNode addChild_priv(int, XMLSTR, char, int); + XMLAttribute *addAttribute_priv(int, XMLSTR, XMLSTR); + XMLCSTR addText_priv(int, XMLSTR, int); + XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int); + void emptyTheNode(char force); + static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype); + static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat); + static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); + static void exactMemory(XMLNodeData *d); + static int detachFromParent(XMLNodeData *d); } XMLNode; /// This structure is given by the function XMLNode::enumContents. -typedef struct XMLNodeContents -{ +typedef struct XMLNodeContents { /// This dictates what's the content of the XMLNodeContent enum XMLElementType etype; /**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */ @@ -664,12 +665,12 @@ XMLDLLENTRY void freeXMLString(XMLSTR t); // {free(t);} * delete them without any trouble. * * @{ */ -XMLDLLENTRY char xmltob(XMLCSTR xmlString,char defautValue=0); -XMLDLLENTRY int xmltoi(XMLCSTR xmlString,int defautValue=0); -XMLDLLENTRY long xmltol(XMLCSTR xmlString,long defautValue=0); -XMLDLLENTRY double xmltof(XMLCSTR xmlString,double defautValue=.0); -XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString,XMLCSTR defautValue=_CXML("")); -XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0')); +XMLDLLENTRY char xmltob(XMLCSTR xmlString, char defautValue=0); +XMLDLLENTRY int xmltoi(XMLCSTR xmlString, int defautValue=0); +XMLDLLENTRY long xmltol(XMLCSTR xmlString, long defautValue=0); +XMLDLLENTRY double xmltof(XMLCSTR xmlString, double defautValue=.0); +XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString, XMLCSTR defautValue=_CXML("")); +XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString, XMLCHAR defautValue=_CXML('\0')); /** @} */ /** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions. @@ -685,10 +686,9 @@ XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0')); * \note If you are creating from scratch an XML file using the provided XMLNode class * you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the * processing job for you during rendering).*/ -typedef struct XMLDLLENTRY ToXMLStringTool -{ +typedef struct XMLDLLENTRY ToXMLStringTool { public: - ToXMLStringTool(): buf(NULL),buflen(0){} + ToXMLStringTool(): buf(NULL), buflen(0){} ~ToXMLStringTool(); void freeBuffer();///