ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concerns that we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint.
This commit is contained in:
parent
1104199115
commit
0deef376d9
File diff suppressed because it is too large
Load diff
|
@ -1,591 +0,0 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef XML_PARSE_H_
|
||||
#define XML_PARSE_H_
|
||||
|
||||
|
||||
//#ifdef WIN32
|
||||
//#define _CRT_SECURE_NO_DEPRECATE
|
||||
//#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "xmlParser.h"
|
||||
using namespace std;
|
||||
|
||||
/*
|
||||
void myfree(char *t); // {free(t);}
|
||||
ToXMLStringTool tx,tx2;
|
||||
*/
|
||||
//all subnodes at the level of system.core(0-n)
|
||||
//cache_policy is added into cache property arrays;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
|
||||
// Predictor sub-record of a core: sizing/configuration fields plus a single
// access counter. Embedded in system_core as the `predictor` member.
// NOTE(review): presumably describes the branch predictor -- confirm against
// the parser that fills it in.
typedef struct{
|
||||
int prediction_width;
|
||||
char prediction_scheme[20];
|
||||
int predictor_size;
|
||||
int predictor_entries;
|
||||
int local_predictor_size[20];
|
||||
int local_predictor_entries;
|
||||
int global_predictor_entries;
|
||||
int global_predictor_bits;
|
||||
int chooser_predictor_entries;
|
||||
int chooser_predictor_bits;
|
||||
double predictor_accesses;
|
||||
} predictor_systemcore;
|
||||
// ITLB sub-record of a core: entry count, cache policy flag, and
// hit/access/miss/conflict counters. Embedded in system_core as `itlb`.
typedef struct{
|
||||
int number_entries;
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
double total_hits;
|
||||
double total_accesses;
|
||||
double total_misses;
|
||||
double conflicts;
|
||||
} itlb_systemcore;
|
||||
// Instruction-cache sub-record of a core. Holds configuration arrays
// (20 slots each) and a cache policy flag, followed by access statistics
// for the cache and its miss/fill/prefetch buffers.
// Embedded in system_core as `icache`.
// NOTE(review): the meaning of each icache_config / buffer_sizes slot is not
// visible here -- confirm against the parser.
typedef struct{
|
||||
//params
|
||||
double icache_config[20];
|
||||
int buffer_sizes[20];
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double read_misses;
|
||||
double replacements;
|
||||
double read_hits;
|
||||
double total_hits;
|
||||
double total_misses;
|
||||
double miss_buffer_access;
|
||||
double fill_buffer_accesses;
|
||||
double prefetch_buffer_accesses;
|
||||
double prefetch_buffer_writes;
|
||||
double prefetch_buffer_reads;
|
||||
double prefetch_buffer_hits;
|
||||
double conflicts;
|
||||
} icache_systemcore;
|
||||
// DTLB sub-record of a core: entry count and cache policy flag, followed by
// read/write access, hit, miss and conflict counters.
// Embedded in system_core as `dtlb`.
typedef struct{
|
||||
//params
|
||||
int number_entries;
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double write_hits;
|
||||
double read_hits;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double total_hits;
|
||||
double total_misses;
|
||||
double conflicts;
|
||||
} dtlb_systemcore;
|
||||
// Data-cache sub-record of a core. Holds configuration arrays (20 slots
// each) and a cache policy flag, followed by read/write statistics for the
// cache and its miss/fill/prefetch buffers, plus wbb_* counters.
// Embedded in system_core as `dcache`.
// NOTE(review): "wbb" presumably means write-back buffer -- confirm.
typedef struct{
|
||||
//params
|
||||
double dcache_config[20];
|
||||
int buffer_sizes[20];
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double total_hits;
|
||||
double total_misses;
|
||||
double read_hits;
|
||||
double write_hits;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double replacements;
|
||||
double write_backs;
|
||||
double miss_buffer_access;
|
||||
double fill_buffer_accesses;
|
||||
double prefetch_buffer_accesses;
|
||||
double prefetch_buffer_writes;
|
||||
double prefetch_buffer_reads;
|
||||
double prefetch_buffer_hits;
|
||||
double wbb_writes;
|
||||
double wbb_reads;
|
||||
double conflicts;
|
||||
} dcache_systemcore;
|
||||
// BTB sub-record of a core: a 20-slot integer configuration array followed
// by read/write access, hit, miss and replacement counters.
// Embedded in system_core as `BTB`.
typedef struct{
|
||||
//params
|
||||
int BTB_config[20];
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double total_hits;
|
||||
double total_misses;
|
||||
double read_hits;
|
||||
double write_hits;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double replacements;
|
||||
} BTB_systemcore;
|
||||
// Per-core record (one element of root_system::core[]). Laid out in three
// sections: core-level parameters, core-level statistics (including the
// per-unit *_duty_cycle values), and the nested predictor / TLB / cache /
// BTB sub-records.
// NOTE(review): units (e.g. of clock_rate) and the meaning of the integer
// enum-like fields (machine_type, rename_scheme, ...) are not visible in
// this header -- confirm against the parser before relying on them.
typedef struct{
|
||||
//all params at the level of system.core(0-n)
|
||||
int clock_rate;
|
||||
bool opt_local;
|
||||
bool x86;
|
||||
int machine_bits;
|
||||
int virtual_address_width;
|
||||
int physical_address_width;
|
||||
int opcode_width;
|
||||
int micro_opcode_width;
|
||||
int instruction_length;
|
||||
int machine_type;
|
||||
int internal_datapath_width;
|
||||
int number_hardware_threads;
|
||||
int fetch_width;
|
||||
int number_instruction_fetch_ports;
|
||||
int decode_width;
|
||||
int issue_width;
|
||||
int peak_issue_width;
|
||||
int commit_width;
|
||||
int pipelines_per_core[20];
|
||||
int pipeline_depth[20];
|
||||
char FPU[20];
|
||||
char divider_multiplier[20];
|
||||
int ALU_per_core;
|
||||
double FPU_per_core;
|
||||
int MUL_per_core;
|
||||
int instruction_buffer_size;
|
||||
int decoded_stream_buffer_size;
|
||||
int instruction_window_scheme;
|
||||
int instruction_window_size;
|
||||
int fp_instruction_window_size;
|
||||
int ROB_size;
|
||||
int archi_Regs_IRF_size;
|
||||
int archi_Regs_FRF_size;
|
||||
int phy_Regs_IRF_size;
|
||||
int phy_Regs_FRF_size;
|
||||
int rename_scheme;
|
||||
int register_windows_size;
|
||||
char LSU_order[20];
|
||||
int store_buffer_size;
|
||||
int load_buffer_size;
|
||||
int memory_ports;
|
||||
char Dcache_dual_pump[20];
|
||||
int RAS_size;
|
||||
int fp_issue_width;
|
||||
int prediction_width;
|
||||
int number_of_BTB;
|
||||
int number_of_BPT;
|
||||
|
||||
//all stats at the level of system.core(0-n)
|
||||
double total_instructions;
|
||||
double int_instructions;
|
||||
double fp_instructions;
|
||||
double branch_instructions;
|
||||
double branch_mispredictions;
|
||||
double committed_instructions;
|
||||
double committed_int_instructions;
|
||||
double committed_fp_instructions;
|
||||
double load_instructions;
|
||||
double store_instructions;
|
||||
double total_cycles;
|
||||
double idle_cycles;
|
||||
double busy_cycles;
|
||||
double instruction_buffer_reads;
|
||||
double instruction_buffer_write;
|
||||
double ROB_reads;
|
||||
double ROB_writes;
|
||||
double rename_accesses;
|
||||
double fp_rename_accesses;
|
||||
double rename_reads;
|
||||
double rename_writes;
|
||||
double fp_rename_reads;
|
||||
double fp_rename_writes;
|
||||
double inst_window_reads;
|
||||
double inst_window_writes;
|
||||
double inst_window_wakeup_accesses;
|
||||
double inst_window_selections;
|
||||
double fp_inst_window_reads;
|
||||
double fp_inst_window_writes;
|
||||
double fp_inst_window_wakeup_accesses;
|
||||
double fp_inst_window_selections;
|
||||
double archi_int_regfile_reads;
|
||||
double archi_float_regfile_reads;
|
||||
double phy_int_regfile_reads;
|
||||
double phy_float_regfile_reads;
|
||||
double phy_int_regfile_writes;
|
||||
double phy_float_regfile_writes;
|
||||
double archi_int_regfile_writes;
|
||||
double archi_float_regfile_writes;
|
||||
double int_regfile_reads;
|
||||
double float_regfile_reads;
|
||||
double int_regfile_writes;
|
||||
double float_regfile_writes;
|
||||
double windowed_reg_accesses;
|
||||
double windowed_reg_transports;
|
||||
double function_calls;
|
||||
double context_switches;
|
||||
double ialu_accesses;
|
||||
double fpu_accesses;
|
||||
double mul_accesses;
|
||||
double cdb_alu_accesses;
|
||||
double cdb_mul_accesses;
|
||||
double cdb_fpu_accesses;
|
||||
double load_buffer_reads;
|
||||
double load_buffer_writes;
|
||||
double load_buffer_cams;
|
||||
double store_buffer_reads;
|
||||
double store_buffer_writes;
|
||||
double store_buffer_cams;
|
||||
double store_buffer_forwards;
|
||||
double main_memory_access;
|
||||
double main_memory_read;
|
||||
double main_memory_write;
|
||||
double pipeline_duty_cycle;
|
||||
|
||||
double IFU_duty_cycle ;
|
||||
double BR_duty_cycle ;
|
||||
double LSU_duty_cycle ;
|
||||
double MemManU_I_duty_cycle;
|
||||
double MemManU_D_duty_cycle ;
|
||||
double ALU_duty_cycle ;
|
||||
double MUL_duty_cycle ;
|
||||
double FPU_duty_cycle ;
|
||||
double ALU_cdb_duty_cycle ;
|
||||
double MUL_cdb_duty_cycle ;
|
||||
double FPU_cdb_duty_cycle ;
|
||||
|
||||
//all subnodes at the level of system.core(0-n)
|
||||
predictor_systemcore predictor;
|
||||
itlb_systemcore itlb;
|
||||
icache_systemcore icache;
|
||||
dtlb_systemcore dtlb;
|
||||
dcache_systemcore dcache;
|
||||
BTB_systemcore BTB;
|
||||
|
||||
} system_core;
|
||||
// L1 directory record (one element of root_system::L1Directory[]):
// configuration parameters followed by access/miss/conflict counters and a
// duty cycle. Field-for-field identical in layout to system_L2Directory.
typedef struct{
|
||||
//params
|
||||
int Directory_type;
|
||||
double Dir_config[20];
|
||||
int buffer_sizes[20];
|
||||
int clockrate;
|
||||
int ports[20];
|
||||
int device_type;
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
char threeD_stack[20];
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double conflicts;
|
||||
double duty_cycle;
|
||||
} system_L1Directory;
|
||||
// L2 directory record (one element of root_system::L2Directory[]):
// configuration parameters followed by access/miss/conflict counters and a
// duty cycle. Field-for-field identical in layout to system_L1Directory.
typedef struct{
|
||||
//params
|
||||
int Directory_type;
|
||||
double Dir_config[20];
|
||||
int buffer_sizes[20];
|
||||
int clockrate;
|
||||
int ports[20];
|
||||
int device_type;
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
char threeD_stack[20];
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double conflicts;
|
||||
double duty_cycle;
|
||||
} system_L2Directory;
|
||||
// L2 cache record (one element of root_system::L2[]). Holds configuration
// parameters, cache and buffer access statistics, and a trailing group
// (merged_dir, homenode_* counters, dir_duty_cycle).
// NOTE(review): the trailing group presumably applies when a directory is
// merged into this cache (merged_dir == true) -- confirm against the users
// of this struct.
typedef struct{
|
||||
//params
|
||||
double L2_config[20];
|
||||
int clockrate;
|
||||
int ports[20];
|
||||
int device_type;
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
char threeD_stack[20];
|
||||
int buffer_sizes[20];
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double total_hits;
|
||||
double total_misses;
|
||||
double read_hits;
|
||||
double write_hits;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double replacements;
|
||||
double write_backs;
|
||||
double miss_buffer_accesses;
|
||||
double fill_buffer_accesses;
|
||||
double prefetch_buffer_accesses;
|
||||
double prefetch_buffer_writes;
|
||||
double prefetch_buffer_reads;
|
||||
double prefetch_buffer_hits;
|
||||
double wbb_writes;
|
||||
double wbb_reads;
|
||||
double conflicts;
|
||||
double duty_cycle;
|
||||
|
||||
bool merged_dir;
|
||||
double homenode_read_accesses;
|
||||
double homenode_write_accesses;
|
||||
double homenode_read_hits;
|
||||
double homenode_write_hits;
|
||||
double homenode_read_misses;
|
||||
double homenode_write_misses;
|
||||
double dir_duty_cycle;
|
||||
} system_L2;
|
||||
// L3 cache record (one element of root_system::L3[]). Same field layout as
// system_L2 except that the configuration array is named L3_config:
// parameters, cache and buffer access statistics, and a trailing group
// (merged_dir, homenode_* counters, dir_duty_cycle).
// NOTE(review): the trailing group presumably applies when a directory is
// merged into this cache (merged_dir == true) -- confirm against the users
// of this struct.
typedef struct{
|
||||
//params
|
||||
double L3_config[20];
|
||||
int clockrate;
|
||||
int ports[20];
|
||||
int device_type;
|
||||
int cache_policy;//0 no write or write-through with non-write allocate;1 write-back with write-allocate
|
||||
char threeD_stack[20];
|
||||
int buffer_sizes[20];
|
||||
//stats
|
||||
double total_accesses;
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double total_hits;
|
||||
double total_misses;
|
||||
double read_hits;
|
||||
double write_hits;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double replacements;
|
||||
double write_backs;
|
||||
double miss_buffer_accesses;
|
||||
double fill_buffer_accesses;
|
||||
double prefetch_buffer_accesses;
|
||||
double prefetch_buffer_writes;
|
||||
double prefetch_buffer_reads;
|
||||
double prefetch_buffer_hits;
|
||||
double wbb_writes;
|
||||
double wbb_reads;
|
||||
double conflicts;
|
||||
double duty_cycle;
|
||||
|
||||
bool merged_dir;
|
||||
double homenode_read_accesses;
|
||||
double homenode_write_accesses;
|
||||
double homenode_read_hits;
|
||||
double homenode_write_hits;
|
||||
double homenode_read_misses;
|
||||
double homenode_write_misses;
|
||||
double dir_duty_cycle;
|
||||
} system_L3;
|
||||
// Crossbar sub-record of a NoC: input/output counts, flit width, input
// buffer sizing, and a single crossbar access counter.
// Embedded in system_NoC as the `xbar0` member.
typedef struct{
|
||||
//params
|
||||
int number_of_inputs_of_crossbars;
|
||||
int number_of_outputs_of_crossbars;
|
||||
int flit_bits;
|
||||
int input_buffer_entries_per_port;
|
||||
int ports_of_input_buffer[20];
|
||||
//stats
|
||||
double crossbar_accesses;
|
||||
} xbar0_systemNoC;
|
||||
// Network-on-chip record (one element of root_system::NoC[]): topology,
// link, port and buffer parameters, a nested crossbar sub-record (xbar0),
// and access / duty-cycle / route-over statistics.
// NOTE(review): the meaning of the boolean `type` and of the integer
// `arbiter_type` is not visible in this header -- confirm against the parser.
typedef struct{
|
||||
//params
|
||||
int clockrate;
|
||||
bool type;
|
||||
bool has_global_link;
|
||||
char topology[20];
|
||||
int horizontal_nodes;
|
||||
int vertical_nodes;
|
||||
int link_throughput;
|
||||
int link_latency;
|
||||
int input_ports;
|
||||
int output_ports;
|
||||
int virtual_channel_per_port;
|
||||
int flit_bits;
|
||||
int input_buffer_entries_per_vc;
|
||||
int ports_of_input_buffer[20];
|
||||
int dual_pump;
|
||||
int number_of_crossbars;
|
||||
char crossbar_type[20];
|
||||
char crosspoint_type[20];
|
||||
xbar0_systemNoC xbar0;
|
||||
int arbiter_type;
|
||||
double chip_coverage;
|
||||
//stats
|
||||
double total_accesses;
|
||||
double duty_cycle;
|
||||
double route_over_perc;
|
||||
} system_NoC;
|
||||
// Main-memory record (root_system::mem): DRAM chip/channel parameters
// followed by memory access, read and write counters.
typedef struct{
|
||||
//params
|
||||
int mem_tech_node;
|
||||
int device_clock;
|
||||
int peak_transfer_rate;
|
||||
int internal_prefetch_of_DRAM_chip;
|
||||
int capacity_per_channel;
|
||||
int number_ranks;
|
||||
int num_banks_of_DRAM_chip;
|
||||
int Block_width_of_DRAM_chip;
|
||||
int output_width_of_DRAM_chip;
|
||||
int page_size_of_DRAM_chip;
|
||||
int burstlength_of_DRAM_chip;
|
||||
//stats
|
||||
double memory_accesses;
|
||||
double memory_reads;
|
||||
double memory_writes;
|
||||
} system_mem;
|
||||
// Controller record used for both root_system::mc and root_system::flashc.
// Fields are grouped as: parameters common to both uses, the "FC" group
// (duty_cycle, total_load_perc), and the "Mc" group (memory-controller
// parameters followed by memory access counters).
// NOTE(review): "fc" presumably means flash controller, matching the
// `flashc` member of root_system -- confirm.
typedef struct{
|
||||
//params
|
||||
//Common Param for mc and fc
|
||||
double peak_transfer_rate;
|
||||
int number_mcs;
|
||||
bool withPHY;
|
||||
int type;
|
||||
|
||||
//FCParam
|
||||
//stats
|
||||
double duty_cycle;
|
||||
double total_load_perc;
|
||||
|
||||
//McParam
|
||||
int mc_clock;
|
||||
int llc_line_length;
|
||||
int memory_channels_per_mc;
|
||||
int number_ranks;
|
||||
int req_window_size_per_channel;
|
||||
int IO_buffer_size_per_channel;
|
||||
int databus_width;
|
||||
int addressbus_width;
|
||||
bool LVDS;
|
||||
|
||||
//stats
|
||||
double memory_accesses;
|
||||
double memory_reads;
|
||||
double memory_writes;
|
||||
} system_mc;
|
||||
|
||||
// NIU record (root_system::niu): clock rate, unit count and type, followed
// by duty-cycle and load-percentage statistics.
// NOTE(review): "niu" presumably means network interface unit -- confirm.
typedef struct{
|
||||
//params
|
||||
int clockrate;
|
||||
int number_units;
|
||||
int type;
|
||||
//stats
|
||||
double duty_cycle;
|
||||
double total_load_perc;
|
||||
} system_niu;
|
||||
|
||||
// PCIe record (root_system::pcie): clock rate, unit and channel counts,
// type and a withPHY flag, followed by duty-cycle and load-percentage
// statistics.
typedef struct{
|
||||
//params
|
||||
int clockrate;
|
||||
int number_units;
|
||||
int num_channels;
|
||||
int type;
|
||||
bool withPHY;
|
||||
//stats
|
||||
double duty_cycle;
|
||||
double total_load_perc;
|
||||
} system_pcie;
|
||||
|
||||
// Top-level system record, stored by value as ParseXML::sys. Contains the
// component counts, system-wide parameters, and fixed-capacity arrays of
// component records (64 slots each for cores, directories, L2s, L3s and
// NoCs) plus single memory, controller, NIU and PCIe records.
// Because every array is embedded by value, one root_system object is large;
// NOTE(review): the number_of_* fields presumably bound how many of the 64
// slots are meaningful -- confirm against the parser.
typedef struct{
|
||||
//All number_of_* at the level of 'system' Ying 03/21/2009
|
||||
int number_of_cores;
|
||||
int number_of_L1Directories;
|
||||
int number_of_L2Directories;
|
||||
int number_of_L2s;
|
||||
bool Private_L2;
|
||||
int number_of_L3s;
|
||||
int number_of_NoCs;
|
||||
int number_of_dir_levels;
|
||||
int domain_size;
|
||||
int first_level_dir;
|
||||
// All params at the level of 'system'
|
||||
int homogeneous_cores;
|
||||
int homogeneous_L1Directories;
|
||||
int homogeneous_L2Directories;
|
||||
double core_tech_node;
|
||||
int target_core_clockrate;
|
||||
int target_chip_area;
|
||||
int temperature;
|
||||
int number_cache_levels;
|
||||
int L1_property;
|
||||
int L2_property;
|
||||
int homogeneous_L2s;
|
||||
int L3_property;
|
||||
int homogeneous_L3s;
|
||||
int homogeneous_NoCs;
|
||||
int homogeneous_ccs;
|
||||
int Max_area_deviation;
|
||||
int Max_power_deviation;
|
||||
int device_type;
|
||||
bool longer_channel_device;
|
||||
bool Embedded;
|
||||
bool opt_dynamic_power;
|
||||
bool opt_lakage_power;
|
||||
bool opt_clockrate;
|
||||
bool opt_area;
|
||||
int interconnect_projection_type;
|
||||
int machine_bits;
|
||||
int virtual_address_width;
|
||||
int physical_address_width;
|
||||
int virtual_memory_page_size;
|
||||
double total_cycles;
|
||||
//system.core(0-n):3rd level
|
||||
system_core core[64];
|
||||
system_L1Directory L1Directory[64];
|
||||
system_L2Directory L2Directory[64];
|
||||
system_L2 L2[64];
|
||||
system_L3 L3[64];
|
||||
system_NoC NoC[64];
|
||||
system_mem mem;
|
||||
system_mc mc;
|
||||
system_mc flashc;
|
||||
system_niu niu;
|
||||
system_pcie pcie;
|
||||
} root_system;
|
||||
|
||||
// Owner of the system description: exposes the root_system record `sys` as a
// public member together with parse(filepath) and initialize(). Both member
// functions are only declared here; their definitions live outside this
// header.
// NOTE(review): presumably initialize() resets `sys` to defaults and
// parse() fills it from the XML file at `filepath` -- confirm against the
// implementation file.
class ParseXML
|
||||
{
|
||||
public:
|
||||
void parse(char* filepath);
|
||||
void initialize();
|
||||
public:
|
||||
root_system sys;
|
||||
};
|
||||
|
||||
|
||||
#endif /* XML_PARSE_H_ */
|
||||
|
||||
|
||||
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,62 +26,84 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#define GLOBALVAR
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "area.h"
|
||||
#include "array.h"
|
||||
#include "common.h"
|
||||
#include "decoder.h"
|
||||
#include "globalvar.h"
|
||||
#include "parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
ArrayST::ArrayST(const InputParameter *configure_interface,
|
||||
string _name,
|
||||
enum Device_ty device_ty_,
|
||||
bool opt_local_,
|
||||
enum Core_type core_ty_,
|
||||
bool _is_default)
|
||||
:l_ip(*configure_interface),
|
||||
name(_name),
|
||||
device_ty(device_ty_),
|
||||
opt_local(opt_local_),
|
||||
core_ty(core_ty_),
|
||||
is_default(_is_default)
|
||||
{
|
||||
double ArrayST::area_efficiency_threshold = 20.0;
|
||||
int ArrayST::ed = 0;
|
||||
//Fixed number, make sure timing can be satisfied.
|
||||
int ArrayST::delay_wt = 100;
|
||||
int ArrayST::cycle_time_wt = 1000;
|
||||
//Fixed number, This is used to exhaustive search for individual components.
|
||||
int ArrayST::area_wt = 10;
|
||||
//Fixed number, This is used to exhaustive search for individual components.
|
||||
int ArrayST::dynamic_power_wt = 10;
|
||||
int ArrayST::leakage_power_wt = 10;
|
||||
//Fixed number, make sure timing can be satisfied.
|
||||
int ArrayST::delay_dev = 1000000;
|
||||
int ArrayST::cycle_time_dev = 100;
|
||||
//Fixed number, This is used to exhaustive search for individual components.
|
||||
int ArrayST::area_dev = 1000000;
|
||||
//Fixed number, This is used to exhaustive search for individual components.
|
||||
int ArrayST::dynamic_power_dev = 1000000;
|
||||
int ArrayST::leakage_power_dev = 1000000;
|
||||
int ArrayST::cycle_time_dev_threshold = 10;
|
||||
|
||||
if (l_ip.cache_sz<64) l_ip.cache_sz=64;
|
||||
l_ip.error_checking();//not only do the error checking but also fill some missing parameters
|
||||
optimize_array();
|
||||
|
||||
ArrayST::ArrayST(XMLNode* _xml_data,
|
||||
const InputParameter *configure_interface, string _name,
|
||||
enum Device_ty device_ty_, double _clockRate,
|
||||
bool opt_local_, enum Core_type core_ty_, bool _is_default)
|
||||
: McPATComponent(_xml_data), l_ip(*configure_interface),
|
||||
device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
|
||||
is_default(_is_default) {
|
||||
name = _name;
|
||||
clockRate = _clockRate;
|
||||
if (l_ip.cache_sz < MIN_BUFFER_SIZE)
|
||||
l_ip.cache_sz = MIN_BUFFER_SIZE;
|
||||
|
||||
if (!l_ip.error_checking(name)) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
output_data.reset();
|
||||
|
||||
void ArrayST::compute_base_power()
|
||||
{
|
||||
//l_ip.out_w =l_ip.line_sz*8;
|
||||
computeEnergy();
|
||||
computeArea();
|
||||
}
|
||||
|
||||
void ArrayST::compute_base_power() {
|
||||
local_result = cacti_interface(&l_ip);
|
||||
|
||||
}
|
||||
|
||||
void ArrayST::optimize_array()
|
||||
{
|
||||
void ArrayST::computeArea() {
|
||||
area.set_area(local_result.area);
|
||||
output_data.area = local_result.area / 1e6;
|
||||
}
|
||||
|
||||
void ArrayST::computeEnergy() {
|
||||
list<uca_org_t > candidate_solutions(0);
|
||||
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
|
||||
|
||||
uca_org_t * temp_res = 0;
|
||||
uca_org_t* temp_res = NULL;
|
||||
local_result.valid = false;
|
||||
|
||||
double throughput=l_ip.throughput, latency=l_ip.latency;
|
||||
double area_efficiency_threshold = 20.0;
|
||||
bool throughput_overflow=true, latency_overflow=true;
|
||||
double throughput = l_ip.throughput;
|
||||
double latency = l_ip.latency;
|
||||
bool throughput_overflow = true;
|
||||
bool latency_overflow = true;
|
||||
compute_base_power();
|
||||
|
||||
if ((local_result.cycle_time - throughput) <= 1e-10 )
|
||||
|
@ -88,119 +111,100 @@ void ArrayST::optimize_array()
|
|||
if ((local_result.access_time - latency) <= 1e-10)
|
||||
latency_overflow = false;
|
||||
|
||||
if (opt_for_clk && opt_local)
|
||||
{
|
||||
if (throughput_overflow || latency_overflow)
|
||||
{
|
||||
l_ip.ed=0;
|
||||
if (opt_for_clk && opt_local) {
|
||||
if (throughput_overflow || latency_overflow) {
|
||||
l_ip.ed = ed;
|
||||
|
||||
l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
|
||||
l_ip.cycle_time_wt = 1000;
|
||||
l_ip.delay_wt = delay_wt;
|
||||
l_ip.cycle_time_wt = cycle_time_wt;
|
||||
|
||||
l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
|
||||
l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
|
||||
l_ip.leakage_power_wt = 10;
|
||||
l_ip.area_wt = area_wt;
|
||||
l_ip.dynamic_power_wt = dynamic_power_wt;
|
||||
l_ip.leakage_power_wt = leakage_power_wt;
|
||||
|
||||
l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied.
|
||||
l_ip.cycle_time_dev = 100;
|
||||
l_ip.delay_dev = delay_dev;
|
||||
l_ip.cycle_time_dev = cycle_time_dev;
|
||||
|
||||
l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
|
||||
l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
|
||||
l_ip.leakage_power_dev = 1000000;
|
||||
l_ip.area_dev = area_dev;
|
||||
l_ip.dynamic_power_dev = dynamic_power_dev;
|
||||
l_ip.leakage_power_dev = leakage_power_dev;
|
||||
|
||||
throughput_overflow=true; //Reset overflow flag before start optimization iterations
|
||||
//Reset overflow flag before start optimization iterations
|
||||
throughput_overflow = true;
|
||||
latency_overflow = true;
|
||||
|
||||
temp_res = &local_result; //Clean up the result for optimized for ED^2P
|
||||
//Clean up the result for optimized for ED^2P
|
||||
temp_res = &local_result;
|
||||
temp_res->cleanup();
|
||||
}
|
||||
|
||||
|
||||
while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10
|
||||
{
|
||||
while ((throughput_overflow || latency_overflow) &&
|
||||
l_ip.cycle_time_dev > cycle_time_dev_threshold) {
|
||||
compute_base_power();
|
||||
|
||||
l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration
|
||||
//This is the time_dev to be used for next iteration
|
||||
l_ip.cycle_time_dev -= cycle_time_dev_threshold;
|
||||
|
||||
// from best area to worst area -->worst timing to best timing
|
||||
if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)||
|
||||
(local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0))
|
||||
{ //if no satisfiable solution is found,the most aggressive one is left
|
||||
if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
|
||||
(local_result.access_time - latency) <= 1e-10) ||
|
||||
(local_result.data_array2->area_efficiency <
|
||||
area_efficiency_threshold && l_ip.assoc == 0)) {
|
||||
//if no satisfiable solution is found,the most aggressive one
|
||||
//is left
|
||||
candidate_solutions.push_back(local_result);
|
||||
//output_data_csv(candidate_solutions.back());
|
||||
if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10))
|
||||
if (((local_result.cycle_time - throughput) <= 1e-10) &&
|
||||
((local_result.access_time - latency) <= 1e-10)) {
|
||||
//ensure stop opt not because of cam
|
||||
{
|
||||
throughput_overflow = false;
|
||||
latency_overflow = false;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
//TODO: whether checking the partial satisfied results too, or just change the mark???
|
||||
} else {
|
||||
if ((local_result.cycle_time - throughput) <= 1e-10)
|
||||
throughput_overflow = false;
|
||||
if ((local_result.access_time - latency) <= 1e-10)
|
||||
latency_overflow = false;
|
||||
|
||||
if (l_ip.cycle_time_dev > 10)
|
||||
{ //if not >10 local_result is the last result, it cannot be cleaned up
|
||||
temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up
|
||||
//if not >10 local_result is the last result, it cannot be
|
||||
//cleaned up
|
||||
if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
|
||||
//Only solutions not saved in the list need to be
|
||||
//cleaned up
|
||||
temp_res = &local_result;
|
||||
temp_res->cleanup();
|
||||
}
|
||||
}
|
||||
// l_ip.cycle_time_dev-=10;
|
||||
// l_ip.delay_dev-=10;
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (l_ip.assoc > 0)
|
||||
{
|
||||
//For array structures except CAM and FA, Give warning but still provide a result with best timing found
|
||||
if (l_ip.assoc > 0) {
|
||||
//For array structures except CAM and FA, Give warning but still
|
||||
//provide a result with best timing found
|
||||
if (throughput_overflow == true)
|
||||
cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl;
|
||||
cout << "Warning: " << name
|
||||
<< " array structure cannot satisfy throughput constraint."
|
||||
<< endl;
|
||||
if (latency_overflow == true)
|
||||
cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl;
|
||||
cout << "Warning: " << name
|
||||
<< " array structure cannot satisfy latency constraint."
|
||||
<< endl;
|
||||
}
|
||||
|
||||
// else
|
||||
// {
|
||||
// /*According to "Content-Addressable Memory (CAM) Circuits and
|
||||
// Architectures": A Tutorial and Survey
|
||||
// by Kostas Pagiamtzis et al.
|
||||
// CAM structures can be heavily pipelined and use look-ahead techniques,
|
||||
// therefore timing can be relaxed. But McPAT does not model the advanced
|
||||
// techniques. If continue optimizing, the area efficiency will be too low
|
||||
// */
|
||||
// //For CAM and FA, stop opt if area efficiency is too low
|
||||
// if (throughput_overflow==true)
|
||||
// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
|
||||
// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
|
||||
// if (latency_overflow==true)
|
||||
// cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name
|
||||
// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
|
||||
// }
|
||||
|
||||
//double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time;
|
||||
double min_dynamic_energy = BIGNUM;
|
||||
if (candidate_solutions.empty()==false)
|
||||
{
|
||||
if (candidate_solutions.empty() == false) {
|
||||
local_result.valid = true;
|
||||
for (candidate_iter = candidate_solutions.begin(); candidate_iter != candidate_solutions.end(); ++candidate_iter)
|
||||
|
||||
{
|
||||
if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic)
|
||||
{
|
||||
min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
|
||||
for (candidate_iter = candidate_solutions.begin();
|
||||
candidate_iter != candidate_solutions.end();
|
||||
++candidate_iter) {
|
||||
if (min_dynamic_energy >
|
||||
(candidate_iter)->power.readOp.dynamic) {
|
||||
min_dynamic_energy =
|
||||
(candidate_iter)->power.readOp.dynamic;
|
||||
min_dynamic_energy_iter = candidate_iter;
|
||||
local_result = *(min_dynamic_energy_iter);
|
||||
//TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
candidate_iter->cleanup() ;
|
||||
}
|
||||
|
||||
|
@ -211,7 +215,8 @@ void ArrayST::optimize_array()
|
|||
candidate_solutions.clear();
|
||||
}
|
||||
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
||||
double long_channel_device_reduction =
|
||||
longer_channel_device_reduction(device_ty, core_ty);
|
||||
|
||||
double macro_layout_overhead = g_tp.macro_layout_overhead;
|
||||
double chip_PR_overhead = g_tp.chip_layout_overhead;
|
||||
|
@ -235,22 +240,28 @@ void ArrayST::optimize_array()
|
|||
local_result.data_array2->power.searchOp.dynamic *= sckRation;
|
||||
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
|
||||
local_result.data_array2->power.readOp.longer_channel_leakage =
|
||||
local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
|
||||
local_result.data_array2->power.readOp.leakage *
|
||||
long_channel_device_reduction;
|
||||
local_result.data_array2->power = local_result.data_array2->power * pppm_t;
|
||||
|
||||
|
||||
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
|
||||
{
|
||||
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
|
||||
local_result.tag_array2->power.readOp.dynamic *= sckRation;
|
||||
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
|
||||
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
|
||||
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
|
||||
local_result.tag_array2->power.readOp.longer_channel_leakage =
|
||||
local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
|
||||
local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
|
||||
local_result.tag_array2->power.readOp.leakage *
|
||||
long_channel_device_reduction;
|
||||
local_result.tag_array2->power =
|
||||
local_result.tag_array2->power * pppm_t;
|
||||
}
|
||||
|
||||
power = local_result.power;
|
||||
|
||||
output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
|
||||
output_data.subthreshold_leakage_power = power.readOp.leakage;
|
||||
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||
}
|
||||
|
||||
void ArrayST::leakage_feedback(double temperature)
|
||||
|
@ -296,7 +307,6 @@ void ArrayST::leakage_feedback(double temperature)
|
|||
}
|
||||
}
|
||||
|
||||
ArrayST:: ~ArrayST()
|
||||
{
|
||||
ArrayST::~ArrayST() {
|
||||
local_result.cleanup();
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -43,59 +44,42 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
class ArrayST :public Component{
|
||||
class ArrayST : public McPATComponent {
|
||||
public:
|
||||
ArrayST(){};
|
||||
ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true);
|
||||
static double area_efficiency_threshold;
|
||||
|
||||
// These are used for the CACTI interface.
|
||||
static int ed;
|
||||
static int delay_wt;
|
||||
static int cycle_time_wt;
|
||||
static int area_wt;
|
||||
static int dynamic_power_wt;
|
||||
static int leakage_power_wt;
|
||||
static int delay_dev;
|
||||
static int cycle_time_dev;
|
||||
static int area_dev;
|
||||
static int dynamic_power_dev;
|
||||
static int leakage_power_dev;
|
||||
static int cycle_time_dev_threshold;
|
||||
|
||||
InputParameter l_ip;
|
||||
string name;
|
||||
enum Device_ty device_ty;
|
||||
bool opt_local;
|
||||
enum Core_type core_ty;
|
||||
bool is_default;
|
||||
uca_org_t local_result;
|
||||
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
|
||||
virtual void optimize_array();
|
||||
virtual void compute_base_power();
|
||||
virtual ~ArrayST();
|
||||
ArrayST(XMLNode* _xml_data, const InputParameter *configure_interface,
|
||||
string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
|
||||
bool opt_local_ = true,
|
||||
enum Core_type core_ty_ = Inorder, bool _is_default = true);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
void compute_base_power();
|
||||
~ArrayST();
|
||||
|
||||
void leakage_feedback(double temperature);
|
||||
};
|
||||
|
||||
class InstCache :public Component{
|
||||
public:
|
||||
ArrayST* caches;
|
||||
ArrayST* missb;
|
||||
ArrayST* ifb;
|
||||
ArrayST* prefetchb;
|
||||
powerDef power_t;//temp value holder for both (max) power and runtime power
|
||||
InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;};
|
||||
~InstCache(){
|
||||
if (caches) {//caches->local_result.cleanup();
|
||||
delete caches; caches=0;}
|
||||
if (missb) {//missb->local_result.cleanup();
|
||||
delete missb; missb=0;}
|
||||
if (ifb) {//ifb->local_result.cleanup();
|
||||
delete ifb; ifb=0;}
|
||||
if (prefetchb) {//prefetchb->local_result.cleanup();
|
||||
delete prefetchb; prefetchb=0;}
|
||||
};
|
||||
};
|
||||
|
||||
class DataCache :public InstCache{
|
||||
public:
|
||||
ArrayST* wbb;
|
||||
DataCache(){wbb=0;};
|
||||
~DataCache(){
|
||||
if (wbb) {//wbb->local_result.cleanup();
|
||||
delete wbb; wbb=0;}
|
||||
};
|
||||
};
|
||||
|
||||
#endif /* TLB_H_ */
|
||||
#endif /* ARRAY_H_ */
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -34,11 +35,253 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "basic_components.h"
|
||||
#include "cacheunit.h"
|
||||
#include "common.h"
|
||||
|
||||
// Turn this to true to get debugging messages
|
||||
bool McPATComponent::debug = false;
|
||||
|
||||
bool McPATComponent::opt_for_clk = true;
|
||||
int McPATComponent::longer_channel_device = 0;
|
||||
// Number of cycles per second, 2GHz = 2e9
|
||||
double McPATComponent::target_core_clockrate = 2e9;
|
||||
double McPATComponent::total_cycles = 0.0f;
|
||||
double McPATComponent::execution_time = 0.0f;
|
||||
int McPATComponent::physical_address_width = 0;
|
||||
int McPATComponent::virtual_address_width = 0;
|
||||
int McPATComponent::virtual_memory_page_size = 0;
|
||||
int McPATComponent::data_path_width = 0;
|
||||
|
||||
/**
 * Set every output field (storage, area, the three power figures and the
 * runtime dynamic energy) back to zero.
 */
void McPATOutput::reset() {
    // Size-like quantities
    storage = area = 0.0;
    // Power figures
    peak_dynamic_power = 0.0;
    subthreshold_leakage_power = gate_leakage_power = 0.0;
    // Energy figure
    runtime_dynamic_energy = 0.0;
}
|
||||
|
||||
/**
 * Field-wise sum of two output records.
 *
 * @param lhs left operand
 * @param rhs right operand
 * @return a new McPATOutput in which every field is lhs.field + rhs.field
 */
McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs) {
    // Start from a copy of the left operand and accumulate the right one
    // with the member operator+=, which adds field by field.
    McPATOutput sum(lhs);
    sum += rhs;
    return sum;
}
|
||||
|
||||
/**
 * Accumulate another output record into this one, field by field.
 *
 * @param rhs record whose fields are added to the fields of *this
 */
void McPATOutput::operator+=(const McPATOutput &rhs) {
    storage = storage + rhs.storage;
    area = area + rhs.area;
    peak_dynamic_power = peak_dynamic_power + rhs.peak_dynamic_power;
    subthreshold_leakage_power =
        subthreshold_leakage_power + rhs.subthreshold_leakage_power;
    gate_leakage_power = gate_leakage_power + rhs.gate_leakage_power;
    runtime_dynamic_energy =
        runtime_dynamic_energy + rhs.runtime_dynamic_energy;
}
|
||||
|
||||
/**
 * Default constructor: no XML node is attached and the name is empty.
 *
 * clockRate and output_data are explicitly zeroed so that reading them
 * before computeArea()/computeEnergy() has run does not observe
 * indeterminate values.
 */
McPATComponent::McPATComponent()
        : xml_data(NULL), name(""), clockRate(0.0) {
    output_data.reset();
}

/**
 * Construct a component bound to an XML node.
 *
 * @param _xml_data XML node this component reads its inputs from
 */
McPATComponent::McPATComponent(XMLNode* _xml_data)
        : xml_data(_xml_data), name(""), clockRate(0.0) {
    output_data.reset();
}

/**
 * Construct a component bound to an XML node, copying the supplied
 * interface parameters into interface_ip.
 *
 * @param _xml_data     XML node this component reads its inputs from
 * @param _interface_ip parameters to copy; it is dereferenced
 *                      unconditionally, so it must not be NULL
 */
McPATComponent::McPATComponent(XMLNode* _xml_data,
                               InputParameter* _interface_ip)
        : xml_data(_xml_data), interface_ip(*_interface_ip), name(""),
          clockRate(0.0) {
    output_data.reset();
}
|
||||
|
||||
// Destructor. The body is intentionally empty: in particular, the pointers
// stored in `children` are not deleted here.
// NOTE(review): confirm who owns the objects in `children`.
McPATComponent::~McPATComponent()
{
}
|
||||
|
||||
void McPATComponent::recursiveInstantiate() {
|
||||
if (debug) {
|
||||
fprintf(stderr, "WARNING: Called recursiveInstantiate from %s, with ",
|
||||
"'type' %s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||
}
|
||||
int i;
|
||||
int numChildren = xml_data->nChildNode("component");
|
||||
for (i = 0; i < numChildren; i++ ) {
|
||||
// For each child node of the system,
|
||||
XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
|
||||
XMLCSTR type = childXML->getAttribute("type");
|
||||
|
||||
if (!type)
|
||||
warnMissingComponentType(childXML->getAttribute("id"));
|
||||
|
||||
STRCMP(type, "Core")
|
||||
warnIncompleteComponentType(type);
|
||||
STRCMP(type, "CacheUnit")
|
||||
children.push_back(new CacheUnit(childXML, &interface_ip));
|
||||
STRCMP(type, "CacheController")
|
||||
warnIncompleteComponentType(type);
|
||||
STRCMP(type, "MemoryController")
|
||||
warnIncompleteComponentType(type);
|
||||
STRCMP(type, "Memory")
|
||||
warnIncompleteComponentType(type);
|
||||
STRCMP(type, "OnChipNetwork")
|
||||
warnIncompleteComponentType(type);
|
||||
STRCMP(type, "BusInterconnect")
|
||||
warnIncompleteComponentType(type);
|
||||
STRCMP(type, "Directory")
|
||||
warnIncompleteComponentType(type);
|
||||
|
||||
else
|
||||
warnUnrecognizedComponent(type);
|
||||
}
|
||||
}
|
||||
|
||||
void McPATComponent::computeArea() {
|
||||
if (debug) {
|
||||
fprintf(stderr, "WARNING: Called computeArea from %s, with 'type' ",
|
||||
"%s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||
}
|
||||
|
||||
// TODO: This calculation is incorrect and is overwritten by computeEnergy
|
||||
// Fix it up so that the values are available at the correct times
|
||||
int i;
|
||||
int numChildren = children.size();
|
||||
area.set_area(0.0);
|
||||
output_data.area = 0.0;
|
||||
for (i = 0; i < numChildren; i++) {
|
||||
children[i]->computeArea();
|
||||
output_data.area += area.get_area();
|
||||
}
|
||||
}
|
||||
|
||||
void McPATComponent::computeEnergy() {
|
||||
if (debug) {
|
||||
fprintf(stderr, "WARNING: Called computeEnergy from %s, with 'type' ",
|
||||
"%s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||
}
|
||||
|
||||
power.reset();
|
||||
rt_power.reset();
|
||||
memset(&output_data, 0, sizeof(McPATOutput));
|
||||
int i;
|
||||
int numChildren = children.size();
|
||||
for (i = 0; i < numChildren; i++) {
|
||||
children[i]->computeEnergy();
|
||||
output_data += children[i]->output_data;
|
||||
}
|
||||
}
|
||||
|
||||
void McPATComponent::displayData(uint32_t indent, int plevel) {
|
||||
if (debug) {
|
||||
fprintf(stderr, "WARNING: Called displayData from %s, with 'type' ",
|
||||
"%s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||
}
|
||||
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent + 2, ' ');
|
||||
|
||||
double leakage_power = output_data.subthreshold_leakage_power +
|
||||
output_data.gate_leakage_power;
|
||||
double total_runtime_energy = output_data.runtime_dynamic_energy +
|
||||
leakage_power * execution_time;
|
||||
cout << indent_str << name << ":" << endl;
|
||||
cout << indent_str_next << "Area = " << output_data.area << " mm^2"
|
||||
<< endl;
|
||||
cout << indent_str_next << "Peak Dynamic Power = "
|
||||
<< output_data.peak_dynamic_power << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage Power = "
|
||||
<< output_data.subthreshold_leakage_power << " W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage Power = "
|
||||
<< output_data.gate_leakage_power << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic Power = "
|
||||
<< (output_data.runtime_dynamic_energy / execution_time) << " W"
|
||||
<< endl;
|
||||
cout << indent_str_next << "Runtime Dynamic Energy = "
|
||||
<< output_data.runtime_dynamic_energy << " J" << endl;
|
||||
cout << indent_str_next << "Total Runtime Energy = "
|
||||
<< total_runtime_energy << " J" << endl;
|
||||
cout << endl;
|
||||
|
||||
// Recursively print children
|
||||
int i;
|
||||
int numChildren = children.size();
|
||||
for (i = 0; i < numChildren; i++) {
|
||||
children[i]->displayData(indent + 4, plevel);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Report on stderr that a required parameter was not specified for this
 * component, then terminate the process with exit status 1.
 *
 * @param param name of the missing parameter
 */
void McPATComponent::errorUnspecifiedParam(string param) {
    const char *component_name = name.c_str();
    const char *param_name = param.c_str();
    fprintf(stderr, "ERROR: Parameter must be specified in %s: %s\n",
            component_name, param_name);
    exit(1);
}
|
||||
|
||||
/**
 * Report on stderr that a parameter of this component must be positive,
 * then terminate the process with exit status 1.
 *
 * @param param name of the offending parameter
 */
void McPATComponent::errorNonPositiveParam(string param) {
    const char *component_name = name.c_str();
    const char *param_name = param.c_str();
    fprintf(stderr, "ERROR: Parameter must be positive in %s: %s\n",
            component_name, param_name);
    exit(1);
}
|
||||
|
||||
/**
 * Warn on stderr that a child component type was not recognized.
 *
 * @param component the unrecognized type string, printed with %s
 */
void McPATComponent::warnUnrecognizedComponent(XMLCSTR component) {
    const char *owner = name.c_str();
    fprintf(stderr, "WARNING: Component type not recognized in %s: %s\n",
            owner, component);
}
|
||||
|
||||
/**
 * Warn on stderr that a parameter name was not recognized.
 *
 * @param param the unrecognized parameter name, printed with %s
 */
void McPATComponent::warnUnrecognizedParam(XMLCSTR param) {
    const char *owner = name.c_str();
    fprintf(stderr, "WARNING: Parameter not recognized in %s: %s\n",
            owner, param);
}
|
||||
|
||||
/**
 * Warn on stderr that a statistic name was not recognized.
 *
 * @param stat the unrecognized statistic name, printed with %s
 */
void McPATComponent::warnUnrecognizedStat(XMLCSTR stat) {
    const char *owner = name.c_str();
    fprintf(stderr, "WARNING: Statistic not recognized in %s: %s\n",
            owner, stat);
}
|
||||
|
||||
/**
 * Warn on stderr that handling of the given component type has not been
 * completed yet.
 *
 * @param type the component type string, printed with %s
 */
void McPATComponent::warnIncompleteComponentType(XMLCSTR type)
{
    fprintf(stderr,
            " WARNING: %s handling not yet complete\n",
            type);
}
|
||||
|
||||
/**
 * Warn on stderr that a component is being ignored because it has no type.
 *
 * @param id identifier of the ignored component; when NULL the warning
 *           names this (the enclosing) component instead
 */
void McPATComponent::warnMissingComponentType(XMLCSTR id) {
    // No id available: identify the location by the parent's name.
    if (!id) {
        fprintf(stderr,
                "WARNING: Ignoring a component in %s due to the missing type\n",
                name.c_str());
        return;
    }

    fprintf(stderr,
            "WARNING: Ignoring a component due to the missing type: %s\n",
            id);
}
|
||||
|
||||
/**
 * Warn on stderr that a parameter is being ignored because it has no name.
 *
 * @param id identifier of the ignored parameter; when NULL the warning
 *           names this component instead
 */
void McPATComponent::warnMissingParamName(XMLCSTR id) {
    // No id available: identify the location by the component's name.
    if (!id) {
        fprintf(stderr,
                "WARNING: Ignoring a parameter in %s due to the missing name\n",
                name.c_str());
        return;
    }

    fprintf(stderr,
            "WARNING: Ignoring a parameter due to the missing name: %s\n",
            id);
}
|
||||
|
||||
/**
 * Warn on stderr that a statistic is being ignored because it has no name.
 *
 * @param id identifier of the ignored statistic; when NULL the warning
 *           names this component instead
 */
void McPATComponent::warnMissingStatName(XMLCSTR id) {
    // No id available: identify the location by the component's name.
    if (!id) {
        fprintf(stderr,
                "WARNING: Ignoring a statistic in %s due to the missing name\n",
                name.c_str());
        return;
    }

    fprintf(stderr,
            "WARNING: Ignoring a statistic due to the missing name: %s\n",
            id);
}
|
||||
|
||||
double longer_channel_device_reduction(
|
||||
enum Device_ty device_ty,
|
||||
enum Core_type core_ty)
|
||||
{
|
||||
enum Core_type core_ty) {
|
||||
|
||||
double longer_channel_device_percentage_core;
|
||||
double longer_channel_device_percentage_uncore;
|
||||
|
@ -48,44 +291,38 @@ double longer_channel_device_reduction(
|
|||
|
||||
longer_channel_device_percentage_llc = 1.0;
|
||||
longer_channel_device_percentage_uncore = 0.82;
|
||||
if (core_ty==OOO)
|
||||
{
|
||||
longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam
|
||||
//longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
longer_channel_device_percentage_core = 0.8;//0.8;//Niagara
|
||||
//longer_channel_device_percentage_uncore = 0.9;//Niagara
|
||||
if (core_ty == OOO) {
|
||||
//0.54 Xeon Tulsa //0.58 Nehelam
|
||||
longer_channel_device_percentage_core = 0.56;
|
||||
} else {
|
||||
//0.8;//Niagara
|
||||
longer_channel_device_percentage_core = 0.8;
|
||||
}
|
||||
|
||||
if (device_ty==Core_device)
|
||||
{
|
||||
long_channel_device_reduction = (1- longer_channel_device_percentage_core)
|
||||
+ longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction;
|
||||
}
|
||||
else if (device_ty==Uncore_device)
|
||||
{
|
||||
long_channel_device_reduction = (1- longer_channel_device_percentage_uncore)
|
||||
+ longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction;
|
||||
}
|
||||
else if (device_ty==LLC_device)
|
||||
{
|
||||
long_channel_device_reduction = (1- longer_channel_device_percentage_llc)
|
||||
+ longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout<<"unknown device category"<<endl;
|
||||
if (device_ty == Core_device) {
|
||||
long_channel_device_reduction =
|
||||
(1 - longer_channel_device_percentage_core) +
|
||||
longer_channel_device_percentage_core *
|
||||
g_tp.peri_global.long_channel_leakage_reduction;
|
||||
} else if (device_ty == Uncore_device) {
|
||||
long_channel_device_reduction =
|
||||
(1 - longer_channel_device_percentage_uncore) +
|
||||
longer_channel_device_percentage_uncore *
|
||||
g_tp.peri_global.long_channel_leakage_reduction;
|
||||
} else if (device_ty == LLC_device) {
|
||||
long_channel_device_reduction =
|
||||
(1 - longer_channel_device_percentage_llc) +
|
||||
longer_channel_device_percentage_llc *
|
||||
g_tp.peri_global.long_channel_leakage_reduction;
|
||||
} else {
|
||||
cout << "ERROR: Unknown device category: " << device_ty << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
return long_channel_device_reduction;
|
||||
}
|
||||
|
||||
statsComponents operator+(const statsComponents & x, const statsComponents & y)
|
||||
{
|
||||
statsComponents operator+(const statsComponents & x, const statsComponents & y) {
|
||||
statsComponents z;
|
||||
|
||||
z.access = x.access + y.access;
|
||||
|
@ -95,8 +332,7 @@ statsComponents operator+(const statsComponents & x, const statsComponents & y)
|
|||
return z;
|
||||
}
|
||||
|
||||
statsComponents operator*(const statsComponents & x, double const * const y)
|
||||
{
|
||||
statsComponents operator*(const statsComponents & x, double const * const y) {
|
||||
statsComponents z;
|
||||
|
||||
z.access = x.access * y[0];
|
||||
|
@ -106,8 +342,7 @@ statsComponents operator*(const statsComponents & x, double const * const y)
|
|||
return z;
|
||||
}
|
||||
|
||||
statsDef operator+(const statsDef & x, const statsDef & y)
|
||||
{
|
||||
statsDef operator+(const statsDef & x, const statsDef & y) {
|
||||
statsDef z;
|
||||
|
||||
z.readAc = x.readAc + y.readAc;
|
||||
|
@ -116,8 +351,7 @@ statsDef operator+(const statsDef & x, const statsDef & y)
|
|||
return z;
|
||||
}
|
||||
|
||||
statsDef operator*(const statsDef & x, double const * const y)
|
||||
{
|
||||
statsDef operator*(const statsDef & x, double const * const y) {
|
||||
statsDef z;
|
||||
|
||||
z.readAc = x.readAc * y;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -34,9 +35,15 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
#include "xmlParser.h"
|
||||
|
||||
/**
|
||||
* TODO: Since revisions to McPAT aim to make the component hierarchy more
|
||||
* modular, many of the parameter and statistics classes/structs included in
|
||||
* this file should be moved to the files for their respective components.
|
||||
*/
|
||||
const double cdb_overhead = 1.1;
|
||||
|
||||
enum FU_type {
|
||||
|
@ -60,7 +67,14 @@ enum Scheduler_type {
|
|||
ReservationStation
|
||||
};
|
||||
|
||||
enum cache_level {
|
||||
enum Cache_type {
|
||||
DATA_CACHE,
|
||||
INSTRUCTION_CACHE,
|
||||
MIXED
|
||||
};
|
||||
|
||||
enum CacheLevel {
|
||||
L1,
|
||||
L2,
|
||||
L3,
|
||||
L1Directory,
|
||||
|
@ -91,175 +105,385 @@ enum Device_ty {
|
|||
LLC_device
|
||||
};
|
||||
|
||||
class statsComponents
|
||||
{
|
||||
enum Access_mode {
|
||||
Normal,
|
||||
Sequential,
|
||||
Fast
|
||||
};
|
||||
|
||||
class statsComponents {
|
||||
public:
|
||||
double access;
|
||||
double hit;
|
||||
double miss;
|
||||
|
||||
statsComponents() : access(0), hit(0), miss(0) {}
|
||||
statsComponents(const statsComponents & obj) { *this = obj; }
|
||||
statsComponents & operator=(const statsComponents & rhs)
|
||||
{
|
||||
statsComponents(const statsComponents & obj) {
|
||||
*this = obj;
|
||||
}
|
||||
statsComponents & operator=(const statsComponents & rhs) {
|
||||
access = rhs.access;
|
||||
hit = rhs.hit;
|
||||
miss = rhs.miss;
|
||||
return *this;
|
||||
}
|
||||
void reset() { access = 0; hit = 0; miss = 0;}
|
||||
void reset() {
|
||||
access = 0;
|
||||
hit = 0;
|
||||
miss = 0;
|
||||
}
|
||||
|
||||
friend statsComponents operator+(const statsComponents & x, const statsComponents & y);
|
||||
friend statsComponents operator*(const statsComponents & x, double const * const y);
|
||||
friend statsComponents operator+(const statsComponents & x,
|
||||
const statsComponents & y);
|
||||
friend statsComponents operator*(const statsComponents & x,
|
||||
double const * const y);
|
||||
};
|
||||
|
||||
class statsDef
|
||||
{
|
||||
class statsDef {
|
||||
public:
|
||||
statsComponents readAc;
|
||||
statsComponents writeAc;
|
||||
statsComponents searchAc;
|
||||
statsComponents dataReadAc;
|
||||
statsComponents dataWriteAc;
|
||||
statsComponents tagReadAc;
|
||||
statsComponents tagWriteAc;
|
||||
|
||||
statsDef() : readAc(), writeAc(), searchAc() { }
|
||||
void reset() { readAc.reset(); writeAc.reset();searchAc.reset();}
|
||||
void reset() {
|
||||
readAc.reset();
|
||||
writeAc.reset();
|
||||
searchAc.reset();
|
||||
}
|
||||
|
||||
friend statsDef operator+(const statsDef & x, const statsDef & y);
|
||||
friend statsDef operator*(const statsDef & x, double const * const y);
|
||||
};
|
||||
|
||||
/**
 * Per-component-instance results that McPAT prints: how much storage the
 * component comprises, its chip area, and its power and energy figures.
 *
 * The class declares no constructor, so the fields hold no defined value
 * until reset() is called or they are assigned.
 */
class McPATOutput {
public:
    double storage;                     // bytes (B)
    double area;                        // mm^2
    double peak_dynamic_power;          // W
    double subthreshold_leakage_power;  // W
    double gate_leakage_power;          // W
    double runtime_dynamic_energy;      // J

    // Sets every field above to 0.0.
    void reset();

    // Field-wise addition of two records / accumulation into this record.
    friend McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs);
    void operator+=(const McPATOutput &rhs);
};
|
||||
|
||||
/**
|
||||
* A McPATComponent encompasses all the parts that are common to any component
|
||||
* for which McPAT may compute and print power, area, and timing data. It
|
||||
* includes a pointer to the XML data from which the component gathers its
|
||||
* input parameters, it stores the variables that are commonly used in all
|
||||
* components, and it maintains the hierarchical structure to recursively
|
||||
* compute and print output. This is a base class from which all components
|
||||
* should inherit these functionality (possibly through other descended
|
||||
* classes.
|
||||
*/
|
||||
class McPATComponent : public Component {
|
||||
public:
|
||||
static bool debug;
|
||||
|
||||
// Variables shared across the system by all McPATComponents
|
||||
static bool opt_for_clk;
|
||||
static int longer_channel_device;
|
||||
static double execution_time;
|
||||
static int physical_address_width;
|
||||
static int virtual_address_width;
|
||||
static int virtual_memory_page_size;
|
||||
static int data_path_width;
|
||||
|
||||
// Although these two variables are static right now, they need to be
|
||||
// modulated on a per-frequency-domain basis eventually.
|
||||
static double target_core_clockrate;
|
||||
static double total_cycles;
|
||||
|
||||
XMLNode* xml_data;
|
||||
InputParameter interface_ip;
|
||||
string name;
|
||||
// Number of cycles per second (consider changing name)
|
||||
double clockRate;
|
||||
vector<McPATComponent*> children;
|
||||
// The data structure that is printed in displayData
|
||||
McPATOutput output_data;
|
||||
// Set this to contain the stats to calculate peak dynamic power
|
||||
statsDef tdp_stats;
|
||||
// Set this to contain the stats to calculate runtime dynamic energy/power
|
||||
statsDef rtp_stats;
|
||||
// Holds the peak dynamic power calculation
|
||||
powerDef power_t;
|
||||
// Holds the runtime dynamic power calculation
|
||||
powerDef rt_power;
|
||||
|
||||
McPATComponent();
|
||||
// Which of these is a better way of doing things?!
|
||||
McPATComponent(XMLNode* _xml_data);
|
||||
McPATComponent(XMLNode* _xml_data, InputParameter* _interface_ip);
|
||||
virtual void recursiveInstantiate();
|
||||
virtual void computeArea();
|
||||
// This function should probably be pure virtual, but it's too early in
|
||||
// the modifying process to know for sure. Note that each component has
|
||||
// to calculate it's own power consumption
|
||||
virtual void computeEnergy();
|
||||
virtual void displayData(uint32_t indent, int plevel);
|
||||
~McPATComponent();
|
||||
|
||||
protected:
|
||||
void errorUnspecifiedParam(string param);
|
||||
void errorNonPositiveParam(string param);
|
||||
void warnUnrecognizedComponent(XMLCSTR component);
|
||||
void warnUnrecognizedParam(XMLCSTR param);
|
||||
void warnUnrecognizedStat(XMLCSTR stat);
|
||||
void warnIncompleteComponentType(XMLCSTR type);
|
||||
void warnMissingComponentType(XMLCSTR id);
|
||||
void warnMissingParamName(XMLCSTR id);
|
||||
void warnMissingStatName(XMLCSTR id);
|
||||
};
|
||||
|
||||
double longer_channel_device_reduction(
|
||||
enum Device_ty device_ty = Core_device,
|
||||
enum Core_type core_ty = Inorder);
|
||||
|
||||
class CoreDynParam {
|
||||
class CoreParameters {
|
||||
public:
|
||||
CoreDynParam(){};
|
||||
CoreDynParam(ParseXML *XML_interface, int ithCore_);
|
||||
// :XML(XML_interface),
|
||||
// ithCore(ithCore_)
|
||||
// core_ty(inorder),
|
||||
// rm_ty(CAMbased),
|
||||
// scheu_ty(PhysicalRegFile),
|
||||
// clockRate(1e9),//1GHz
|
||||
// arch_ireg_width(32),
|
||||
// arch_freg_width(32),
|
||||
// phy_ireg_width(128),
|
||||
// phy_freg_width(128),
|
||||
// perThreadState(8),
|
||||
// globalCheckpoint(32),
|
||||
// instructionLength(32){};
|
||||
//ParseXML * XML;
|
||||
bool opt_local;
|
||||
bool x86;
|
||||
bool Embedded;
|
||||
enum Core_type core_ty;
|
||||
enum Renaming_type rm_ty;
|
||||
enum Scheduler_type scheu_ty;
|
||||
double clockRate,executionTime;
|
||||
int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width;
|
||||
int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries;
|
||||
int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW;
|
||||
int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length;
|
||||
int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, num_fp_pipelines;
|
||||
int num_alus, num_muls;
|
||||
double num_fpus;
|
||||
int int_data_width, fp_data_width,v_address_width, p_address_width;
|
||||
double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles;
|
||||
bool regWindowing,multithreaded;
|
||||
double pppm_lkg_multhread[4];
|
||||
double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle,
|
||||
MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle,
|
||||
FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle,
|
||||
FPU_cdb_duty_cycle;
|
||||
~CoreDynParam(){};
|
||||
};
|
||||
|
||||
class CacheDynParam {
|
||||
public:
|
||||
CacheDynParam(){};
|
||||
CacheDynParam(ParseXML *XML_interface, int ithCache_);
|
||||
string name;
|
||||
enum Dir_type dir_ty;
|
||||
double clockRate,executionTime;
|
||||
double capacity, blockW, assoc, nbanks;
|
||||
double throughput, latency;
|
||||
double duty_cycle, dir_duty_cycle;
|
||||
//double duty_cycle;
|
||||
int missb_size, fu_size, prefetchb_size, wbb_size;
|
||||
~CacheDynParam(){};
|
||||
};
|
||||
|
||||
class MCParam {
|
||||
public:
|
||||
MCParam(){};
|
||||
MCParam(ParseXML *XML_interface, int ithCache_);
|
||||
string name;
|
||||
double clockRate,num_mcs, peakDataTransferRate, num_channels;
|
||||
// double mcTEPowerperGhz;
|
||||
// double mcPHYperGbit;
|
||||
// double area;
|
||||
int llcBlockSize, dataBusWidth, addressBusWidth;
|
||||
int opcodeW;
|
||||
int memAccesses;
|
||||
int memRank;
|
||||
int type;
|
||||
double frontend_duty_cycle, duty_cycle, perc_load;
|
||||
double executionTime, reads, writes;
|
||||
bool LVDS, withPHY;
|
||||
|
||||
~MCParam(){};
|
||||
};
|
||||
|
||||
class NoCParam {
|
||||
public:
|
||||
NoCParam(){};
|
||||
NoCParam(ParseXML *XML_interface, int ithCache_);
|
||||
string name;
|
||||
double clockRate;
|
||||
int flit_size;
|
||||
int input_ports, output_ports, min_ports, global_linked_ports;
|
||||
int virtual_channel_per_port,input_buffer_entries_per_vc;
|
||||
int horizontal_nodes,vertical_nodes, total_nodes;
|
||||
double executionTime, total_access, link_throughput,link_latency,
|
||||
duty_cycle, chip_coverage, route_over_perc;
|
||||
bool has_global_link, type;
|
||||
|
||||
~NoCParam(){};
|
||||
int arch_ireg_width;
|
||||
int arch_freg_width;
|
||||
int archi_Regs_IRF_size;
|
||||
int archi_Regs_FRF_size;
|
||||
int phy_ireg_width;
|
||||
int phy_freg_width;
|
||||
int num_IRF_entry;
|
||||
int num_FRF_entry;
|
||||
int num_ifreelist_entries;
|
||||
int num_ffreelist_entries;
|
||||
int fetchW;
|
||||
int decodeW;
|
||||
int issueW;
|
||||
int peak_issueW;
|
||||
int commitW;
|
||||
int peak_commitW;
|
||||
int predictionW;
|
||||
int fp_issueW;
|
||||
int fp_decodeW;
|
||||
int perThreadState;
|
||||
int globalCheckpoint;
|
||||
int instruction_length;
|
||||
int pc_width;
|
||||
int opcode_width;
|
||||
int micro_opcode_length;
|
||||
int num_hthreads;
|
||||
int pipeline_stages;
|
||||
int fp_pipeline_stages;
|
||||
int num_pipelines;
|
||||
int num_fp_pipelines;
|
||||
int num_alus;
|
||||
int num_muls;
|
||||
double num_fpus;
|
||||
int int_data_width;
|
||||
int fp_data_width;
|
||||
int v_address_width;
|
||||
int p_address_width;
|
||||
bool regWindowing;
|
||||
bool multithreaded;
|
||||
double pppm_lkg_multhread[4];
|
||||
int ROB_size;
|
||||
int ROB_assoc;
|
||||
int ROB_nbanks;
|
||||
int ROB_tag_width;
|
||||
int scheduler_assoc;
|
||||
int scheduler_nbanks;
|
||||
int register_window_size;
|
||||
double register_window_throughput;
|
||||
double register_window_latency;
|
||||
int register_window_assoc;
|
||||
int register_window_nbanks;
|
||||
int register_window_tag_width;
|
||||
int register_window_rw_ports;
|
||||
int phy_Regs_IRF_size;
|
||||
int phy_Regs_IRF_assoc;
|
||||
int phy_Regs_IRF_nbanks;
|
||||
int phy_Regs_IRF_tag_width;
|
||||
int phy_Regs_IRF_rd_ports;
|
||||
int phy_Regs_IRF_wr_ports;
|
||||
int phy_Regs_FRF_size;
|
||||
int phy_Regs_FRF_assoc;
|
||||
int phy_Regs_FRF_nbanks;
|
||||
int phy_Regs_FRF_tag_width;
|
||||
int phy_Regs_FRF_rd_ports;
|
||||
int phy_Regs_FRF_wr_ports;
|
||||
int front_rat_nbanks;
|
||||
int front_rat_rw_ports;
|
||||
int retire_rat_nbanks;
|
||||
int retire_rat_rw_ports;
|
||||
int freelist_nbanks;
|
||||
int freelist_rw_ports;
|
||||
int memory_ports;
|
||||
int load_buffer_size;
|
||||
int load_buffer_assoc;
|
||||
int load_buffer_nbanks;
|
||||
int store_buffer_size;
|
||||
int store_buffer_assoc;
|
||||
int store_buffer_nbanks;
|
||||
int instruction_window_size;
|
||||
int fp_instruction_window_size;
|
||||
int instruction_buffer_size;
|
||||
int instruction_buffer_assoc;
|
||||
int instruction_buffer_nbanks;
|
||||
int instruction_buffer_tag_width;
|
||||
int number_instruction_fetch_ports;
|
||||
int RAS_size;
|
||||
int execu_int_bypass_ports;
|
||||
int execu_mul_bypass_ports;
|
||||
int execu_fp_bypass_ports;
|
||||
Wire_type execu_bypass_wire_type;
|
||||
Wire_type execu_broadcast_wt;
|
||||
int execu_wire_mat_type;
|
||||
double execu_bypass_base_width;
|
||||
double execu_bypass_base_height;
|
||||
int execu_bypass_start_wiring_level;
|
||||
double execu_bypass_route_over_perc;
|
||||
double broadcast_numerator;
|
||||
};
|
||||
|
||||
class ProcParam {
|
||||
class CoreStatistics {
|
||||
public:
|
||||
ProcParam(){};
|
||||
ProcParam(ParseXML *XML_interface, int ithCache_);
|
||||
string name;
|
||||
int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel;
|
||||
bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir;
|
||||
|
||||
~ProcParam(){};
|
||||
double pipeline_duty_cycle;
|
||||
double total_cycles;
|
||||
double busy_cycles;
|
||||
double idle_cycles;
|
||||
double IFU_duty_cycle;
|
||||
double BR_duty_cycle;
|
||||
double LSU_duty_cycle;
|
||||
double MemManU_I_duty_cycle;
|
||||
double MemManU_D_duty_cycle;
|
||||
double ALU_duty_cycle;
|
||||
double MUL_duty_cycle;
|
||||
double FPU_duty_cycle;
|
||||
double ALU_cdb_duty_cycle;
|
||||
double MUL_cdb_duty_cycle;
|
||||
double FPU_cdb_duty_cycle;
|
||||
double ROB_reads;
|
||||
double ROB_writes;
|
||||
double total_instructions;
|
||||
double int_instructions;
|
||||
double fp_instructions;
|
||||
double branch_instructions;
|
||||
double branch_mispredictions;
|
||||
double load_instructions;
|
||||
double store_instructions;
|
||||
double committed_instructions;
|
||||
double committed_int_instructions;
|
||||
double committed_fp_instructions;
|
||||
double rename_reads;
|
||||
double rename_writes;
|
||||
double fp_rename_reads;
|
||||
double fp_rename_writes;
|
||||
double inst_window_reads;
|
||||
double inst_window_writes;
|
||||
double inst_window_wakeup_accesses;
|
||||
double fp_inst_window_reads;
|
||||
double fp_inst_window_writes;
|
||||
double fp_inst_window_wakeup_accesses;
|
||||
double int_regfile_reads;
|
||||
double float_regfile_reads;
|
||||
double int_regfile_writes;
|
||||
double float_regfile_writes;
|
||||
double context_switches;
|
||||
double ialu_accesses;
|
||||
double fpu_accesses;
|
||||
double mul_accesses;
|
||||
double cdb_alu_accesses;
|
||||
double cdb_fpu_accesses;
|
||||
double cdb_mul_accesses;
|
||||
double function_calls;
|
||||
};
|
||||
|
||||
class NIUParam {
|
||||
class MCParameters {
|
||||
public:
|
||||
double clockRate;
|
||||
enum MemoryCtrl_type mc_type;
|
||||
double num_mcs;
|
||||
int num_channels;
|
||||
int llcBlockSize;
|
||||
int dataBusWidth;
|
||||
int databus_width;
|
||||
int llc_line_length;
|
||||
int req_window_size_per_channel;
|
||||
int IO_buffer_size_per_channel;
|
||||
int addressbus_width;
|
||||
int opcodeW;
|
||||
int type;
|
||||
bool LVDS;
|
||||
bool withPHY;
|
||||
int peak_transfer_rate;
|
||||
int number_ranks;
|
||||
int reorder_buffer_assoc;
|
||||
int reorder_buffer_nbanks;
|
||||
int read_buffer_assoc;
|
||||
int read_buffer_nbanks;
|
||||
int read_buffer_tag_width;
|
||||
int write_buffer_assoc;
|
||||
int write_buffer_nbanks;
|
||||
int write_buffer_tag_width;
|
||||
};
|
||||
|
||||
/** Activity statistics for a memory controller (data-only holder). */
class MCStatistics {
  public:
    double duty_cycle;
    double perc_load;
    double reads;   // read count supplied by the input statistics
    double writes;  // write count supplied by the input statistics
};
|
||||
|
||||
class NIUParameters {
|
||||
public:
|
||||
NIUParam(){};
|
||||
NIUParam(ParseXML *XML_interface, int ithCache_);
|
||||
string name;
|
||||
double clockRate;
|
||||
int num_units;
|
||||
int type;
|
||||
double duty_cycle, perc_load;
|
||||
~NIUParam(){};
|
||||
};
|
||||
|
||||
class PCIeParam {
|
||||
/** Activity statistics for a network interface unit (data-only holder). */
class NIUStatistics {
  public:
    double duty_cycle;
    double perc_load;
};
|
||||
|
||||
class PCIeParameters {
|
||||
public:
|
||||
PCIeParam(){};
|
||||
PCIeParam(ParseXML *XML_interface, int ithCache_);
|
||||
string name;
|
||||
double clockRate;
|
||||
int num_channels, num_units;
|
||||
int num_channels;
|
||||
int num_units;
|
||||
bool withPHY;
|
||||
int type;
|
||||
double duty_cycle, perc_load;
|
||||
~PCIeParam(){};
|
||||
};
|
||||
|
||||
/** Activity statistics for a PCIe controller (data-only holder). */
class PCIeStatistics {
  public:
    double duty_cycle;
    double perc_load;
};
|
||||
#endif /* BASIC_COMPONENTS_H_ */
|
||||
|
|
179
ext/mcpat/bus_interconnect.cc
Normal file
179
ext/mcpat/bus_interconnect.cc
Normal file
|
@ -0,0 +1,179 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Joel Hestness
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "basic_circuit.h"
|
||||
#include "bus_interconnect.h"
|
||||
#include "common.h"
|
||||
#include "const.h"
|
||||
#include "io.h"
|
||||
#include "parameter.h"
|
||||
|
||||
/*
 * Construct a bus interconnect from its XML node.
 *
 * Reads parameters/statistics via set_param_stats(), initialises the local
 * interface result, converts the configured link throughput and latency by
 * dividing them by the clock rate, scales the link length by the node
 * count, and finally creates the single "Link" Interconnect child.
 *
 * _xml_data      XML node describing this component.
 * interface_ip_  Input parameters to copy; the copy is then adjusted here.
 */
BusInterconnect::BusInterconnect(XMLNode* _xml_data,
                                 InputParameter* interface_ip_)
        : McPATComponent(_xml_data), link_bus(NULL),
          interface_ip(*interface_ip_) {
    name = "Bus Interconnect";
    set_param_stats();
    local_result = init_interface(&interface_ip, name);
    scktRatio = g_tp.sckt_co_eff;

    const double bus_clock = bus_params.clockRate;
    interface_ip.throughput = bus_params.link_throughput / bus_clock;
    interface_ip.latency = bus_params.link_latency / bus_clock;

    // Divide the configured length among the nodes; when there is more
    // than one node every link is shared by two neighbours, so halve it.
    const int node_count = bus_params.total_nodes;
    link_len /= node_count;
    if (node_count > 1) {
        link_len /= 2;
    }

    link_bus = new Interconnect(xml_data, "Link", Uncore_device,
                                bus_params.link_base_width,
                                bus_params.link_base_height,
                                bus_params.flit_size, link_len,
                                &interface_ip,
                                bus_params.link_start_wiring_level,
                                bus_clock,
                                bus_params.pipelinable,
                                bus_params.route_over_perc);
    children.push_back(link_bus);
}
|
||||
|
||||
/*
 * Seed the TDP and runtime statistics of this bus and of its link child
 * from bus_params/bus_stats, then let the base class recurse into the
 * children to compute energy.
 */
void BusInterconnect::computeEnergy() {
    const double duty = bus_stats.duty_cycle;
    const double accesses = bus_stats.total_access;

    // TDP: activity is driven by the duty cycle
    tdp_stats.reset();
    tdp_stats.readAc.access = duty;
    link_bus->int_params.active_ports = bus_params.min_ports - 1;
    link_bus->int_stats.duty_cycle = bus_params.M_traffic_pattern * duty;

    // Runtime: activity is driven by the recorded access count
    rtp_stats.reset();
    rtp_stats.readAc.access = accesses;
    link_bus->int_stats.accesses = accesses;

    McPATComponent::computeEnergy();
}
|
||||
|
||||
void BusInterconnect::set_param_stats() {
|
||||
memset(&bus_params, 0, sizeof(BusInterconnectParameters));
|
||||
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
int mat_type;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("clockrate", bus_params.clockRate);
|
||||
ASSIGN_INT_IF("flit_bits", bus_params.flit_size);
|
||||
ASSIGN_FP_IF("link_throughput", bus_params.link_throughput);
|
||||
ASSIGN_FP_IF("link_latency", bus_params.link_latency);
|
||||
ASSIGN_INT_IF("total_nodes", bus_params.total_nodes);
|
||||
ASSIGN_INT_IF("input_ports", bus_params.input_ports);
|
||||
ASSIGN_INT_IF("output_ports", bus_params.output_ports);
|
||||
ASSIGN_INT_IF("global_linked_ports", bus_params.global_linked_ports);
|
||||
ASSIGN_FP_IF("chip_coverage", bus_params.chip_coverage);
|
||||
ASSIGN_INT_IF("pipelinable", bus_params.pipelinable);
|
||||
ASSIGN_FP_IF("link_routing_over_percentage",
|
||||
bus_params.route_over_perc);
|
||||
ASSIGN_INT_IF("virtual_channel_per_port",
|
||||
bus_params.virtual_channel_per_port);
|
||||
ASSIGN_FP_IF("M_traffic_pattern", bus_params.M_traffic_pattern);
|
||||
ASSIGN_FP_IF("link_len", link_len);
|
||||
ASSIGN_FP_IF("link_base_width", bus_params.link_base_width);
|
||||
ASSIGN_FP_IF("link_base_height", bus_params.link_base_height);
|
||||
ASSIGN_FP_IF("link_start_wiring_level",
|
||||
bus_params.link_start_wiring_level);
|
||||
ASSIGN_INT_IF("wire_mat_type", mat_type);
|
||||
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
|
||||
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
// Change from MHz to Hz
|
||||
bus_params.clockRate *= 1e6;
|
||||
|
||||
interface_ip.wire_is_mat_type = mat_type;
|
||||
interface_ip.wire_os_mat_type = mat_type;
|
||||
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("duty_cycle", bus_stats.duty_cycle);
|
||||
ASSIGN_FP_IF("total_accesses", bus_stats.total_access);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
clockRate = bus_params.clockRate;
|
||||
bus_params.min_ports =
|
||||
min(bus_params.input_ports, bus_params.output_ports);
|
||||
|
||||
assert(bus_params.chip_coverage <= 1);
|
||||
assert(bus_params.route_over_perc <= 1);
|
||||
assert(link_len > 0);
|
||||
}
|
||||
|
||||
void
|
||||
BusInterconnect::set_duty_cycle(double duty_cycle) {
|
||||
bus_stats.duty_cycle = duty_cycle;
|
||||
}
|
||||
|
||||
void
|
||||
BusInterconnect::set_number_of_accesses(double total_accesses) {
|
||||
bus_stats.total_access = total_accesses;
|
||||
}
|
||||
|
||||
/*
 * Release the link child created by the constructor.
 *
 * NOTE(review): link_bus is also stored in `children` by the constructor;
 * confirm the base-class destructor does not delete children as well.
 */
BusInterconnect::~BusInterconnect() {
    delete link_bus;  // deleting a NULL pointer is a no-op
    link_bus = NULL;
}
|
|
@ -1,7 +1,7 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,65 +25,71 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Joel Hestness
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef SHAREDCACHE_H_
|
||||
#define SHAREDCACHE_H_
|
||||
#include <vector>
|
||||
#ifndef BUS_INTERCONNECT_H_
|
||||
#define BUS_INTERCONNECT_H_
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "area.h"
|
||||
#include "array.h"
|
||||
#include "basic_components.h"
|
||||
#include "interconnect.h"
|
||||
#include "logic.h"
|
||||
#include "parameter.h"
|
||||
|
||||
class SharedCache :public Component{
|
||||
class BusInterconnectParameters {
|
||||
public:
|
||||
ParseXML * XML;
|
||||
int ithCache;
|
||||
InputParameter interface_ip;
|
||||
enum cache_level cacheL;
|
||||
DataCache unicache;//Shared cache
|
||||
CacheDynParam cachep;
|
||||
statsDef homenode_tdp_stats;
|
||||
statsDef homenode_rtp_stats;
|
||||
statsDef homenode_stats_t;
|
||||
double dir_overhead;
|
||||
// cache_processor llCache,directory, directory1, inv_dir;
|
||||
|
||||
//pipeline pipeLogicCache, pipeLogicDirectory;
|
||||
//clock_network clockNetwork;
|
||||
double scktRatio, executionTime;
|
||||
// Component L2Tot, cc, cc1, ccTot;
|
||||
|
||||
SharedCache(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_,enum cache_level cacheL_ =L2);
|
||||
void set_cache_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
|
||||
~SharedCache(){};
|
||||
double clockRate;
|
||||
int flit_size;
|
||||
int input_ports;
|
||||
int output_ports;
|
||||
int min_ports;
|
||||
int global_linked_ports;
|
||||
int virtual_channel_per_port;
|
||||
int input_buffer_entries_per_vc;
|
||||
int total_nodes;
|
||||
double link_throughput;
|
||||
double link_latency;
|
||||
double chip_coverage;
|
||||
bool pipelinable;
|
||||
double route_over_perc;
|
||||
bool has_global_link;
|
||||
bool type;
|
||||
double M_traffic_pattern;
|
||||
double link_base_width;
|
||||
double link_base_height;
|
||||
int link_start_wiring_level;
|
||||
};
|
||||
|
||||
class CCdir :public Component{
|
||||
class BusInterconnectStatistics {
|
||||
public:
|
||||
ParseXML * XML;
|
||||
int ithCache;
|
||||
InputParameter interface_ip;
|
||||
DataCache dc;//Shared cache
|
||||
ArrayST * shadow_dir;
|
||||
// cache_processor llCache,directory, directory1, inv_dir;
|
||||
|
||||
//pipeline pipeLogicCache, pipeLogicDirectory;
|
||||
//clock_network clockNetwork;
|
||||
double scktRatio, clockRate, executionTime;
|
||||
Component L2Tot, cc, cc1, ccTot;
|
||||
|
||||
CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
|
||||
~CCdir();
|
||||
double duty_cycle;
|
||||
double total_access;
|
||||
};
|
||||
|
||||
#endif /* SHAREDCACHE_H_ */
|
||||
class BusInterconnect : public McPATComponent {
|
||||
public:
|
||||
Interconnect* link_bus;
|
||||
|
||||
int ithNoC;
|
||||
InputParameter interface_ip;
|
||||
double link_len;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
BusInterconnectParameters bus_params;
|
||||
BusInterconnectStatistics bus_stats;
|
||||
uca_org_t local_result;
|
||||
statsDef stats_t;
|
||||
double M_traffic_pattern;
|
||||
|
||||
BusInterconnect(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||
void set_param_stats();
|
||||
void set_duty_cycle(double duty_cycle);
|
||||
void set_number_of_accesses(double total_accesses);
|
||||
void computeEnergy();
|
||||
~BusInterconnect();
|
||||
};
|
||||
|
||||
#endif /* BUS_INTERCONNECT_H_ */
|
321
ext/mcpat/cachearray.cc
Normal file
321
ext/mcpat/cachearray.cc
Normal file
|
@ -0,0 +1,321 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Joel Hestness
|
||||
* Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
|
||||
#include "area.h"
|
||||
#include "cachearray.h"
|
||||
#include "common.h"
|
||||
#include "decoder.h"
|
||||
#include "parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Class-wide tuning constants.  The constructor copies the weight and
// deviation values into l_ip before it re-runs the design-space search.

// Area-efficiency cut-off used by the search loop when l_ip.assoc == 0
double CacheArray::area_efficiency_threshold = 20.0;

int CacheArray::ed = 0;

// Objective weights.  The timing weights are fixed so that timing can be
// satisfied; the area and power weights are fixed values used for the
// exhaustive search over individual components.
int CacheArray::delay_wt = 100;
int CacheArray::cycle_time_wt = 1000;
int CacheArray::area_wt = 10;
int CacheArray::dynamic_power_wt = 10;
int CacheArray::leakage_power_wt = 10;

// Allowed deviations, with the same split between timing and
// area/power values as the weights above.
int CacheArray::delay_dev = 1000000;
int CacheArray::cycle_time_dev = 100;
int CacheArray::area_dev = 1000000;
int CacheArray::dynamic_power_dev = 1000000;
int CacheArray::leakage_power_dev = 1000000;

// Step by which the constructor tightens l_ip.cycle_time_dev on each
// search iteration, and the floor at which the search stops
int CacheArray::cycle_time_dev_threshold = 10;
|
||||
|
||||
/*
 * Build a cache/array component and pick its physical organisation.
 *
 * Steps, in order:
 *  1. Copy the interface parameters, clamp the size to MIN_BUFFER_SIZE and
 *     run the parameter error check (exits the process on failure).
 *  2. Run cacti_interface() once to obtain an initial solution.
 *  3. If opt_for_clk and opt_local are set and the initial solution misses
 *     the throughput or latency target, repeatedly re-run the search with a
 *     progressively smaller cycle_time_dev, collecting candidate solutions,
 *     and keep the candidate with the lowest dynamic read energy.
 *  4. Scale area, dynamic energy and leakage of the chosen solution (and of
 *     its data/tag arrays) by the layout overheads, the short-circuit
 *     coefficient and the bank count.
 */
CacheArray::CacheArray(XMLNode* _xml_data,
                       const InputParameter *configure_interface, string _name,
                       enum Device_ty device_ty_, double _clockRate,
                       bool opt_local_, enum Core_type core_ty_,
                       bool _is_default)
        : McPATComponent(_xml_data), l_ip(*configure_interface),
          device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
          is_default(_is_default), sbt_dir_overhead(0) {
    name = _name;
    clockRate = _clockRate;

    if (l_ip.cache_sz < MIN_BUFFER_SIZE) {
        l_ip.cache_sz = MIN_BUFFER_SIZE;
    }

    if (!l_ip.error_checking(name)) {
        exit(1);
    }

    sbt_tdp_stats.reset();
    sbt_rtp_stats.reset();

    // Initial search point
    local_result.valid = false;
    compute_base_power();

    // Candidate organisations gathered while searching with cacti
    list<uca_org_t> candidates;

    const double throughput_target = l_ip.throughput;
    const double latency_target = l_ip.latency;

    // A target counts as met when the result exceeds it by at most 1e-10
    bool throughput_overflow =
        !((local_result.cycle_time - throughput_target) <= 1e-10);
    bool latency_overflow =
        !((local_result.access_time - latency_target) <= 1e-10);

    if (opt_for_clk && opt_local) {
        if (throughput_overflow || latency_overflow) {
            // Load the class-wide search weights and deviations
            l_ip.ed = ed;

            l_ip.delay_wt = delay_wt;
            l_ip.cycle_time_wt = cycle_time_wt;

            l_ip.area_wt = area_wt;
            l_ip.dynamic_power_wt = dynamic_power_wt;
            l_ip.leakage_power_wt = leakage_power_wt;

            l_ip.delay_dev = delay_dev;
            l_ip.cycle_time_dev = cycle_time_dev;

            l_ip.area_dev = area_dev;
            l_ip.dynamic_power_dev = dynamic_power_dev;
            l_ip.leakage_power_dev = leakage_power_dev;

            // Start the optimisation iterations from "both constraints
            // violated"
            throughput_overflow = true;
            latency_overflow = true;

            // Discard the result that was optimised for ED^2P
            local_result.cleanup();
        }

        while ((throughput_overflow || latency_overflow) &&
               l_ip.cycle_time_dev > cycle_time_dev_threshold) {
            compute_base_power();

            // Deviation to use on the next iteration
            l_ip.cycle_time_dev -= cycle_time_dev_threshold;

            const bool cycle_ok =
                (local_result.cycle_time - throughput_target) <= 1e-10;
            const bool access_ok =
                (local_result.access_time - latency_target) <= 1e-10;
            const bool timing_met = cycle_ok && access_ok;

            // Solutions go from best area to worst area, i.e. from worst
            // timing to best timing
            if (timing_met ||
                (local_result.data_array2->area_efficiency <
                 area_efficiency_threshold && l_ip.assoc == 0)) {
                // If no satisfying solution is ever found, the most
                // aggressive one is what remains
                candidates.push_back(local_result);
                if (timing_met) {
                    // Make sure the search does not stop because of cam
                    throughput_overflow = false;
                    latency_overflow = false;
                }
            } else {
                if (cycle_ok) {
                    throughput_overflow = false;
                }
                if (access_ok) {
                    latency_overflow = false;
                }

                // On the final iteration local_result is the last result
                // and must be kept; otherwise a solution that was not saved
                // in the list is released here
                if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
                    local_result.cleanup();
                }
            }
        }

        if (l_ip.assoc > 0) {
            // For array structures other than CAM and FA: warn, but still
            // provide the result with the best timing found
            if (throughput_overflow == true) {
                cout << "Warning: " << name
                     << " array structure cannot satisfy throughput constraint."
                     << endl;
            }
            if (latency_overflow == true) {
                cout << "Warning: " << name
                     << " array structure cannot satisfy latency constraint."
                     << endl;
            }
        }

        double min_dynamic_energy = BIGNUM;
        if (!candidates.empty()) {
            local_result.valid = true;
            for (list<uca_org_t>::iterator it = candidates.begin();
                 it != candidates.end(); ++it) {
                if (min_dynamic_energy > it->power.readOp.dynamic) {
                    // New lowest dynamic read energy: adopt this candidate
                    min_dynamic_energy = it->power.readOp.dynamic;
                    local_result = *it;
                } else {
                    it->cleanup();
                }
            }
        }
        candidates.clear();
    }

    const double long_channel_device_reduction =
        longer_channel_device_reduction(device_ty, core_ty);

    const double macro_layout_overhead = g_tp.macro_layout_overhead;
    const double chip_PR_overhead = g_tp.chip_layout_overhead;
    const double total_overhead = macro_layout_overhead * chip_PR_overhead;
    local_result.area *= total_overhead;

    // Maintain constant power density
    double pppm_t[4] = {total_overhead, 1, 1, total_overhead};

    const double sckt_ratio = g_tp.sckt_co_eff;

    // Whole-array totals
    local_result.power.readOp.dynamic *= sckt_ratio;
    local_result.power.writeOp.dynamic *= sckt_ratio;
    local_result.power.searchOp.dynamic *= sckt_ratio;
    local_result.power.readOp.leakage *= l_ip.nbanks;
    local_result.power.readOp.longer_channel_leakage =
        local_result.power.readOp.leakage * long_channel_device_reduction;
    local_result.power = local_result.power * pppm_t;

    // Data array
    local_result.data_array2->power.readOp.dynamic *= sckt_ratio;
    local_result.data_array2->power.writeOp.dynamic *= sckt_ratio;
    local_result.data_array2->power.searchOp.dynamic *= sckt_ratio;
    local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
    local_result.data_array2->power.readOp.longer_channel_leakage =
        local_result.data_array2->power.readOp.leakage *
        long_channel_device_reduction;
    local_result.data_array2->power =
        local_result.data_array2->power * pppm_t;

    // Tag array: only for caches that are not pure CAM, pure RAM or fully
    // associative
    if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) &&
        l_ip.is_cache) {
        local_result.tag_array2->power.readOp.dynamic *= sckt_ratio;
        local_result.tag_array2->power.writeOp.dynamic *= sckt_ratio;
        local_result.tag_array2->power.searchOp.dynamic *= sckt_ratio;
        local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
        local_result.tag_array2->power.readOp.longer_channel_leakage =
            local_result.tag_array2->power.readOp.leakage *
            long_channel_device_reduction;
        local_result.tag_array2->power =
            local_result.tag_array2->power * pppm_t;
    }
}
|
||||
|
||||
void CacheArray::compute_base_power() {
|
||||
local_result = cacti_interface(&l_ip);
|
||||
}
|
||||
|
||||
// Publish the area of the selected solution, both on the component's area
// object and in the common output record.
void CacheArray::computeArea() {
    area.set_area(local_result.area);
    // The output record stores the area divided by 1e6
    // NOTE(review): presumably a um^2 -> mm^2 conversion -- confirm units.
    output_data.area = local_result.area / 1e6;
}
|
||||
|
||||
void CacheArray::computeEnergy() {
|
||||
// Set the leakage power numbers
|
||||
output_data.subthreshold_leakage_power = local_result.power.readOp.leakage;
|
||||
output_data.gate_leakage_power = local_result.power.readOp.gate_leakage;
|
||||
|
||||
if (l_ip.assoc && l_ip.is_cache) {
|
||||
// This is a standard cache array with data and tags
|
||||
// Calculate peak dynamic power
|
||||
output_data.peak_dynamic_power =
|
||||
(local_result.tag_array2->power.readOp.dynamic +
|
||||
local_result.data_array2->power.readOp.dynamic) *
|
||||
tdp_stats.readAc.hit +
|
||||
(local_result.tag_array2->power.readOp.dynamic) *
|
||||
tdp_stats.readAc.miss +
|
||||
(local_result.tag_array2->power.readOp.dynamic +
|
||||
local_result.data_array2->power.writeOp.dynamic) *
|
||||
tdp_stats.writeAc.hit +
|
||||
(local_result.tag_array2->power.readOp.dynamic) *
|
||||
tdp_stats.writeAc.miss;
|
||||
output_data.peak_dynamic_power *= clockRate;
|
||||
|
||||
// Calculate the runtime dynamic power
|
||||
output_data.runtime_dynamic_energy =
|
||||
local_result.data_array2->power.readOp.dynamic *
|
||||
rtp_stats.dataReadAc.access +
|
||||
local_result.data_array2->power.writeOp.dynamic *
|
||||
rtp_stats.dataWriteAc.access +
|
||||
(local_result.tag_array2->power.readOp.dynamic *
|
||||
rtp_stats.tagReadAc.access +
|
||||
local_result.tag_array2->power.writeOp.dynamic *
|
||||
rtp_stats.tagWriteAc.access) * l_ip.assoc;
|
||||
} else {
|
||||
// Calculate peak dynamic power
|
||||
output_data.peak_dynamic_power =
|
||||
local_result.power.readOp.dynamic * tdp_stats.readAc.access +
|
||||
local_result.power.writeOp.dynamic * tdp_stats.writeAc.access +
|
||||
local_result.power.searchOp.dynamic * tdp_stats.searchAc.access;
|
||||
output_data.peak_dynamic_power *= clockRate;
|
||||
|
||||
// Calculate the runtime dynamic power
|
||||
output_data.runtime_dynamic_energy =
|
||||
local_result.power.readOp.dynamic * rtp_stats.readAc.access +
|
||||
local_result.power.writeOp.dynamic * rtp_stats.writeAc.access +
|
||||
local_result.power.searchOp.dynamic * rtp_stats.searchAc.access;
|
||||
}
|
||||
|
||||
// An SBT directory has more dynamic power
|
||||
if (sbt_dir_overhead > 0) {
|
||||
// Calculate peak dynamic power
|
||||
output_data.peak_dynamic_power +=
|
||||
(computeSBTDynEnergy(&sbt_tdp_stats) * clockRate);
|
||||
|
||||
// Calculate the runtime dynamic power
|
||||
output_data.runtime_dynamic_energy +=
|
||||
computeSBTDynEnergy(&sbt_rtp_stats);
|
||||
}
|
||||
}
|
||||
|
||||
// Release whatever the selected cacti solution still holds.
CacheArray::~CacheArray() {
    this->local_result.cleanup();
}
|
117
ext/mcpat/cachearray.h
Normal file
117
ext/mcpat/cachearray.h
Normal file
|
@ -0,0 +1,117 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Joel Hestness
|
||||
* Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef CACHEARRAY_H_
|
||||
#define CACHEARRAY_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "basic_components.h"
|
||||
#include "cacti_interface.h"
|
||||
#include "component.h"
|
||||
#include "const.h"
|
||||
#include "parameter.h"
|
||||
|
||||
class CacheArray : public McPATComponent {
|
||||
public:
|
||||
static double area_efficiency_threshold;
|
||||
|
||||
// These are used for the CACTI interface.
|
||||
static int ed;
|
||||
static int delay_wt;
|
||||
static int cycle_time_wt;
|
||||
static int area_wt;
|
||||
static int dynamic_power_wt;
|
||||
static int leakage_power_wt;
|
||||
static int delay_dev;
|
||||
static int cycle_time_dev;
|
||||
static int area_dev;
|
||||
static int dynamic_power_dev;
|
||||
static int leakage_power_dev;
|
||||
static int cycle_time_dev_threshold;
|
||||
|
||||
InputParameter l_ip;
|
||||
enum Device_ty device_ty;
|
||||
bool opt_local;
|
||||
enum Core_type core_ty;
|
||||
bool is_default;
|
||||
uca_org_t local_result;
|
||||
|
||||
// These are only used for static bank tag (SBT) directory type.
|
||||
double sbt_dir_overhead;
|
||||
// Set this to contain SBT peak power stats
|
||||
statsDef sbt_tdp_stats;
|
||||
// Set this to contain SBT runtime power stats
|
||||
statsDef sbt_rtp_stats;
|
||||
|
||||
CacheArray(XMLNode* _xml_data, const InputParameter *configure_interface,
|
||||
string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
|
||||
bool opt_local_ = true,
|
||||
enum Core_type core_ty_ = Inorder, bool _is_default = true);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
void compute_base_power();
|
||||
void setSBTDirOverhead(double overhead) { sbt_dir_overhead = overhead; }
|
||||
~CacheArray();
|
||||
|
||||
private:
|
||||
double computeSBTDynEnergy(statsDef *sbt_stats_ptr);
|
||||
};
|
||||
|
||||
extern inline
|
||||
double CacheArray::computeSBTDynEnergy(statsDef *sbt_stats_p) {
|
||||
if (sbt_dir_overhead == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Write miss on dynamic home node will generate a replacement write on
|
||||
// whole cache block
|
||||
double dynamic =
|
||||
sbt_stats_p->readAc.hit *
|
||||
(local_result.data_array2->power.readOp.dynamic * sbt_dir_overhead +
|
||||
local_result.tag_array2->power.readOp.dynamic) +
|
||||
sbt_stats_p->readAc.miss *
|
||||
local_result.tag_array2->power.readOp.dynamic +
|
||||
sbt_stats_p->writeAc.miss *
|
||||
local_result.tag_array2->power.readOp.dynamic +
|
||||
sbt_stats_p->writeAc.hit *
|
||||
(local_result.data_array2->power.writeOp.dynamic * sbt_dir_overhead +
|
||||
local_result.tag_array2->power.readOp.dynamic+
|
||||
sbt_stats_p->writeAc.miss *
|
||||
local_result.power.writeOp.dynamic);
|
||||
return dynamic;
|
||||
}
|
||||
|
||||
#endif /* CACHEARRAY_H_ */
|
42
ext/mcpat/cachecontroller.cc
Normal file
42
ext/mcpat/cachecontroller.cc
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Joel Hestness
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "cachecontroller.h"
|
||||
|
||||
/**
 * Build a cache controller component.
 *
 * The XML node and interface parameters are forwarded to McPATComponent;
 * the component then takes the target core clock rate and a fixed display
 * name before the base class instantiates whatever it finds below the node.
 */
CacheController::CacheController(XMLNode* _xml_data,
                                 InputParameter* _interface_ip)
    : McPATComponent(_xml_data, _interface_ip) {
    // Both fields are set before the recursive instantiation runs.
    clockRate = target_core_clockrate;
    name = "Cache Controller";

    McPATComponent::recursiveInstantiate();
}
|
|
@ -1,7 +1,7 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,24 +25,21 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Joel Hestness
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef CACHECONTROLLER_H_
|
||||
#define CACHECONTROLLER_H_
|
||||
|
||||
#ifndef GLOBALVAR_H_
|
||||
#define GLOBALVAR_H_
|
||||
|
||||
#ifdef GLOBALVAR
|
||||
#define EXTERN
|
||||
#else
|
||||
#define EXTERN extern
|
||||
#endif
|
||||
|
||||
EXTERN bool opt_for_clk;
|
||||
|
||||
#endif /* GLOBALVAR_H_ */
|
||||
|
||||
|
||||
#include "basic_components.h"
|
||||
|
||||
// McPAT component for a cache controller. The class only declares its own
// constructor and destructor; all other behavior is inherited from
// McPATComponent.
class CacheController : public McPATComponent {
|
||||
public:
|
||||
// Takes the component's XML node and the shared interface parameters and
// forwards both to McPATComponent.
CacheController(XMLNode* _xml_data, InputParameter* _interface_ip);
|
||||
// NOTE(review): cachecontroller.cc as shown defines only the constructor;
// confirm that a destructor definition exists somewhere.
~CacheController();
|
||||
};
|
||||
|
||||
#endif /* CACHECONTROLLER_H_ */
|
647
ext/mcpat/cacheunit.cc
Normal file
647
ext/mcpat/cacheunit.cc
Normal file
|
@ -0,0 +1,647 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Joel Hestness
|
||||
* Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
#include "arbiter.h"
|
||||
#include "array.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "cachearray.h"
|
||||
#include "cacheunit.h"
|
||||
#include "common.h"
|
||||
#include "const.h"
|
||||
#include "io.h"
|
||||
#include "logic.h"
|
||||
#include "parameter.h"
|
||||
|
||||
// Class-wide flags shared by every CacheUnit. The constructor copies
// is_cache, pure_cam and force_cache_config into interface_ip and passes
// opt_local to each CacheArray it creates.
bool CacheUnit::is_cache = true;
|
||||
bool CacheUnit::pure_cam = false;
|
||||
bool CacheUnit::opt_local = true;
|
||||
bool CacheUnit::force_cache_config = false;
|
||||
|
||||
/**
 * Build a cache unit from its XML description.
 *
 * After reading parameters and statistics via
 * set_cache_param_from_xml_data(), the constructor creates one CacheArray
 * child for the main data and tag arrays and, unless this is a static-tag
 * (ST) L1/L2 directory, further CacheArray children for the miss buffer,
 * fill buffer, prefetch buffer and (when wbb_size > 0) writeback buffer.
 * Each child receives its TDP (tdp_stats) and runtime (rtp_stats) access
 * statistics here; SBT directories additionally receive sbt_tdp_stats and
 * sbt_rtp_stats on the main array.
 *
 * @param _xml_data     XML node describing this cache unit
 * @param _interface_ip interface parameters forwarded to McPATComponent
 */
CacheUnit::CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip)
    : McPATComponent(_xml_data, _interface_ip), dir_overhead(0) {

    int tag;
    int data;

    name = "Cache Unit";
    CacheArray* arrayPtr = NULL;

    set_cache_param_from_xml_data();

    // All lower-level caches are physically indexed and tagged.
    double size = cache_params.capacity;
    double line = cache_params.blockW;
    double assoc = cache_params.assoc;
    double banks = cache_params.nbanks;

    // Static-tag directories keep the full address as the tag and do not
    // get any of the auxiliary buffers below.
    const bool is_st_directory =
        cache_params.dir_ty == ST &&
        (cache_params.cache_level == L1Directory ||
         cache_params.cache_level == L2Directory);

    if (is_st_directory) {
        tag = physical_address_width + EXTRA_TAG_BITS;
    } else {
        tag = physical_address_width - int(ceil(log2(size / line / assoc))) -
            int(ceil(log2(line))) + EXTRA_TAG_BITS;

        if (cache_params.dir_ty == SBT) {
            // One sharer bit per core, rounded up to whole bytes, expressed
            // as a fraction of the line. The divisor is forced to double so
            // an integral num_cores cannot truncate before ceil().
            dir_overhead =
                ceil(cache_params.num_cores / double(BITS_PER_BYTE)) *
                BITS_PER_BYTE / (line * BITS_PER_BYTE);
            line *= (1 + dir_overhead);
            size *= (1 + dir_overhead);
        }
    }

    interface_ip.cache_sz = (int)size;
    interface_ip.line_sz = (int)line;
    interface_ip.assoc = (int)assoc;
    interface_ip.nbanks = (int)banks;
    interface_ip.specific_tag = tag > 0;
    interface_ip.tag_w = tag;

    if (cache_params.cache_level == L1) {
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    } else {
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
    }

    interface_ip.access_mode = cache_params.cache_access_mode;
    interface_ip.throughput = cache_params.throughput;
    interface_ip.latency = cache_params.latency;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.is_cache = is_cache;
    interface_ip.pure_ram = cache_params.pure_ram;
    interface_ip.pure_cam = pure_cam;
    interface_ip.num_rw_ports = cache_params.cache_rw_ports;
    interface_ip.num_rd_ports = cache_params.cache_rd_ports;
    interface_ip.num_wr_ports = cache_params.cache_wr_ports;
    interface_ip.num_se_rd_ports = cache_params.cache_se_rd_ports;
    interface_ip.num_search_ports = cache_params.cache_search_ports;

    arrayPtr = new CacheArray(xml_data, &interface_ip, "Data and Tag Arrays",
                              cache_params.device_ty, clockRate, opt_local,
                              cache_params.core_ty);
    children.push_back(arrayPtr);

    // This is for calculating TDP, which depends on the number of
    // available ports
    int num_tdp_ports = arrayPtr->l_ip.num_rw_ports +
        arrayPtr->l_ip.num_rd_ports + arrayPtr->l_ip.num_wr_ports;

    // Set new array stats for calculating TDP and runtime power
    arrayPtr->tdp_stats.reset();
    arrayPtr->tdp_stats.readAc.access = cache_stats.tdp_read_access_scalar *
        num_tdp_ports * cache_stats.duty_cycle *
        cache_stats.homenode_access_scalar;
    arrayPtr->tdp_stats.readAc.miss = 0;
    arrayPtr->tdp_stats.readAc.hit = arrayPtr->tdp_stats.readAc.access -
        arrayPtr->tdp_stats.readAc.miss;
    arrayPtr->tdp_stats.writeAc.access = cache_stats.tdp_write_access_scalar *
        num_tdp_ports * cache_stats.duty_cycle *
        cache_stats.homenode_access_scalar;
    arrayPtr->tdp_stats.writeAc.miss = 0;
    arrayPtr->tdp_stats.writeAc.hit = arrayPtr->tdp_stats.writeAc.access -
        arrayPtr->tdp_stats.writeAc.miss;
    arrayPtr->tdp_stats.searchAc.access = 0;
    arrayPtr->tdp_stats.searchAc.miss = 0;
    arrayPtr->tdp_stats.searchAc.hit = 0;

    arrayPtr->rtp_stats.reset();
    if (cache_stats.use_detailed_stats) {
        arrayPtr->rtp_stats.dataReadAc.access =
            cache_stats.num_data_array_reads;
        arrayPtr->rtp_stats.dataWriteAc.access =
            cache_stats.num_data_array_writes;
        arrayPtr->rtp_stats.tagReadAc.access =
            cache_stats.num_tag_array_reads;
        arrayPtr->rtp_stats.tagWriteAc.access =
            cache_stats.num_tag_array_writes;
    } else {
        // This code makes assumptions. For instance, it assumes that
        // tag and data arrays are accessed in parallel on a read request and
        // this is a write-allocate cache. It also ignores any coherence
        // requests. Using detailed stats as above can avoid the ambiguity
        // that is introduced here
        arrayPtr->rtp_stats.dataReadAc.access =
            cache_stats.read_accesses + cache_stats.write_misses;
        arrayPtr->rtp_stats.dataWriteAc.access =
            cache_stats.write_accesses + cache_stats.read_misses;
        arrayPtr->rtp_stats.tagReadAc.access =
            cache_stats.read_accesses + cache_stats.write_accesses;
        arrayPtr->rtp_stats.tagWriteAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
    }

    // Set SBT stats if this is an SBT directory type
    if (dir_overhead > 0) {
        arrayPtr->setSBTDirOverhead(dir_overhead);

        // TDP stats
        arrayPtr->sbt_tdp_stats.readAc.access =
            cache_stats.tdp_read_access_scalar *
            num_tdp_ports * cache_stats.dir_duty_cycle *
            (1 - cache_stats.homenode_access_scalar);
        arrayPtr->sbt_tdp_stats.readAc.miss = 0;
        arrayPtr->sbt_tdp_stats.readAc.hit =
            arrayPtr->sbt_tdp_stats.readAc.access -
            arrayPtr->sbt_tdp_stats.readAc.miss;
        arrayPtr->sbt_tdp_stats.writeAc.access =
            cache_stats.tdp_sbt_write_access_scalar *
            num_tdp_ports * cache_stats.dir_duty_cycle *
            (1 - cache_stats.homenode_access_scalar);
        arrayPtr->sbt_tdp_stats.writeAc.miss = 0;
        arrayPtr->sbt_tdp_stats.writeAc.hit =
            arrayPtr->sbt_tdp_stats.writeAc.access -
            arrayPtr->sbt_tdp_stats.writeAc.miss;

        // Runtime power stats
        arrayPtr->sbt_rtp_stats.readAc.access =
            cache_stats.homenode_read_accesses;
        arrayPtr->sbt_rtp_stats.readAc.miss =
            cache_stats.homenode_read_misses;
        // Hits = accesses - misses. This used to be stored into
        // readAc.access a second time, clobbering the access count and
        // leaving readAc.hit unset.
        arrayPtr->sbt_rtp_stats.readAc.hit =
            cache_stats.homenode_read_accesses -
            cache_stats.homenode_read_misses;
        arrayPtr->sbt_rtp_stats.writeAc.access =
            cache_stats.homenode_write_accesses;
        arrayPtr->sbt_rtp_stats.writeAc.miss =
            cache_stats.homenode_write_misses;
        arrayPtr->sbt_rtp_stats.writeAc.hit =
            cache_stats.homenode_write_accesses -
            cache_stats.homenode_write_misses;
    }

    interface_ip.force_cache_config = force_cache_config;
    if (!is_st_directory) {
        // Miss Buffer
        tag = physical_address_width + EXTRA_TAG_BITS;
        data = (physical_address_width) +
            int(ceil(log2(size / cache_params.blockW))) +
            (cache_params.blockW * BITS_PER_BYTE);
        // Round the entry width up to whole bytes; the divisor is forced to
        // double so the division cannot truncate before ceil().
        line = int(ceil(data / double(BITS_PER_BYTE)));
        size = cache_params.missb_size * line;

        interface_ip.cache_sz = size;
        interface_ip.line_sz = line;
        interface_ip.assoc = cache_params.missb_assoc;
        interface_ip.nbanks = cache_params.missb_banks;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;

        if (cache_params.cache_level == L1) {
            interface_ip.out_w = line * BITS_PER_BYTE;
        } else {
            interface_ip.out_w = line * BITS_PER_BYTE / 2;
        }

        interface_ip.access_mode = cache_params.miss_buff_access_mode;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.is_cache = is_cache;
        interface_ip.pure_ram = cache_params.pure_ram;
        interface_ip.pure_cam = pure_cam;
        interface_ip.throughput = cache_params.throughput;
        interface_ip.latency = cache_params.latency;
        interface_ip.num_rw_ports = cache_params.miss_buff_rw_ports;
        interface_ip.num_rd_ports = cache_params.miss_buff_rd_ports;
        interface_ip.num_wr_ports = cache_params.miss_buff_wr_ports;
        interface_ip.num_se_rd_ports = cache_params.miss_buff_se_rd_ports;
        interface_ip.num_search_ports = cache_params.miss_buff_search_ports;

        arrayPtr = new CacheArray(xml_data, &interface_ip, "Miss Buffer",
                                  cache_params.device_ty, clockRate, opt_local,
                                  cache_params.core_ty);
        children.push_back(arrayPtr);

        arrayPtr->tdp_stats.reset();
        arrayPtr->tdp_stats.readAc.access = 0;
        arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
        arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;

        arrayPtr->rtp_stats.reset();
        arrayPtr->rtp_stats.readAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.writeAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.searchAc.access = 0;

        if (cache_params.dir_ty == SBT) {
            arrayPtr->rtp_stats.readAc.access +=
                cache_stats.homenode_write_misses;
            arrayPtr->rtp_stats.writeAc.access +=
                cache_stats.homenode_write_misses;
        }

        // Fill Buffer
        tag = physical_address_width + EXTRA_TAG_BITS;
        data = cache_params.blockW;

        interface_ip.cache_sz = data * cache_params.fu_size;
        interface_ip.line_sz = data;
        interface_ip.assoc = cache_params.fu_assoc;
        interface_ip.nbanks = cache_params.fu_banks;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;

        if (cache_params.cache_level == L1) {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        } else {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
        }

        // pure_ram is not reassigned for the fill buffer; it keeps the
        // value set for the miss buffer above.
        interface_ip.access_mode = cache_params.fetch_buff_access_mode;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.is_cache = is_cache;
        interface_ip.pure_cam = pure_cam;
        interface_ip.throughput = cache_params.throughput;
        interface_ip.latency = cache_params.latency;
        interface_ip.num_rw_ports = cache_params.fetch_buff_rw_ports;
        interface_ip.num_rd_ports = cache_params.fetch_buff_rd_ports;
        interface_ip.num_wr_ports = cache_params.fetch_buff_wr_ports;
        interface_ip.num_se_rd_ports = cache_params.fetch_buff_se_rd_ports;
        interface_ip.num_search_ports = cache_params.fetch_buff_search_ports;
        arrayPtr = new CacheArray(xml_data, &interface_ip, "Fill Buffer",
                                  cache_params.device_ty, clockRate, opt_local,
                                  cache_params.core_ty);
        children.push_back(arrayPtr);

        arrayPtr->tdp_stats.reset();
        arrayPtr->tdp_stats.readAc.access = 0;
        arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
        arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;

        arrayPtr->rtp_stats.reset();
        arrayPtr->rtp_stats.readAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.writeAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.searchAc.access = 0;

        if (cache_params.dir_ty == SBT) {
            arrayPtr->rtp_stats.readAc.access +=
                cache_stats.homenode_write_misses;
            arrayPtr->rtp_stats.writeAc.access +=
                cache_stats.homenode_write_misses;
        }

        // Prefetch Buffer
        tag = physical_address_width + EXTRA_TAG_BITS;
        line = cache_params.blockW;

        interface_ip.cache_sz = cache_params.prefetchb_size * line;
        interface_ip.line_sz = line;
        interface_ip.assoc = cache_params.prefetchb_assoc;
        interface_ip.nbanks = cache_params.prefetchb_banks;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;

        if (cache_params.cache_level == L1) {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        } else {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
        }

        interface_ip.access_mode = cache_params.prefetch_buff_access_mode;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.is_cache = is_cache;
        interface_ip.pure_ram = cache_params.pure_ram;
        interface_ip.pure_cam = pure_cam;
        interface_ip.throughput = cache_params.throughput;
        interface_ip.latency = cache_params.latency;
        interface_ip.num_rw_ports = cache_params.pf_buff_rw_ports;
        interface_ip.num_rd_ports = cache_params.pf_buff_rd_ports;
        interface_ip.num_wr_ports = cache_params.pf_buff_wr_ports;
        interface_ip.num_se_rd_ports = cache_params.pf_buff_se_rd_ports;
        interface_ip.num_search_ports = cache_params.pf_buff_search_ports;
        arrayPtr = new CacheArray(xml_data, &interface_ip, "Prefetch Buffer",
                                  cache_params.device_ty, clockRate, opt_local,
                                  cache_params.core_ty);
        children.push_back(arrayPtr);

        arrayPtr->tdp_stats.reset();
        arrayPtr->tdp_stats.readAc.access = 0;
        arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
        arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;

        arrayPtr->rtp_stats.reset();
        arrayPtr->rtp_stats.readAc.access = cache_stats.read_misses;
        arrayPtr->rtp_stats.writeAc.access = cache_stats.read_misses;
        arrayPtr->rtp_stats.searchAc.access = 0;

        if (cache_params.dir_ty == SBT) {
            arrayPtr->rtp_stats.readAc.access +=
                cache_stats.homenode_write_misses;
            arrayPtr->rtp_stats.writeAc.access +=
                cache_stats.homenode_write_misses;
        }

        // Writeback Buffer
        if (cache_params.wbb_size > 0) {
            tag = physical_address_width + EXTRA_TAG_BITS;
            line = cache_params.blockW;

            interface_ip.cache_sz = cache_params.wbb_size * line;
            interface_ip.line_sz = line;
            interface_ip.assoc = cache_params.wbb_assoc;
            interface_ip.nbanks = cache_params.wbb_banks;
            interface_ip.specific_tag = tag > 0;
            interface_ip.tag_w = tag;

            if (cache_params.cache_level == L1) {
                interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
            } else {
                interface_ip.out_w =
                    interface_ip.line_sz * BITS_PER_BYTE / 2;
            }

            interface_ip.access_mode =
                cache_params.writeback_buff_access_mode;
            interface_ip.obj_func_dyn_energy = 0;
            interface_ip.obj_func_dyn_power = 0;
            interface_ip.obj_func_leak_power = 0;
            interface_ip.obj_func_cycle_t = 1;
            interface_ip.is_cache = is_cache;
            interface_ip.pure_ram = cache_params.pure_ram;
            interface_ip.pure_cam = pure_cam;
            interface_ip.throughput = cache_params.throughput;
            interface_ip.latency = cache_params.latency;
            interface_ip.num_rw_ports = cache_params.wb_buff_rw_ports;
            interface_ip.num_rd_ports = cache_params.wb_buff_rd_ports;
            interface_ip.num_wr_ports = cache_params.wb_buff_wr_ports;
            interface_ip.num_se_rd_ports = cache_params.wb_buff_se_rd_ports;
            interface_ip.num_search_ports =
                cache_params.wb_buff_search_ports;
            arrayPtr = new CacheArray(xml_data, &interface_ip,
                                      "Writeback Buffer",
                                      cache_params.device_ty, clockRate,
                                      opt_local, cache_params.core_ty);
            children.push_back(arrayPtr);

            arrayPtr->tdp_stats.reset();
            arrayPtr->tdp_stats.readAc.access = 0;
            arrayPtr->tdp_stats.writeAc.access =
                arrayPtr->l_ip.num_search_ports;
            arrayPtr->tdp_stats.searchAc.access =
                arrayPtr->l_ip.num_search_ports;

            arrayPtr->rtp_stats.reset();
            arrayPtr->rtp_stats.readAc.access = cache_stats.write_misses;
            arrayPtr->rtp_stats.writeAc.access = cache_stats.write_misses;
            arrayPtr->rtp_stats.searchAc.access = 0;

            if (cache_params.dir_ty == SBT) {
                arrayPtr->rtp_stats.readAc.access +=
                    cache_stats.homenode_write_misses;
                arrayPtr->rtp_stats.writeAc.access +=
                    cache_stats.homenode_write_misses;
            }
        }
    }
}
|
||||
|
||||
// Energy computation for the cache unit. Nothing CacheUnit-specific is
// added here: the call is forwarded, explicitly qualified, to
// McPATComponent::computeEnergy().
void CacheUnit::computeEnergy() {
|
||||
McPATComponent::computeEnergy();
|
||||
}
|
||||
|
||||
void CacheUnit::set_cache_param_from_xml_data() {
|
||||
int level, type;
|
||||
|
||||
// Initialization... move this?
|
||||
memset(&cache_params, 0, sizeof(CacheParameters));
|
||||
memset(&cache_stats, 0, sizeof(CacheStatistics));
|
||||
|
||||
// By default, use the core clock frequency. This can be changed by
|
||||
// setting the clockrate param in the XML definition of the CacheUnit
|
||||
clockRate = target_core_clockrate;
|
||||
XMLCSTR comp_name = xml_data->getAttribute("name");
|
||||
if (comp_name) {
|
||||
name = comp_name;
|
||||
}
|
||||
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
int tech_type;
|
||||
int mat_type;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_INT_IF("level", level);
|
||||
ASSIGN_FP_IF("size", cache_params.capacity);
|
||||
ASSIGN_FP_IF("block_size", cache_params.blockW);
|
||||
ASSIGN_FP_IF("assoc", cache_params.assoc);
|
||||
ASSIGN_FP_IF("num_banks", cache_params.nbanks);
|
||||
ASSIGN_FP_IF("latency", cache_params.latency);
|
||||
ASSIGN_FP_IF("throughput", cache_params.throughput);
|
||||
ASSIGN_INT_IF("miss_buffer_size", cache_params.missb_size);
|
||||
ASSIGN_INT_IF("fetch_buffer_size", cache_params.fu_size);
|
||||
ASSIGN_INT_IF("prefetch_buffer_size", cache_params.prefetchb_size);
|
||||
ASSIGN_INT_IF("writeback_buffer_size", cache_params.wbb_size);
|
||||
ASSIGN_INT_IF("miss_buffer_assoc", cache_params.missb_assoc);
|
||||
ASSIGN_INT_IF("fetch_buffer_assoc", cache_params.fu_assoc);
|
||||
ASSIGN_INT_IF("prefetch_buffer_assoc", cache_params.prefetchb_assoc);
|
||||
ASSIGN_INT_IF("writeback_buffer_assoc", cache_params.wbb_assoc);
|
||||
ASSIGN_INT_IF("miss_buffer_banks", cache_params.missb_banks);
|
||||
ASSIGN_INT_IF("fetch_buffer_banks", cache_params.fu_banks);
|
||||
ASSIGN_INT_IF("prefetch_buffer_banks", cache_params.prefetchb_banks);
|
||||
ASSIGN_INT_IF("writeback_buffer_banks", cache_params.wbb_banks);
|
||||
ASSIGN_ENUM_IF("cache_access_mode",
|
||||
cache_params.cache_access_mode, Access_mode);
|
||||
ASSIGN_ENUM_IF("miss_buff_access_mode",
|
||||
cache_params.miss_buff_access_mode, Access_mode);
|
||||
ASSIGN_ENUM_IF("fetch_buff_access_mode",
|
||||
cache_params.fetch_buff_access_mode, Access_mode);
|
||||
ASSIGN_ENUM_IF("prefetch_buff_access_mode",
|
||||
cache_params.prefetch_buff_access_mode, Access_mode);
|
||||
ASSIGN_ENUM_IF("writeback_buff_access_mode",
|
||||
cache_params.writeback_buff_access_mode, Access_mode);
|
||||
ASSIGN_INT_IF("cache_rw_ports", cache_params.cache_rw_ports);
|
||||
ASSIGN_INT_IF("cache_rd_ports", cache_params.cache_rd_ports);
|
||||
ASSIGN_INT_IF("cache_wr_ports", cache_params.cache_wr_ports);
|
||||
ASSIGN_INT_IF("cache_se_rd_ports", cache_params.cache_se_rd_ports);
|
||||
ASSIGN_INT_IF("cache_search_ports", cache_params.cache_search_ports);
|
||||
ASSIGN_INT_IF("miss_buff_rw_ports", cache_params.miss_buff_rw_ports);
|
||||
ASSIGN_INT_IF("miss_buff_rd_ports", cache_params.miss_buff_rd_ports);
|
||||
ASSIGN_INT_IF("miss_buff_wr_ports", cache_params.miss_buff_wr_ports);
|
||||
ASSIGN_INT_IF("miss_buff_se_rd_ports" ,
|
||||
cache_params.miss_buff_se_rd_ports);
|
||||
ASSIGN_INT_IF("miss_buff_search_ports",
|
||||
cache_params.miss_buff_search_ports);
|
||||
ASSIGN_INT_IF("fetch_buff_rw_ports", cache_params.fetch_buff_rw_ports);
|
||||
ASSIGN_INT_IF("fetch_buff_rd_ports", cache_params.fetch_buff_rd_ports);
|
||||
ASSIGN_INT_IF("fetch_buff_wr_ports", cache_params.fetch_buff_wr_ports);
|
||||
ASSIGN_INT_IF("fetch_buff_se_rd_ports",
|
||||
cache_params.fetch_buff_se_rd_ports);
|
||||
ASSIGN_INT_IF("fetch_buff_search_ports",
|
||||
cache_params.fetch_buff_search_ports);
|
||||
ASSIGN_INT_IF("pf_buff_rw_ports", cache_params.pf_buff_rw_ports);
|
||||
ASSIGN_INT_IF("pf_buff_rd_ports", cache_params.pf_buff_rd_ports);
|
||||
ASSIGN_INT_IF("pf_buff_wr_ports", cache_params.pf_buff_wr_ports);
|
||||
ASSIGN_INT_IF("pf_buff_se_rd_ports", cache_params.pf_buff_se_rd_ports);
|
||||
ASSIGN_INT_IF("pf_buff_search_ports",
|
||||
cache_params.pf_buff_search_ports);
|
||||
ASSIGN_INT_IF("wb_buff_rw_ports", cache_params.wb_buff_rw_ports);
|
||||
ASSIGN_INT_IF("wb_buff_rd_ports", cache_params.wb_buff_rd_ports);
|
||||
ASSIGN_INT_IF("wb_buff_wr_ports", cache_params.wb_buff_wr_ports);
|
||||
ASSIGN_INT_IF("wb_buff_se_rd_ports", cache_params.wb_buff_se_rd_ports);
|
||||
ASSIGN_INT_IF("wb_buff_search_ports",
|
||||
cache_params.wb_buff_search_ports);
|
||||
ASSIGN_FP_IF("clockrate", cache_params.clockRate);
|
||||
ASSIGN_INT_IF("pure_ram", cache_params.pure_ram);
|
||||
ASSIGN_INT_IF("tech_type", tech_type);
|
||||
ASSIGN_ENUM_IF("Directory_type", cache_params.dir_ty, Dir_type);
|
||||
ASSIGN_ENUM_IF("device_type", cache_params.device_ty, Device_ty);
|
||||
ASSIGN_ENUM_IF("core_type", cache_params.core_ty, Core_type);
|
||||
ASSIGN_INT_IF("num_cores", cache_params.num_cores);
|
||||
ASSIGN_INT_IF("wire_mat_type", mat_type);
|
||||
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
|
||||
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
// Change from MHz to Hz
|
||||
cache_params.clockRate *= 1e6;
|
||||
if (cache_params.clockRate > 0) {
|
||||
clockRate = cache_params.clockRate;
|
||||
}
|
||||
|
||||
interface_ip.data_arr_ram_cell_tech_type = tech_type;
|
||||
interface_ip.data_arr_peri_global_tech_type = tech_type;
|
||||
interface_ip.tag_arr_ram_cell_tech_type = tech_type;
|
||||
interface_ip.tag_arr_peri_global_tech_type = tech_type;
|
||||
|
||||
interface_ip.wire_is_mat_type = mat_type;
|
||||
interface_ip.wire_os_mat_type = mat_type;
|
||||
|
||||
switch(level) {
|
||||
case 1:
|
||||
cache_params.cache_level = L1;
|
||||
break;
|
||||
case 2:
|
||||
cache_params.cache_level = L2;
|
||||
break;
|
||||
case 3:
|
||||
cache_params.cache_level = L3;
|
||||
break;
|
||||
case 4:
|
||||
cache_params.cache_level = L1Directory;
|
||||
break;
|
||||
case 5:
|
||||
cache_params.cache_level = L2Directory;
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "ERROR: Unrecognized cache level in %s: %d\n",
|
||||
name.c_str(), level);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cache_stats.use_detailed_stats = false;
|
||||
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("num_data_array_reads", cache_stats.num_data_array_reads);
|
||||
ASSIGN_FP_IF("num_data_array_writes",
|
||||
cache_stats.num_data_array_writes);
|
||||
ASSIGN_FP_IF("num_tag_array_reads", cache_stats.num_tag_array_reads);
|
||||
ASSIGN_FP_IF("num_tag_array_writes", cache_stats.num_tag_array_writes);
|
||||
ASSIGN_FP_IF("duty_cycle", cache_stats.duty_cycle);
|
||||
ASSIGN_FP_IF("read_accesses", cache_stats.read_accesses);
|
||||
ASSIGN_FP_IF("write_accesses", cache_stats.write_accesses);
|
||||
ASSIGN_FP_IF("read_misses", cache_stats.read_misses);
|
||||
ASSIGN_FP_IF("write_misses", cache_stats.write_misses);
|
||||
ASSIGN_FP_IF("conflicts", cache_stats.conflicts);
|
||||
ASSIGN_INT_IF("homenode_read_accesses",
|
||||
cache_stats.homenode_read_accesses);
|
||||
ASSIGN_INT_IF("homenode_write_accesses",
|
||||
cache_stats.homenode_write_accesses);
|
||||
ASSIGN_INT_IF("homenode_read_misses",
|
||||
cache_stats.homenode_read_misses);
|
||||
ASSIGN_INT_IF("homenode_write_misses",
|
||||
cache_stats.homenode_write_misses);
|
||||
ASSIGN_FP_IF("homenode_access_scalar",
|
||||
cache_stats.homenode_access_scalar);
|
||||
ASSIGN_FP_IF("tdp_read_access_scalar",
|
||||
cache_stats.tdp_read_access_scalar);
|
||||
ASSIGN_FP_IF("tdp_write_access_scalar",
|
||||
cache_stats.tdp_write_access_scalar);
|
||||
ASSIGN_FP_IF("tdp_sbt_write_access_scalar",
|
||||
cache_stats.tdp_sbt_write_access_scalar);
|
||||
ASSIGN_FP_IF("dir_duty_cycle",
|
||||
cache_stats.dir_duty_cycle);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (cache_stats.num_data_array_reads > 0 ||
|
||||
cache_stats.num_data_array_writes > 0 ||
|
||||
cache_stats.num_tag_array_reads > 0 ||
|
||||
cache_stats.num_tag_array_writes > 0) {
|
||||
cache_stats.use_detailed_stats = true;
|
||||
calculate_runtime_data_and_tag = true;
|
||||
}
|
||||
}
|
167
ext/mcpat/cacheunit.h
Normal file
167
ext/mcpat/cacheunit.h
Normal file
|
@ -0,0 +1,167 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Joel Hestness
|
||||
* Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef CACHEUNIT_H_
|
||||
#define CACHEUNIT_H_
|
||||
|
||||
#include "area.h"
|
||||
#include "array.h"
|
||||
#include "basic_components.h"
|
||||
#include "logic.h"
|
||||
#include "parameter.h"
|
||||
|
||||
// Configuration of one cache unit and the buffers associated with it.
// NOTE(review): the XML parsing code in this patch assigns at least the
// *_ports fields, clockRate, pure_ram, dir_ty, device_ty, core_ty and
// num_cores from named XML parameters; the remaining fields are presumably
// assigned the same way earlier in that function -- confirm.
class CacheParameters {
|
||||
public:
|
||||
// Read from the "Directory_type" parameter.
enum Dir_type dir_ty;
|
||||
// Read from the "clockrate" parameter in MHz; the parser multiplies it by
// 1e6 afterwards, so the stored value is in Hz.
double clockRate;
|
||||
double capacity;
|
||||
double blockW;
|
||||
double assoc;
|
||||
double nbanks;
|
||||
double throughput;
|
||||
double latency;
|
||||
// Size / associativity / bank count of the auxiliary buffers.
// NOTE(review): prefixes presumably mean missb = miss buffer,
// prefetchb = prefetch buffer, wbb = write-back buffer; the meaning of
// "fu" is not visible here -- TODO confirm.
int missb_size;
|
||||
int fu_size;
|
||||
int prefetchb_size;
|
||||
int wbb_size;
|
||||
int missb_assoc;
|
||||
int fu_assoc;
|
||||
int prefetchb_assoc;
|
||||
int wbb_assoc;
|
||||
int missb_banks;
|
||||
int fu_banks;
|
||||
int prefetchb_banks;
|
||||
int wbb_banks;
|
||||
// Access mode of the cache itself and of each associated buffer.
enum Access_mode cache_access_mode;
|
||||
enum Access_mode miss_buff_access_mode;
|
||||
enum Access_mode fetch_buff_access_mode;
|
||||
enum Access_mode prefetch_buff_access_mode;
|
||||
enum Access_mode writeback_buff_access_mode;
|
||||
// Port counts. The prefix names the structure (cache, miss_buff,
// fetch_buff, pf_buff, wb_buff).
// NOTE(review): suffixes presumably mean rw = read/write, rd = read,
// wr = write, se_rd = single-ended read, search = search ports -- confirm.
int cache_rw_ports;
|
||||
int cache_rd_ports;
|
||||
int cache_wr_ports;
|
||||
int cache_se_rd_ports;
|
||||
int cache_search_ports;
|
||||
int miss_buff_rw_ports;
|
||||
int miss_buff_rd_ports;
|
||||
int miss_buff_wr_ports;
|
||||
int miss_buff_se_rd_ports;
|
||||
int miss_buff_search_ports;
|
||||
int fetch_buff_rw_ports;
|
||||
int fetch_buff_rd_ports;
|
||||
int fetch_buff_wr_ports;
|
||||
int fetch_buff_se_rd_ports;
|
||||
int fetch_buff_search_ports;
|
||||
int pf_buff_rw_ports;
|
||||
int pf_buff_rd_ports;
|
||||
int pf_buff_wr_ports;
|
||||
int pf_buff_se_rd_ports;
|
||||
int pf_buff_search_ports;
|
||||
int wb_buff_rw_ports;
|
||||
int wb_buff_rd_ports;
|
||||
int wb_buff_wr_ports;
|
||||
int wb_buff_se_rd_ports;
|
||||
int wb_buff_search_ports;
|
||||
// Read from the "pure_ram" parameter.
bool pure_ram;
|
||||
// Derived by the parser from the numeric cache level:
// 1 -> L1, 2 -> L2, 3 -> L3, 4 -> L1Directory, 5 -> L2Directory.
// Any other level makes the parser print an error and exit.
enum CacheLevel cache_level;
|
||||
// Read from the "device_type" parameter.
enum Device_ty device_ty;
|
||||
// Read from the "core_type" parameter.
enum Core_type core_ty;
|
||||
// Read from the "num_cores" parameter.
int num_cores;
|
||||
};
|
||||
|
||||
// Activity statistics of one cache unit. The XML parsing code in this patch
// fills these fields from <stat> nodes whose "name" attribute matches the
// field name.
class CacheStatistics {
|
||||
public:
|
||||
// Duty cycle is used for estimating TDP. It should reflect the highest
|
||||
// sustainable rate of access to the cache unit in execution of a
|
||||
// benchmark. Default should be 1.0: one access per cycle.
|
||||
double duty_cycle;
|
||||
// This duty cycle is only used for SBT directory types.
|
||||
double dir_duty_cycle;
|
||||
// The following two stats are also used for estimating TDP.
|
||||
double tdp_read_access_scalar;
|
||||
double tdp_write_access_scalar;
|
||||
// There are 2 ways to calculate dynamic power from activity statistics;
|
||||
// use_detailed_stats selects between them. Default is false: the parser
// resets it to false and sets it to true only when at least one of the
// num_*_array_* counters below is greater than zero.
|
||||
bool use_detailed_stats;
|
||||
// 1) Count the number and type of accesses to each cache array,
|
||||
// splitting data and tag arrays (use_detailed_stats = true).
|
||||
// These are extremely detailed statistics.
|
||||
// read_misses and write_misses are still required for this method for
|
||||
// various buffers associated with this cache.
|
||||
double num_data_array_reads;
|
||||
double num_data_array_writes;
|
||||
double num_tag_array_reads;
|
||||
double num_tag_array_writes;
|
||||
// 2) Count the number and type of accesses to the cache unit and
|
||||
// use them to extrapolate the number of accesses to the other
|
||||
// subcomponents (cache arrays and buffers).
|
||||
double read_accesses;
|
||||
double write_accesses;
|
||||
double read_misses;
|
||||
double write_misses;
|
||||
double conflicts;
|
||||
// The following are only used for SBT directory types.
|
||||
int homenode_read_accesses;
|
||||
int homenode_write_accesses;
|
||||
int homenode_read_misses;
|
||||
int homenode_write_misses;
|
||||
double homenode_access_scalar;
|
||||
double tdp_sbt_write_access_scalar;
|
||||
};
|
||||
|
||||
// McPATComponent subclass that bundles a cache's configuration
// (cache_params) with its activity statistics (cache_stats).
class CacheUnit : public McPATComponent {
|
||||
public:
|
||||
// Class-wide (static) flags shared by every CacheUnit instance.
static bool is_cache;
|
||||
static bool pure_cam;
|
||||
// This is used for CacheArray objects
|
||||
static bool opt_local;
|
||||
static bool force_cache_config;
|
||||
|
||||
int ithCache;
|
||||
// Configuration of this cache unit.
CacheParameters cache_params;
|
||||
// Activity statistics of this cache unit.
CacheStatistics cache_stats;
|
||||
Cache_type cacheType;
|
||||
// Set to true by the XML stat parsing code in this patch, together with
// cache_stats.use_detailed_stats, when any per-array read/write counter
// is greater than zero.
bool calculate_runtime_data_and_tag;
|
||||
double dir_overhead;
|
||||
|
||||
double scktRatio;
|
||||
|
||||
// TODO: REMOVE _interface_ip... It promotes a mess. Find a better way...
|
||||
CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip);
|
||||
// NOTE(review): presumably fills cache_params and cache_stats from the
// XML node handed to the constructor -- confirm against the definition.
void set_cache_param_from_xml_data();
|
||||
void computeEnergy();
|
||||
~CacheUnit() {};
|
||||
};
|
||||
|
||||
#endif /* CACHEUNIT_H_ */
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -54,8 +55,7 @@ using namespace std;
|
|||
const uint32_t nthreads = NTHREADS;
|
||||
|
||||
|
||||
void min_values_t::update_min_values(const min_values_t * val)
|
||||
{
|
||||
void min_values_t::update_min_values(const min_values_t * val) {
|
||||
min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay;
|
||||
min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn;
|
||||
min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage;
|
||||
|
@ -65,8 +65,7 @@ void min_values_t::update_min_values(const min_values_t * val)
|
|||
|
||||
|
||||
|
||||
void min_values_t::update_min_values(const uca_org_t & res)
|
||||
{
|
||||
void min_values_t::update_min_values(const uca_org_t & res) {
|
||||
min_delay = (min_delay > res.access_time) ? res.access_time : min_delay;
|
||||
min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn;
|
||||
min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage;
|
||||
|
@ -74,8 +73,7 @@ void min_values_t::update_min_values(const uca_org_t & res)
|
|||
min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc;
|
||||
}
|
||||
|
||||
void min_values_t::update_min_values(const nuca_org_t * res)
|
||||
{
|
||||
void min_values_t::update_min_values(const nuca_org_t * res) {
|
||||
min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay;
|
||||
min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn;
|
||||
min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage;
|
||||
|
@ -83,8 +81,7 @@ void min_values_t::update_min_values(const nuca_org_t * res)
|
|||
min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc;
|
||||
}
|
||||
|
||||
void min_values_t::update_min_values(const mem_array * res)
|
||||
{
|
||||
void min_values_t::update_min_values(const mem_array * res) {
|
||||
min_delay = (min_delay > res->access_time) ? res->access_time : min_delay;
|
||||
min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn;
|
||||
min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage;
|
||||
|
@ -94,8 +91,7 @@ void min_values_t::update_min_values(const mem_array * res)
|
|||
|
||||
|
||||
|
||||
void * calc_time_mt_wrapper(void * void_obj)
|
||||
{
|
||||
void * calc_time_mt_wrapper(void * void_obj) {
|
||||
calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
|
||||
uint32_t tid = calc_obj->tid;
|
||||
list<mem_array *> & data_arr = calc_obj->data_arr;
|
||||
|
@ -126,34 +122,28 @@ void * calc_time_mt_wrapper(void * void_obj)
|
|||
if (g_ip->wt == 0) {
|
||||
wt_min = Low_swing;
|
||||
wt_max = Low_swing;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
wt_min = Global;
|
||||
wt_max = Low_swing - 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
wt_min = Global;
|
||||
wt_max = Low_swing;
|
||||
}
|
||||
|
||||
for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2)
|
||||
{
|
||||
for (int wr = wt_min; wr <= wt_max; wr++)
|
||||
{
|
||||
for (uint32_t iter = tid; iter < niter; iter += nthreads)
|
||||
{
|
||||
for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) {
|
||||
for (int wr = wt_min; wr <= wt_max; wr++) {
|
||||
for (uint32_t iter = tid; iter < niter; iter += nthreads) {
|
||||
// reconstruct Ndwl, Ndbl, Ndcm
|
||||
unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
|
||||
unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter);
|
||||
unsigned int Ndcm = 1 << (iter % Ndcm_niter);
|
||||
for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2)
|
||||
{
|
||||
for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2)
|
||||
{
|
||||
for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX;
|
||||
Ndsam_lev_1 *= 2) {
|
||||
for (unsigned int Ndsam_lev_2 = 1;
|
||||
Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) {
|
||||
//for debugging
|
||||
if (g_ip->force_cache_config && is_tag == false)
|
||||
{
|
||||
if (g_ip->force_cache_config && is_tag == false) {
|
||||
wr = g_ip->wt;
|
||||
Ndwl = g_ip->ndwl;
|
||||
Ndbl = g_ip->ndbl;
|
||||
|
@ -167,8 +157,7 @@ void * calc_time_mt_wrapper(void * void_obj)
|
|||
}
|
||||
}
|
||||
|
||||
if (is_tag == true)
|
||||
{
|
||||
if (is_tag == true) {
|
||||
is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
|
||||
Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
|
||||
tag_arr.back(), 0, NULL, NULL,
|
||||
|
@ -176,32 +165,27 @@ void * calc_time_mt_wrapper(void * void_obj)
|
|||
}
|
||||
// If it's a fully-associative cache, the data array partition parameters are identical to those of
|
||||
// the tag array, so compute data array partition properties also here.
|
||||
if (is_tag == false || g_ip->fully_assoc)
|
||||
{
|
||||
if (is_tag == false || g_ip->fully_assoc) {
|
||||
is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
|
||||
Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
|
||||
data_arr.back(), 0, NULL, NULL,
|
||||
is_main_mem);
|
||||
}
|
||||
|
||||
if (is_valid_partition)
|
||||
{
|
||||
if (is_tag == true)
|
||||
{
|
||||
if (is_valid_partition) {
|
||||
if (is_tag == true) {
|
||||
tag_arr.back()->wt = (enum Wire_type) wr;
|
||||
tag_res->update_min_values(tag_arr.back());
|
||||
tag_arr.push_back(new mem_array);
|
||||
}
|
||||
if (is_tag == false || g_ip->fully_assoc)
|
||||
{
|
||||
if (is_tag == false || g_ip->fully_assoc) {
|
||||
data_arr.back()->wt = (enum Wire_type) wr;
|
||||
data_res->update_min_values(data_arr.back());
|
||||
data_arr.push_back(new mem_array);
|
||||
}
|
||||
}
|
||||
|
||||
if (g_ip->force_cache_config && is_tag == false)
|
||||
{
|
||||
if (g_ip->force_cache_config && is_tag == false) {
|
||||
wr = wt_max;
|
||||
iter = niter;
|
||||
if (g_ip->nspd != 0) {
|
||||
|
@ -223,7 +207,11 @@ void * calc_time_mt_wrapper(void * void_obj)
|
|||
data_arr.pop_back();
|
||||
tag_arr.pop_back();
|
||||
|
||||
#ifndef DEBUG
|
||||
pthread_exit(NULL);
|
||||
#else
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -242,23 +230,20 @@ bool calculate_time(
|
|||
int flag_results_populate,
|
||||
results_mem_array *ptr_results,
|
||||
uca_org_t *ptr_fin_res,
|
||||
bool is_main_mem)
|
||||
{
|
||||
bool is_main_mem) {
|
||||
DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
|
||||
|
||||
if (dyn_p.is_valid == false)
|
||||
{
|
||||
if (dyn_p.is_valid == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
UCA * uca = new UCA(dyn_p);
|
||||
|
||||
|
||||
if (flag_results_populate)
|
||||
{ //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables
|
||||
}
|
||||
else
|
||||
{
|
||||
//For the final solution, populate the ptr_results data structure
|
||||
//-- TODO: copy only necessary variables
|
||||
if (flag_results_populate) {
|
||||
} else {
|
||||
int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
|
||||
int num_mats = uca->bank.dp.num_mats;
|
||||
bool is_fa = uca->bank.dp.fully_assoc;
|
||||
|
@ -271,7 +256,8 @@ bool calculate_time(
|
|||
ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
|
||||
ptr_array->access_time = uca->access_time;
|
||||
ptr_array->cycle_time = uca->cycle_time;
|
||||
ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time;
|
||||
ptr_array->multisubbank_interleave_cycle_time =
|
||||
uca->multisubbank_interleave_cycle_time;
|
||||
ptr_array->area_ram_cells = uca->area_all_dataramcells;
|
||||
ptr_array->area = uca->area.get_area();
|
||||
ptr_array->height = uca->area.h;
|
||||
|
@ -284,89 +270,135 @@ bool calculate_time(
|
|||
ptr_array->delay_senseamp_mux_decoder =
|
||||
MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
|
||||
uca->delay_array_to_sa_mux_lev_2_decoder);
|
||||
ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver;
|
||||
ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out;
|
||||
ptr_array->delay_before_subarray_output_driver =
|
||||
uca->delay_before_subarray_output_driver;
|
||||
ptr_array->delay_from_subarray_output_driver_to_output =
|
||||
uca->delay_from_subarray_out_drv_to_out;
|
||||
|
||||
ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
|
||||
ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
|
||||
ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay;
|
||||
ptr_array->delay_row_predecode_driver_and_block =
|
||||
uca->bank.mat.r_predec->delay;
|
||||
ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
|
||||
ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
|
||||
ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
|
||||
ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
|
||||
ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree;
|
||||
ptr_array->delay_subarray_output_driver =
|
||||
uca->bank.mat.delay_subarray_out_drv_htree;
|
||||
ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
|
||||
ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
|
||||
|
||||
ptr_array->all_banks_height = uca->area.h;
|
||||
ptr_array->all_banks_width = uca->area.w;
|
||||
ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area());
|
||||
ptr_array->area_efficiency = uca->area_all_dataramcells * 100 /
|
||||
(uca->area.get_area());
|
||||
|
||||
ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
|
||||
ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
|
||||
ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
|
||||
// cout<<"power_data_input_htree"<<uca->bank.htree_in_data->power.readOp.leakage<<endl;
|
||||
ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
|
||||
// cout<<"power_data_output_htree"<<uca->bank.htree_out_data->power.readOp.leakage<<endl;
|
||||
ptr_array->power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power;
|
||||
ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power;
|
||||
ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_drivers =
|
||||
uca->bank.mat.r_predec->driver_power;
|
||||
ptr_array->power_row_predecoder_drivers.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_drivers.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_drivers.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_row_predecoder_blocks =
|
||||
uca->bank.mat.r_predec->block_power;
|
||||
ptr_array->power_row_predecoder_blocks.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_blocks.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_row_predecoder_blocks.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
|
||||
ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power;
|
||||
ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_drivers =
|
||||
uca->bank.mat.b_mux_predec->driver_power;
|
||||
ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power;
|
||||
ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_blocks =
|
||||
uca->bank.mat.b_mux_predec->block_power;
|
||||
ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
|
||||
ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_decoders.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_bit_mux_decoders.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers =
|
||||
uca->bank.mat.sa_mux_lev_1_predec->driver_power;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks =
|
||||
uca->bank.mat.sa_mux_lev_1_predec->block_power;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders =
|
||||
uca->bank.mat.power_sa_mux_lev_1_decoders;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = uca->bank.mat.sa_mux_lev_2_predec->driver_power;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers =
|
||||
uca->bank.mat.sa_mux_lev_2_predec->driver_power;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks =
|
||||
uca->bank.mat.sa_mux_lev_2_predec->block_power;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders =
|
||||
uca->bank.mat.power_sa_mux_lev_2_decoders;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_bitlines = uca->bank.mat.power_bitline;
|
||||
ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
|
@ -378,39 +410,46 @@ bool calculate_time(
|
|||
ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv;
|
||||
ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_prechg_eq_drivers =
|
||||
uca->bank.mat.power_bl_precharge_eq_drv;
|
||||
ptr_array->power_prechg_eq_drivers.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_prechg_eq_drivers.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_prechg_eq_drivers.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv;
|
||||
ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_output_drivers_at_subarray =
|
||||
uca->bank.mat.power_subarray_out_drv;
|
||||
ptr_array->power_output_drivers_at_subarray.readOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *=
|
||||
num_act_mats_hor_dir;
|
||||
|
||||
ptr_array->power_comparators = uca->bank.mat.power_comparator;
|
||||
ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
|
||||
ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
|
||||
|
||||
// cout << " num of mats: " << dyn_p.num_mats << endl;
|
||||
if (is_fa || pure_cam)
|
||||
{
|
||||
ptr_array->power_htree_in_search = uca->bank.htree_in_search->power;
|
||||
// cout<<"power_htree_in_search"<<uca->bank.htree_in_search->power.readOp.leakage<<endl;
|
||||
ptr_array->power_htree_out_search = uca->bank.htree_out_search->power;
|
||||
// cout<<"power_htree_out_search"<<uca->bank.htree_out_search->power.readOp.leakage<<endl;
|
||||
if (is_fa || pure_cam) {
|
||||
ptr_array->power_htree_in_search =
|
||||
uca->bank.htree_in_search->power;
|
||||
ptr_array->power_htree_out_search =
|
||||
uca->bank.htree_out_search->power;
|
||||
ptr_array->power_searchline = uca->bank.mat.power_searchline;
|
||||
// cout<<"power_searchlineh"<<uca->bank.mat.power_searchline.readOp.leakage<<endl;
|
||||
ptr_array->power_searchline.searchOp.dynamic *= num_mats;
|
||||
ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge;
|
||||
ptr_array->power_searchline_precharge =
|
||||
uca->bank.mat.power_searchline_precharge;
|
||||
ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
|
||||
ptr_array->power_matchlines = uca->bank.mat.power_matchline;
|
||||
ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
|
||||
ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge;
|
||||
ptr_array->power_matchline_precharge =
|
||||
uca->bank.mat.power_matchline_precharge;
|
||||
ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
|
||||
ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv;
|
||||
// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.power_matchline.searchOp.leakage<<endl;
|
||||
ptr_array->power_matchline_to_wordline_drv =
|
||||
uca->bank.mat.power_ml_to_ram_wl_drv;
|
||||
}
|
||||
|
||||
ptr_array->activate_energy = uca->activate_energy;
|
||||
|
@ -418,25 +457,14 @@ bool calculate_time(
|
|||
ptr_array->write_energy = uca->write_energy;
|
||||
ptr_array->precharge_energy = uca->precharge_energy;
|
||||
ptr_array->refresh_power = uca->refresh_power;
|
||||
ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page;
|
||||
ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page;
|
||||
ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks;
|
||||
ptr_array->leak_power_subbank_closed_page =
|
||||
uca->leak_power_subbank_closed_page;
|
||||
ptr_array->leak_power_subbank_open_page =
|
||||
uca->leak_power_subbank_open_page;
|
||||
ptr_array->leak_power_request_and_reply_networks =
|
||||
uca->leak_power_request_and_reply_networks;
|
||||
|
||||
ptr_array->precharge_delay = uca->precharge_delay;
|
||||
|
||||
|
||||
// cout<<"power_matchline.searchOp.leakage"<<uca->bank.mat.<<endl;
|
||||
//
|
||||
// if (!(is_fa || pure_cam))
|
||||
// {
|
||||
// cout << " num of cols: " << dyn_p.num_c_subarray << endl;
|
||||
// }
|
||||
// else if (is_fa)
|
||||
// {
|
||||
// cout << " num of cols: " << dyn_p.tag_num_c_subarray+ dyn_p.data_num_c_subarray<< endl;
|
||||
// } else
|
||||
// cout << " num of cols: " << dyn_p.tag_num_c_subarray<< endl;
|
||||
// cout << uca->bank.mat.subarray.get_total_cell_area()<<endl;
|
||||
}
|
||||
|
||||
|
||||
|
@ -446,16 +474,17 @@ bool calculate_time(
|
|||
|
||||
|
||||
|
||||
bool check_uca_org(uca_org_t & u, min_values_t *minval)
|
||||
{
|
||||
if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
|
||||
bool check_uca_org(uca_org_t & u, min_values_t *minval) {
|
||||
if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
|
||||
g_ip->delay_dev) {
|
||||
return false;
|
||||
}
|
||||
if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
|
||||
g_ip->dynamic_power_dev) {
|
||||
return false;
|
||||
}
|
||||
if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
|
||||
if (((u.power.readOp.leakage - minval->min_leakage) /
|
||||
minval->min_leakage) * 100 >
|
||||
g_ip->leakage_power_dev) {
|
||||
return false;
|
||||
}
|
||||
|
@ -470,16 +499,17 @@ bool check_uca_org(uca_org_t & u, min_values_t *minval)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool check_mem_org(mem_array & u, const min_values_t *minval)
|
||||
{
|
||||
if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) {
|
||||
bool check_mem_org(mem_array & u, const min_values_t *minval) {
|
||||
if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
|
||||
g_ip->delay_dev) {
|
||||
return false;
|
||||
}
|
||||
if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
|
||||
g_ip->dynamic_power_dev) {
|
||||
return false;
|
||||
}
|
||||
if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
|
||||
if (((u.power.readOp.leakage - minval->min_leakage) /
|
||||
minval->min_leakage) * 100 >
|
||||
g_ip->leakage_power_dev) {
|
||||
return false;
|
||||
}
|
||||
|
@ -497,8 +527,8 @@ bool check_mem_org(mem_array & u, const min_values_t *minval)
|
|||
|
||||
|
||||
|
||||
void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & ulist)
|
||||
{
|
||||
void find_optimal_uca(uca_org_t *res, min_values_t * minval,
|
||||
list<uca_org_t> & ulist) {
|
||||
double cost = 0;
|
||||
double min_cost = BIGNUM;
|
||||
float d, a, dp, lp, c;
|
||||
|
@ -509,51 +539,42 @@ void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & u
|
|||
d = g_ip->delay_wt;
|
||||
c = g_ip->cycle_time_wt;
|
||||
|
||||
if (ulist.empty() == true)
|
||||
{
|
||||
if (ulist.empty() == true) {
|
||||
cout << "ERROR: no valid cache organizations found" << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end(); niter++)
|
||||
{
|
||||
if (g_ip->ed == 1)
|
||||
{
|
||||
cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn);
|
||||
if (min_cost > cost)
|
||||
{
|
||||
for (list<uca_org_t>::iterator niter = ulist.begin(); niter != ulist.end();
|
||||
niter++) {
|
||||
if (g_ip->ed == 1) {
|
||||
cost = ((niter)->access_time / minval->min_delay) *
|
||||
((niter)->power.readOp.dynamic / minval->min_dyn);
|
||||
if (min_cost > cost) {
|
||||
min_cost = cost;
|
||||
*res = (*(niter));
|
||||
}
|
||||
}
|
||||
else if (g_ip->ed == 2)
|
||||
{
|
||||
} else if (g_ip->ed == 2) {
|
||||
cost = ((niter)->access_time / minval->min_delay) *
|
||||
((niter)->access_time / minval->min_delay) *
|
||||
((niter)->power.readOp.dynamic / minval->min_dyn);
|
||||
if (min_cost > cost)
|
||||
{
|
||||
if (min_cost > cost) {
|
||||
min_cost = cost;
|
||||
*res = (*(niter));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
/*
|
||||
* check whether the current organization
|
||||
* meets the input deviation constraints
|
||||
*/
|
||||
bool v = check_uca_org(*niter, minval);
|
||||
//if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling
|
||||
|
||||
if (v)
|
||||
{
|
||||
if (v) {
|
||||
cost = (d * ((niter)->access_time / minval->min_delay) +
|
||||
c * ((niter)->cycle_time / minval->min_cyc) +
|
||||
dp * ((niter)->power.readOp.dynamic / minval->min_dyn) +
|
||||
lp * ((niter)->power.readOp.leakage/minval->min_leakage) +
|
||||
lp *
|
||||
((niter)->power.readOp.leakage / minval->min_leakage) +
|
||||
a * ((niter)->area / minval->min_area));
|
||||
//fprintf(stderr, "cost = %g\n", cost);
|
||||
|
||||
if (min_cost > cost) {
|
||||
min_cost = cost;
|
||||
|
@ -562,8 +583,7 @@ void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & u
|
|||
if (niter != ulist.begin())
|
||||
niter--;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
niter = ulist.erase(niter);
|
||||
if (niter != ulist.begin())
|
||||
niter--;
|
||||
|
@ -571,61 +591,56 @@ void find_optimal_uca(uca_org_t *res, min_values_t * minval, list<uca_org_t> & u
|
|||
}
|
||||
}
|
||||
|
||||
if (min_cost == BIGNUM)
|
||||
{
|
||||
cout << "ERROR: no cache organizations met optimization criteria" << endl;
|
||||
if (min_cost == BIGNUM) {
|
||||
cout << "ERROR: no cache organizations met optimization criteria"
|
||||
<< endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
|
||||
{
|
||||
void filter_tag_arr(const min_values_t * min, list<mem_array *> & list) {
|
||||
double cost = BIGNUM;
|
||||
double cur_cost;
|
||||
double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt;
|
||||
double wt_delay = g_ip->delay_wt;
|
||||
double wt_dyn = g_ip->dynamic_power_wt;
|
||||
double wt_leakage = g_ip->leakage_power_wt;
|
||||
double wt_cyc = g_ip->cycle_time_wt;
|
||||
double wt_area = g_ip->area_wt;
|
||||
mem_array * res = NULL;
|
||||
|
||||
if (list.empty() == true)
|
||||
{
|
||||
if (list.empty() == true) {
|
||||
cout << "ERROR: no valid tag organizations found" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
while (list.empty() != true)
|
||||
{
|
||||
while (list.empty() != true) {
|
||||
bool v = check_mem_org(*list.back(), min);
|
||||
if (v)
|
||||
{
|
||||
if (v) {
|
||||
cur_cost = wt_delay * (list.back()->access_time / min->min_delay) +
|
||||
wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) +
|
||||
wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) +
|
||||
wt_dyn * (list.back()->power.readOp.dynamic /
|
||||
min->min_dyn) +
|
||||
wt_leakage * (list.back()->power.readOp.leakage /
|
||||
min->min_leakage) +
|
||||
wt_area * (list.back()->area / min->min_area) +
|
||||
wt_cyc * (list.back()->cycle_time / min->min_cyc);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
cur_cost = BIGNUM;
|
||||
}
|
||||
if (cur_cost < cost)
|
||||
{
|
||||
if (res != NULL)
|
||||
{
|
||||
if (cur_cost < cost) {
|
||||
if (res != NULL) {
|
||||
delete res;
|
||||
}
|
||||
cost = cur_cost;
|
||||
res = list.back();
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
delete list.back();
|
||||
}
|
||||
list.pop_back();
|
||||
}
|
||||
if(!res)
|
||||
{
|
||||
if (!res) {
|
||||
cout << "ERROR: no valid tag organizations found" << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
@ -635,25 +650,23 @@ void filter_tag_arr(const min_values_t * min, list<mem_array *> & list)
|
|||
|
||||
|
||||
|
||||
void filter_data_arr(list<mem_array *> & curr_list)
|
||||
{
|
||||
if (curr_list.empty() == true)
|
||||
{
|
||||
void filter_data_arr(list<mem_array *> & curr_list) {
|
||||
if (curr_list.empty() == true) {
|
||||
cout << "ERROR: no valid data array organizations found" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
list<mem_array *>::iterator iter;
|
||||
|
||||
for (iter = curr_list.begin(); iter != curr_list.end(); ++iter)
|
||||
{
|
||||
for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) {
|
||||
mem_array * m = *iter;
|
||||
|
||||
if (m == NULL) exit(1);
|
||||
|
||||
if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) &&
|
||||
((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5))
|
||||
{
|
||||
if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay >
|
||||
0.5) &&
|
||||
((m->power.readOp.dynamic - m->arr_min->min_dyn) /
|
||||
m->arr_min->min_dyn > 0.5)) {
|
||||
delete m;
|
||||
iter = curr_list.erase(iter);
|
||||
iter --;
|
||||
|
@ -675,8 +688,7 @@ void filter_data_arr(list<mem_array *> & curr_list)
|
|||
* above results
|
||||
* 4. Cache model with least cost is picked from sol_list
|
||||
*/
|
||||
void solve(uca_org_t *fin_res)
|
||||
{
|
||||
void solve(uca_org_t *fin_res) {
|
||||
bool is_dram = false;
|
||||
int pure_ram = g_ip->pure_ram;
|
||||
bool pure_cam = g_ip->pure_cam;
|
||||
|
@ -699,11 +711,11 @@ void solve(uca_org_t *fin_res)
|
|||
|
||||
|
||||
// distribute calculate_time() execution to multiple threads
|
||||
calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads];
|
||||
calc_time_mt_wrapper_struct * calc_array =
|
||||
new calc_time_mt_wrapper_struct[nthreads];
|
||||
pthread_t threads[nthreads];
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
calc_array[t].tid = t;
|
||||
calc_array[t].pure_ram = pure_ram;
|
||||
calc_array[t].pure_cam = pure_cam;
|
||||
|
@ -715,28 +727,32 @@ void solve(uca_org_t *fin_res)
|
|||
uint32_t ram_cell_tech_type;
|
||||
|
||||
// If it's a cache, first calculate the area, delay and power for all tag array partitions.
|
||||
if (!(pure_ram||pure_cam||g_ip->fully_assoc))
|
||||
{ //cache
|
||||
if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache
|
||||
is_tag = true;
|
||||
ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
|
||||
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
|
||||
is_dram = ((ram_cell_tech_type == lp_dram) ||
|
||||
(ram_cell_tech_type == comm_dram));
|
||||
init_tech_params(g_ip->F_sz_um, is_tag);
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
calc_array[t].is_tag = is_tag;
|
||||
calc_array[t].is_main_mem = false;
|
||||
calc_array[t].Nspd_min = 0.125;
|
||||
pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
|
||||
#ifndef DEBUG
|
||||
pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
|
||||
(void *)(&(calc_array[t])));
|
||||
#else
|
||||
calc_time_mt_wrapper((void *)(&(calc_array[t])));
|
||||
#endif
|
||||
}
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
#ifndef DEBUG
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
pthread_join(threads[t], NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
calc_array[t].data_arr.sort(mem_array::lt);
|
||||
data_arr.merge(calc_array[t].data_arr, mem_array::lt);
|
||||
calc_array[t].tag_arr.sort(mem_array::lt);
|
||||
|
@ -746,72 +762,66 @@ void solve(uca_org_t *fin_res)
|
|||
|
||||
|
||||
// calculate the area, delay and power for all data array partitions (for cache or plain RAM).
|
||||
// if (!g_ip->fully_assoc)
|
||||
// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion
|
||||
// in the new cacti, cam, fully_associative cache are processed as single array in the data portion
|
||||
is_tag = false;
|
||||
ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
|
||||
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
|
||||
init_tech_params(g_ip->F_sz_um, is_tag);
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
calc_array[t].is_tag = is_tag;
|
||||
calc_array[t].is_main_mem = g_ip->is_main_mem;
|
||||
if (!(pure_cam||g_ip->fully_assoc))
|
||||
{
|
||||
calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!(pure_cam || g_ip->fully_assoc)) {
|
||||
calc_array[t].Nspd_min = (double)(g_ip->out_w) /
|
||||
(double)(g_ip->block_sz * 8);
|
||||
} else {
|
||||
calc_array[t].Nspd_min = 1;
|
||||
}
|
||||
|
||||
pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t])));
|
||||
#ifndef DEBUG
|
||||
pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
|
||||
(void *)(&(calc_array[t])));
|
||||
#else
|
||||
calc_time_mt_wrapper((void *)(&(calc_array[t])));
|
||||
#endif
|
||||
}
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
#ifndef DEBUG
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
pthread_join(threads[t], NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
data_arr.clear();
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
calc_array[t].data_arr.sort(mem_array::lt);
|
||||
data_arr.merge(calc_array[t].data_arr, mem_array::lt);
|
||||
|
||||
|
||||
}
|
||||
// }
|
||||
|
||||
|
||||
|
||||
min_values_t * d_min = new min_values_t();
|
||||
min_values_t * t_min = new min_values_t();
|
||||
min_values_t * cache_min = new min_values_t();
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
d_min->update_min_values(calc_array[t].data_res);
|
||||
t_min->update_min_values(calc_array[t].tag_res);
|
||||
}
|
||||
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
|
||||
{
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
|
||||
(*miter)->arr_min = d_min;
|
||||
}
|
||||
|
||||
|
||||
//cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n";
|
||||
filter_data_arr(data_arr);
|
||||
if(!(pure_ram||pure_cam||g_ip->fully_assoc))
|
||||
{
|
||||
if (!(pure_ram || pure_cam || g_ip->fully_assoc)) {
|
||||
filter_tag_arr(t_min, tag_arr);
|
||||
}
|
||||
//cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n";
|
||||
|
||||
|
||||
if (pure_ram||pure_cam||g_ip->fully_assoc)
|
||||
{
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
|
||||
{
|
||||
if (pure_ram || pure_cam || g_ip->fully_assoc) {
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
|
||||
uca_org_t & curr_org = sol_list.back();
|
||||
curr_org.tag_array2 = NULL;
|
||||
curr_org.data_array2 = (*miter);
|
||||
|
@ -826,17 +836,12 @@ void solve(uca_org_t *fin_res)
|
|||
|
||||
sol_list.push_back(uca_org_t());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (tag_arr.empty() != true)
|
||||
{
|
||||
} else {
|
||||
while (tag_arr.empty() != true) {
|
||||
mem_array * arr_temp = (tag_arr.back());
|
||||
//delete tag_arr.back();
|
||||
tag_arr.pop_back();
|
||||
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); miter++)
|
||||
{
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
|
||||
uca_org_t & curr_org = sol_list.back();
|
||||
curr_org.tag_array2 = arr_temp;
|
||||
curr_org.data_array2 = (*miter);
|
||||
|
@ -860,17 +865,14 @@ void solve(uca_org_t *fin_res)
|
|||
|
||||
sol_list.clear();
|
||||
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); ++miter)
|
||||
{
|
||||
if (*miter != fin_res->data_array2)
|
||||
{
|
||||
for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) {
|
||||
if (*miter != fin_res->data_array2) {
|
||||
delete *miter;
|
||||
}
|
||||
}
|
||||
data_arr.clear();
|
||||
|
||||
for (uint32_t t = 0; t < nthreads; t++)
|
||||
{
|
||||
for (uint32_t t = 0; t < nthreads; t++) {
|
||||
delete calc_array[t].data_res;
|
||||
delete calc_array[t].tag_res;
|
||||
}
|
||||
|
@ -886,7 +888,14 @@ void update(uca_org_t *fin_res)
|
|||
if(fin_res->tag_array2)
|
||||
{
|
||||
init_tech_params(g_ip->F_sz_um,true);
|
||||
DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem);
|
||||
DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam,
|
||||
fin_res->tag_array2->Nspd,
|
||||
fin_res->tag_array2->Ndwl,
|
||||
fin_res->tag_array2->Ndbl,
|
||||
fin_res->tag_array2->Ndcm,
|
||||
fin_res->tag_array2->Ndsam_lev_1,
|
||||
fin_res->tag_array2->Ndsam_lev_2,
|
||||
g_ip->is_main_mem);
|
||||
if(tag_arr_dyn_p.is_valid)
|
||||
{
|
||||
UCA * tag_arr = new UCA(tag_arr_dyn_p);
|
||||
|
@ -894,12 +903,20 @@ void update(uca_org_t *fin_res)
|
|||
}
|
||||
else
|
||||
{
|
||||
cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
|
||||
cout << "ERROR: Cannot retrieve array structure for leakage feedback"
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
init_tech_params(g_ip->F_sz_um,false);
|
||||
DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem);
|
||||
DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam,
|
||||
fin_res->data_array2->Nspd,
|
||||
fin_res->data_array2->Ndwl,
|
||||
fin_res->data_array2->Ndbl,
|
||||
fin_res->data_array2->Ndcm,
|
||||
fin_res->data_array2->Ndsam_lev_1,
|
||||
fin_res->data_array2->Ndsam_lev_2,
|
||||
g_ip->is_main_mem);
|
||||
if(data_arr_dyn_p.is_valid)
|
||||
{
|
||||
UCA * data_arr = new UCA(data_arr_dyn_p);
|
||||
|
@ -907,7 +924,8 @@ void update(uca_org_t *fin_res)
|
|||
}
|
||||
else
|
||||
{
|
||||
cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl;
|
||||
cout << "ERROR: Cannot retrieve array structure for leakage feedback"
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -39,8 +40,7 @@
|
|||
#include "nuca.h"
|
||||
#include "router.h"
|
||||
|
||||
class min_values_t
|
||||
{
|
||||
class min_values_t {
|
||||
public:
|
||||
double min_delay;
|
||||
double min_dyn;
|
||||
|
@ -58,8 +58,7 @@ class min_values_t
|
|||
|
||||
|
||||
|
||||
struct solution
|
||||
{
|
||||
struct solution {
|
||||
int tag_array_index;
|
||||
int data_array_index;
|
||||
list<mem_array *>::iterator tag_array_iter;
|
||||
|
@ -94,8 +93,7 @@ void solve(uca_org_t *fin_res);
|
|||
void init_tech_params(double tech, bool is_tag);
|
||||
|
||||
|
||||
struct calc_time_mt_wrapper_struct
|
||||
{
|
||||
struct calc_time_mt_wrapper_struct {
|
||||
uint32_t tid;
|
||||
bool is_tag;
|
||||
bool pure_ram;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -37,8 +38,7 @@ Arbiter::Arbiter(
|
|||
double output_len,
|
||||
TechnologyParameter::DeviceType *dt
|
||||
): R(n_req), flit_size(flit_size_),
|
||||
o_len (output_len), deviceType(dt)
|
||||
{
|
||||
o_len (output_len), deviceType(dt) {
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||
Vdd = dt->Vdd;
|
||||
double technology = g_ip->F_sz_um;
|
||||
|
@ -56,45 +56,59 @@ Arbiter::~Arbiter(){}
|
|||
|
||||
double
|
||||
Arbiter::arb_req() {
|
||||
double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
|
||||
double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 *
|
||||
gate_C(NTn2, 0) +
|
||||
gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
|
||||
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
|
||||
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
|
||||
return temp;
|
||||
}
|
||||
|
||||
double
|
||||
Arbiter::arb_pri() {
|
||||
double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
|
||||
of flip-flop is ignored */
|
||||
/* switching capacitance of flip-flop is ignored */
|
||||
double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0));
|
||||
return temp;
|
||||
}
|
||||
|
||||
|
||||
double
|
||||
Arbiter::arb_grant() {
|
||||
double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
|
||||
double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
|
||||
drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
|
||||
return temp;
|
||||
}
|
||||
|
||||
double
|
||||
Arbiter::arb_int() {
|
||||
double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
|
||||
double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
|
||||
drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
|
||||
2 * gate_C(NTn2, 0) + gate_C(PTn2, 0));
|
||||
return temp;
|
||||
}
|
||||
|
||||
void
|
||||
Arbiter::compute_power() {
|
||||
power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
|
||||
arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
|
||||
double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
|
||||
double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
|
||||
double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
|
||||
double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
|
||||
double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
|
||||
double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
|
||||
power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
|
||||
power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
|
||||
power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() *
|
||||
Vdd * Vdd / 2 +
|
||||
arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd *
|
||||
Vdd);
|
||||
double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
|
||||
min_w_pmos * PTn1 * 2, 2, nor);
|
||||
double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R,
|
||||
min_w_pmos * PTn2 * R, 2, nor);
|
||||
double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi,
|
||||
min_w_pmos * PTi, 1, inv);
|
||||
double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
|
||||
min_w_pmos * PTn1 * 2, 2, nor);
|
||||
double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R,
|
||||
min_w_pmos * PTn2 * R, 2, nor);
|
||||
double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi,
|
||||
min_w_pmos * PTi, 1, inv);
|
||||
//FIXME include priority table leakage
|
||||
power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd;
|
||||
power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd +
|
||||
not_leak_gate * Vdd;
|
||||
}
|
||||
|
||||
double //wire cap with triple spacing
|
||||
|
@ -119,8 +133,7 @@ Arbiter::transmission_buf_ctrcap() {
|
|||
}
|
||||
|
||||
|
||||
void Arbiter::print_arbiter()
|
||||
{
|
||||
void Arbiter::print_arbiter() {
|
||||
cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
|
||||
cout << "Flit size : " << flit_size << " bits" << endl;
|
||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
|
||||
|
|
108
ext/mcpat/cacti/bank.cc
Executable file → Normal file
108
ext/mcpat/cacti/bank.cc
Executable file → Normal file
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -38,74 +39,91 @@
|
|||
Bank::Bank(const DynamicParameter & dyn_p):
|
||||
dp(dyn_p), mat(dp),
|
||||
num_addr_b_mat(dyn_p.number_addr_bits_mat),
|
||||
num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
|
||||
{
|
||||
num_mats_hor_dir(dyn_p.num_mats_h_dir),
|
||||
num_mats_ver_dir(dyn_p.num_mats_v_dir) {
|
||||
int RWP;
|
||||
int ERP;
|
||||
int EWP;
|
||||
int SCHP;
|
||||
|
||||
if (dp.use_inp_params)
|
||||
{
|
||||
if (dp.use_inp_params) {
|
||||
RWP = dp.num_rw_ports;
|
||||
ERP = dp.num_rd_ports;
|
||||
EWP = dp.num_wr_ports;
|
||||
SCHP = dp.num_search_ports;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
RWP = g_ip->num_rw_ports;
|
||||
ERP = g_ip->num_rd_ports;
|
||||
EWP = g_ip->num_wr_ports;
|
||||
SCHP = g_ip->num_search_ports;
|
||||
}
|
||||
|
||||
int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
|
||||
int total_addrbits = (dp.number_addr_bits_mat +
|
||||
dp.number_subbanks_decode) * (RWP + ERP + EWP);
|
||||
int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
|
||||
int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
|
||||
int searchinbits;
|
||||
int searchoutbits;
|
||||
|
||||
if (dp.fully_assoc || dp.pure_cam)
|
||||
{
|
||||
if (dp.fully_assoc || dp.pure_cam) {
|
||||
datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
|
||||
dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
|
||||
searchinbits = dp.num_si_b_bank_per_port * SCHP;
|
||||
searchoutbits = dp.num_so_b_bank_per_port * SCHP;
|
||||
}
|
||||
|
||||
if (!(dp.fully_assoc || dp.pure_cam))
|
||||
{
|
||||
if (g_ip->fast_access && dp.is_tag == false)
|
||||
{
|
||||
if (!(dp.fully_assoc || dp.pure_cam)) {
|
||||
if (g_ip->fast_access && dp.is_tag == false) {
|
||||
dataoutbits *= g_ip->data_assoc;
|
||||
}
|
||||
|
||||
htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
|
||||
htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
|
||||
htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
||||
htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w,
|
||||
(double)mat.area.h,
|
||||
total_addrbits, datainbits, 0, dataoutbits,
|
||||
0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
|
||||
Add_htree);
|
||||
htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w,
|
||||
(double)mat.area.h,
|
||||
total_addrbits, datainbits, 0, dataoutbits,
|
||||
0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
|
||||
Data_in_htree);
|
||||
htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w,
|
||||
(double)mat.area.h,
|
||||
total_addrbits, datainbits, 0, dataoutbits,
|
||||
0, num_mats_ver_dir * 2,
|
||||
num_mats_hor_dir * 2, Data_out_htree);
|
||||
|
||||
// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
|
||||
// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
||||
|
||||
area.w = htree_in_data->area.w;
|
||||
area.h = htree_in_data->area.h;
|
||||
}
|
||||
else
|
||||
{
|
||||
htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
|
||||
htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
|
||||
htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
||||
htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
|
||||
htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
|
||||
} else {
|
||||
htree_in_add =
|
||||
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||
searchoutbits, num_mats_ver_dir * 2,
|
||||
num_mats_hor_dir * 2, Add_htree);
|
||||
htree_in_data =
|
||||
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||
searchoutbits, num_mats_ver_dir * 2,
|
||||
num_mats_hor_dir * 2, Data_in_htree);
|
||||
htree_out_data =
|
||||
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||
searchoutbits, num_mats_ver_dir * 2,
|
||||
num_mats_hor_dir * 2, Data_out_htree);
|
||||
htree_in_search =
|
||||
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||
searchoutbits, num_mats_ver_dir * 2,
|
||||
num_mats_hor_dir * 2, Data_in_htree, true, true);
|
||||
htree_out_search =
|
||||
new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||
searchoutbits, num_mats_ver_dir * 2,
|
||||
num_mats_hor_dir * 2, Data_out_htree, true);
|
||||
|
||||
area.w = htree_in_data->area.w;
|
||||
area.h = htree_in_data->area.h;
|
||||
|
@ -113,18 +131,17 @@ Bank::Bank(const DynamicParameter & dyn_p):
|
|||
|
||||
num_addr_b_row_dec = _log2(mat.subarray.num_rows);
|
||||
num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
|
||||
num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
|
||||
num_addr_b_routed_to_mat_for_rd_or_wr =
|
||||
num_addr_b_mat - num_addr_b_row_dec;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Bank::~Bank()
|
||||
{
|
||||
Bank::~Bank() {
|
||||
delete htree_in_add;
|
||||
delete htree_out_data;
|
||||
delete htree_in_data;
|
||||
if (dp.fully_assoc || dp.pure_cam)
|
||||
{
|
||||
if (dp.fully_assoc || dp.pure_cam) {
|
||||
delete htree_in_search;
|
||||
delete htree_out_search;
|
||||
}
|
||||
|
@ -132,19 +149,16 @@ Bank::~Bank()
|
|||
|
||||
|
||||
|
||||
double Bank::compute_delays(double inrisetime)
|
||||
{
|
||||
double Bank::compute_delays(double inrisetime) {
|
||||
return mat.compute_delays(inrisetime);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Bank::compute_power_energy()
|
||||
{
|
||||
void Bank::compute_power_energy() {
|
||||
mat.compute_power_energy();
|
||||
|
||||
if (!(dp.fully_assoc || dp.pure_cam))
|
||||
{
|
||||
if (!(dp.fully_assoc || dp.pure_cam)) {
|
||||
power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
||||
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
||||
|
@ -158,9 +172,7 @@ void Bank::compute_power_energy()
|
|||
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
||||
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
||||
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
|
||||
power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
|
||||
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -39,8 +40,7 @@
|
|||
#include "htree2.h"
|
||||
#include "mat.h"
|
||||
|
||||
class Bank : public Component
|
||||
{
|
||||
class Bank : public Component {
|
||||
public:
|
||||
Bank(const DynamicParameter & dyn_p);
|
||||
~Bank();
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -39,18 +40,15 @@
|
|||
#include "basic_circuit.h"
|
||||
#include "parameter.h"
|
||||
|
||||
uint32_t _log2(uint64_t num)
|
||||
{
|
||||
uint32_t _log2(uint64_t num) {
|
||||
uint32_t log2 = 0;
|
||||
|
||||
if (num == 0)
|
||||
{
|
||||
if (num == 0) {
|
||||
std::cerr << "log0?" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
while (num > 1)
|
||||
{
|
||||
while (num > 1) {
|
||||
num = (num >> 1);
|
||||
log2++;
|
||||
}
|
||||
|
@ -59,25 +57,18 @@ uint32_t _log2(uint64_t num)
|
|||
}
|
||||
|
||||
|
||||
// Reports whether val is an exact power of two (1, 2, 4, 8, ...).
//
// val: value to classify; zero and negative values are never powers of two.
// Returns true when exactly one bit of a positive val is set.
//
// A positive integer is a power of two iff clearing its lowest set bit
// (val & (val - 1)) leaves zero. This gives the same answer as comparing
// floor(log2(val)) with floor(log2(val - 1)), without running two shift
// loops. The val > 0 guard is evaluated first, so val - 1 is never computed
// for the most negative value.
bool is_pow2(int64_t val) {
    return val > 0 && (val & (val - 1)) == 0;
}
|
||||
|
||||
|
||||
int powers (int base, int n)
|
||||
{
|
||||
int powers (int base, int n) {
|
||||
int i, p;
|
||||
|
||||
p = 1;
|
||||
|
@ -88,8 +79,7 @@ int powers (int base, int n)
|
|||
|
||||
/*----------------------------------------------------------------------*/
|
||||
|
||||
// Base-2 logarithm of x.
//
// x: must be strictly positive; this is enforced with assert().
// Returns log2(x) as a double.
//
// log2() is used instead of log(x) / log(2.0): the quotient of two rounded
// natural logarithms can land an ULP above or below the true value for exact
// powers of two, which changes the result when a caller rounds it with
// ceil() or floor().
double logtwo (double x) {
    assert(x > 0);
    return log2(x);
}
|
||||
|
@ -102,24 +92,16 @@ double gate_C(
|
|||
double wirelength,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
const TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if (_is_dram && _is_cell)
|
||||
{
|
||||
if (_is_dram && _is_cell) {
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if (_is_dram && _is_wl_tr)
|
||||
{
|
||||
} else if (_is_dram && _is_wl_tr) {
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if (!_is_dram && _is_cell)
|
||||
{
|
||||
} else if (!_is_dram && _is_cell) {
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
|
@ -134,25 +116,17 @@ double gate_C_pass(
|
|||
double wirelength, // poly wire length going to gate in lambda
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
// v5.0
|
||||
const TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
if ((_is_dram) && (_is_cell)) {
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
} else if ((_is_dram) && (_is_wl_tr)) {
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && _is_cell)
|
||||
{
|
||||
} else if ((!_is_dram) && _is_cell) {
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
|
@ -169,25 +143,17 @@ double drain_C_(
|
|||
double fold_dimension,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
double w_folded_tr;
|
||||
const TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
if ((_is_dram) && (_is_cell)) {
|
||||
dt = &g_tp.dram_acc; // DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
} else if ((_is_dram) && (_is_wl_tr)) {
|
||||
dt = &g_tp.dram_wl; // DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && _is_cell)
|
||||
{
|
||||
} else if ((!_is_dram) && _is_cell) {
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
|
@ -198,29 +164,23 @@ double drain_C_(
|
|||
double drain_C_metal_connecting_folded_tr = 0;
|
||||
|
||||
// determine the width of the transistor after folding (if it is getting folded)
|
||||
if (next_arg_thresh_folding_width_or_height_cell == 0)
|
||||
{ // interpret fold_dimension as the the folding width threshold
|
||||
if (next_arg_thresh_folding_width_or_height_cell == 0) {
|
||||
// interpret fold_dimension as the the folding width threshold
|
||||
// i.e. the value of transistor width above which the transistor gets folded
|
||||
w_folded_tr = fold_dimension;
|
||||
}
|
||||
else
|
||||
{ // interpret fold_dimension as the height of the cell that this transistor is part of.
|
||||
} else { // interpret fold_dimension as the height of the cell that this transistor is part of.
|
||||
double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
|
||||
// TODO : w_folded_tr must come from Component::compute_gate_area()
|
||||
double ratio_p_to_n = 2.0 / (2.0 + 1.0);
|
||||
if (nchannel)
|
||||
{
|
||||
if (nchannel) {
|
||||
w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
|
||||
}
|
||||
}
|
||||
int num_folded_tr = (int) (ceil(width / w_folded_tr));
|
||||
|
||||
if (num_folded_tr < 2)
|
||||
{
|
||||
if (num_folded_tr < 2) {
|
||||
w_folded_tr = width;
|
||||
}
|
||||
|
||||
|
@ -228,13 +188,11 @@ double drain_C_(
|
|||
(stack - 1) * g_tp.spacing_poly_to_poly;
|
||||
double drain_h_for_sidewall = w_folded_tr;
|
||||
double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
|
||||
if (num_folded_tr > 1)
|
||||
{
|
||||
if (num_folded_tr > 1) {
|
||||
total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
|
||||
(num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
|
||||
|
||||
if (num_folded_tr%2 == 0)
|
||||
{
|
||||
if (num_folded_tr % 2 == 0) {
|
||||
drain_h_for_sidewall = 0;
|
||||
}
|
||||
total_drain_height_for_cap_wrt_gate *= num_folded_tr;
|
||||
|
@ -255,24 +213,16 @@ double tr_R_on(
|
|||
int stack,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
const TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
if ((_is_dram) && (_is_cell)) {
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
} else if ((_is_dram) && (_is_wl_tr)) {
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && _is_cell)
|
||||
{
|
||||
} else if ((!_is_dram) && _is_cell) {
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
|
@ -291,24 +241,16 @@ double R_to_w(
|
|||
int nchannel,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
const TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((_is_dram) && (_is_cell))
|
||||
{
|
||||
if ((_is_dram) && (_is_cell)) {
|
||||
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
||||
}
|
||||
else if ((_is_dram) && (_is_wl_tr))
|
||||
{
|
||||
} else if ((_is_dram) && (_is_wl_tr)) {
|
||||
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
||||
}
|
||||
else if ((!_is_dram) && (_is_cell))
|
||||
{
|
||||
} else if ((!_is_dram) && (_is_cell)) {
|
||||
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
dt = &g_tp.peri_global;
|
||||
}
|
||||
|
||||
|
@ -319,15 +261,11 @@ double R_to_w(
|
|||
|
||||
double pmos_to_nmos_sz_ratio(
|
||||
bool _is_dram,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
double p_to_n_sizing_ratio;
|
||||
if ((_is_dram) && (_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
||||
p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
} else { //DRAM or SRAM all other transistors
|
||||
p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
|
||||
}
|
||||
return p_to_n_sizing_ratio;
|
||||
|
@ -340,24 +278,21 @@ double horowitz(
|
|||
double tf, // time constant of gate
|
||||
double vs1, // threshold voltage
|
||||
double vs2, // threshold voltage
|
||||
int rise) // whether input rises or fall
|
||||
{
|
||||
if (inputramptime == 0 && vs1 == vs2)
|
||||
{
|
||||
int rise) { // whether input rises or fall
|
||||
if (inputramptime == 0 && vs1 == vs2) {
|
||||
return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
|
||||
}
|
||||
double a, b, td;
|
||||
|
||||
a = inputramptime / tf;
|
||||
if (rise == RISE)
|
||||
{
|
||||
if (rise == RISE) {
|
||||
b = 0.5;
|
||||
td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2));
|
||||
}
|
||||
else
|
||||
{
|
||||
td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) +
|
||||
tf * (log(vs1) - log(vs2));
|
||||
} else {
|
||||
b = 0.4;
|
||||
td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2));
|
||||
td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) +
|
||||
tf * (log(1.0 - vs1) - log(1.0 - vs2));
|
||||
}
|
||||
return (td);
|
||||
}
|
||||
|
@ -367,20 +302,14 @@ double cmos_Ileak(
|
|||
double pWidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
} else { //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
|
||||
|
@ -391,35 +320,27 @@ double simplified_nmos_leakage(
|
|||
double nwidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
} else { //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nwidth * dt->I_off_n;
|
||||
}
|
||||
|
||||
// Product of the consecutive integers m, m + 1, ..., n.
//
// n: last factor of the product.
// m: first factor of the product.
// Returns m * (m + 1) * ... * n when m <= n, and 1 (the empty product) when
// m > n.
//
// The empty-product case matters to combination(), which evaluates
// factorial(n, m + 1): seeding the accumulator with m made that call return
// n + 1 instead of 1 when m == n.
//
// NOTE(review): the result is a plain int, so products beyond 12! overflow;
// callers are presumably limited to small gate fan-in values -- confirm.
int factorial(int n, int m) {
    int product = 1;
    for (int factor = m; factor <= n; factor++) {
        product *= factor;
    }
    return product;
}
|
||||
|
||||
int combination(int n, int m)
|
||||
{
|
||||
int combination(int n, int m) {
|
||||
int ret;
|
||||
ret = factorial(n, m + 1) / factorial(n - m);
|
||||
return ret;
|
||||
|
@ -429,20 +350,14 @@ double simplified_pmos_leakage(
|
|||
double pwidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
} else { //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return pwidth * dt->I_off_p;
|
||||
|
@ -452,20 +367,14 @@ double cmos_Ig_n(
|
|||
double nWidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
} else { //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return nWidth*dt->I_g_on_n;
|
||||
|
@ -475,20 +384,14 @@ double cmos_Ig_p(
|
|||
double pWidth,
|
||||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr)
|
||||
{
|
||||
bool _is_wl_tr) {
|
||||
TechnologyParameter::DeviceType * dt;
|
||||
|
||||
if ((!_is_dram)&&(_is_cell))
|
||||
{ //SRAM cell access transistor
|
||||
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
||||
dt = &(g_tp.sram_cell);
|
||||
}
|
||||
else if ((_is_dram)&&(_is_wl_tr))
|
||||
{ //DRAM wordline transistor
|
||||
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
||||
dt = &(g_tp.dram_wl);
|
||||
}
|
||||
else
|
||||
{ //DRAM or SRAM all other transistors
|
||||
} else { //DRAM or SRAM all other transistors
|
||||
dt = &(g_tp.peri_global);
|
||||
}
|
||||
return pWidth*dt->I_g_on_p;
|
||||
|
@ -502,8 +405,7 @@ double cmos_Isub_leakage(
|
|||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
enum Half_net_topology topo)
|
||||
{
|
||||
enum Half_net_topology topo) {
|
||||
assert (fanin >= 1);
|
||||
double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
|
||||
double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
|
||||
|
@ -513,25 +415,21 @@ double cmos_Isub_leakage(
|
|||
|
||||
num_states = int(pow(2.0, fanin));
|
||||
|
||||
switch (g_type)
|
||||
{
|
||||
switch (g_type) {
|
||||
case nmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
if (fanin == 1) {
|
||||
Isub = nmos_leak / num_states;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
|
||||
}
|
||||
else
|
||||
{
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
|
||||
{
|
||||
//Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
} else {
|
||||
if (topo == parallel) {
|
||||
//only when all tx are off, leakage power is non-zero.
|
||||
//The possibility of this state is 1/num_states
|
||||
Isub = nmos_leak * fanin / num_states;
|
||||
} else {
|
||||
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
||||
//when num_off_tx ==0 there is no leakage power
|
||||
Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
||||
(num_off_tx - 1)) *
|
||||
combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub /= num_states;
|
||||
}
|
||||
|
@ -539,22 +437,19 @@ double cmos_Isub_leakage(
|
|||
}
|
||||
break;
|
||||
case pmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
if (fanin == 1) {
|
||||
Isub = pmos_leak / num_states;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states
|
||||
}
|
||||
else
|
||||
{
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power
|
||||
{
|
||||
//Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
} else {
|
||||
if (topo == parallel) {
|
||||
//only when all tx are off, leakage power is non-zero.
|
||||
//The possibility of this state is 1/num_states
|
||||
Isub = pmos_leak * fanin / num_states;
|
||||
} else {
|
||||
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
||||
//when num_off_tx ==0 there is no leakage power
|
||||
Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
||||
(num_off_tx - 1)) *
|
||||
combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub /= num_states;
|
||||
}
|
||||
|
@ -566,25 +461,28 @@ double cmos_Isub_leakage(
|
|||
break;
|
||||
case nand:
|
||||
Isub += fanin * pmos_leak;//the pullup network
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network
|
||||
{
|
||||
//Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
||||
// the pulldown network
|
||||
Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
||||
(num_off_tx - 1)) *
|
||||
combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub /= num_states;
|
||||
break;
|
||||
case nor:
|
||||
for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network
|
||||
{
|
||||
//Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx)));
|
||||
Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx);
|
||||
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
||||
// the pullup network
|
||||
Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
||||
(num_off_tx - 1)) *
|
||||
combination(fanin, num_off_tx);
|
||||
}
|
||||
Isub += fanin * nmos_leak;//the pulldown network
|
||||
Isub /= num_states;
|
||||
break;
|
||||
case tri:
|
||||
Isub += (nmos_leak + pmos_leak) / 2;//enabled
|
||||
Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power
|
||||
//disabled upper bound of leakage power
|
||||
Isub += nmos_leak * UNI_LEAK_STACK_FACTOR;
|
||||
Isub /= 2;
|
||||
break;
|
||||
case tg:
|
||||
|
@ -607,8 +505,7 @@ double cmos_Ig_leakage(
|
|||
bool _is_dram,
|
||||
bool _is_cell,
|
||||
bool _is_wl_tr,
|
||||
enum Half_net_topology topo)
|
||||
{
|
||||
enum Half_net_topology topo) {
|
||||
assert (fanin >= 1);
|
||||
double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
|
||||
double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
|
||||
|
@ -618,55 +515,50 @@ double cmos_Ig_leakage(
|
|||
|
||||
num_states = int(pow(2.0, fanin));
|
||||
|
||||
switch (g_type)
|
||||
{
|
||||
switch (g_type) {
|
||||
case nmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
if (fanin == 1) {
|
||||
Ig_on = nmos_leak / num_states;
|
||||
} else {
|
||||
if (topo == parallel) {
|
||||
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
||||
Ig_on += nmos_leak * combination(fanin, num_on_tx) *
|
||||
num_on_tx;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
|
||||
} else {
|
||||
//pull down network when all TXs are on.
|
||||
Ig_on += nmos_leak * fanin;
|
||||
//num_on_tx is the number of on tx
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
|
||||
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
||||
//when num_on_tx=[1,n-1]
|
||||
//TODO: this is a approximation now, a precise computation
|
||||
//will be very complicated.
|
||||
Ig_on += nmos_leak * combination(fanin, num_on_tx) *
|
||||
num_on_tx / 2;
|
||||
}
|
||||
Ig_on /= num_states;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case pmos:
|
||||
if (fanin==1)
|
||||
{
|
||||
if (fanin == 1) {
|
||||
Ig_on = pmos_leak / num_states;
|
||||
} else {
|
||||
if (topo == parallel) {
|
||||
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
||||
Ig_on += pmos_leak * combination(fanin, num_on_tx) *
|
||||
num_on_tx;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (topo==parallel)
|
||||
{
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)
|
||||
{
|
||||
Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Ig_on += pmos_leak * fanin;//pull down network when all TXs are on.
|
||||
} else {
|
||||
//pull down network when all TXs are on.
|
||||
Ig_on += pmos_leak * fanin;
|
||||
//num_on_tx is the number of on tx
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
|
||||
{
|
||||
Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
|
||||
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
||||
//when num_on_tx=[1,n-1]
|
||||
//TODO: this is a approximation now, a precise computation
|
||||
//will be very complicated.
|
||||
Ig_on += pmos_leak * combination(fanin, num_on_tx) *
|
||||
num_on_tx / 2;
|
||||
}
|
||||
Ig_on /= num_states;
|
||||
}
|
||||
|
@ -678,38 +570,40 @@ double cmos_Ig_leakage(
|
|||
break;
|
||||
case nand:
|
||||
//pull up network
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
|
||||
{
|
||||
//when num_on_tx=[1,n]
|
||||
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
||||
Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx;
|
||||
}
|
||||
|
||||
//pull down network
|
||||
Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
|
||||
//num_on_tx is the number of on tx
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)//when num_on_tx=[1,n-1]
|
||||
{
|
||||
Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx/2;//TODO: this is a approximation now, a precise computation will be very complicated.
|
||||
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
||||
//when num_on_tx=[1,n-1]
|
||||
//TODO: this is a approximation now, a precise computation will be
|
||||
//very complicated.
|
||||
Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
|
||||
}
|
||||
Ig_on /= num_states;
|
||||
break;
|
||||
case nor:
|
||||
// num_on_tx is the number of on tx in pull up network
|
||||
Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
|
||||
for (num_on_tx=1; num_on_tx<fanin; num_on_tx++)
|
||||
{
|
||||
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
||||
Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
|
||||
|
||||
}
|
||||
//pull down network
|
||||
for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++)//when num_on_tx=[1,n]
|
||||
{
|
||||
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
||||
//when num_on_tx=[1,n]
|
||||
Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx;
|
||||
}
|
||||
Ig_on /= num_states;
|
||||
break;
|
||||
case tri:
|
||||
Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2;//enabled
|
||||
Ig_on += (nmos_leak + pmos_leak)/2; //disabled upper bound of leakage power
|
||||
//disabled upper bound of leakage power
|
||||
Ig_on += (nmos_leak + pmos_leak) / 2;
|
||||
Ig_on /= 2;
|
||||
break;
|
||||
case tg:
|
||||
|
@ -734,8 +628,7 @@ double shortcircuit_simple(
|
|||
double i_on_p,
|
||||
double i_on_n_in,
|
||||
double i_on_p_in,
|
||||
double vdd)
|
||||
{
|
||||
double vdd) {
|
||||
|
||||
double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
|
||||
double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
|
||||
|
@ -747,8 +640,16 @@ double shortcircuit_simple(
|
|||
vt_to_vdd_ratio = vt / vdd;
|
||||
|
||||
//p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
||||
p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
||||
p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
|
||||
p_short_circuit_discharge_low =
|
||||
10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
|
||||
pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
|
||||
vt_to_vdd_ratio)) * c_in *
|
||||
vdd * vdd * fo_p * fo_p / fanout / beta_ratio;
|
||||
p_short_circuit_charge_low =
|
||||
10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
|
||||
pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
|
||||
vt_to_vdd_ratio)) * c_in *
|
||||
vdd * vdd * fo_n * fo_n / fanout * beta_ratio;
|
||||
// double t1, t2, t3, t4, t5;
|
||||
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
|
||||
// t2=pow(velocity_index,2.0);
|
||||
|
@ -756,8 +657,12 @@ double shortcircuit_simple(
|
|||
// t4=t1/t2/t3;
|
||||
// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
|
||||
|
||||
p_short_circuit_discharge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
|
||||
p_short_circuit_charge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
|
||||
p_short_circuit_discharge_high =
|
||||
pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) * c_in * vdd * vdd *
|
||||
fo_p / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 * velocity_index);
|
||||
p_short_circuit_charge_high = pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) *
|
||||
c_in * vdd * vdd * fo_n / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 *
|
||||
velocity_index);
|
||||
|
||||
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),1.5);
|
||||
// t2=pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
|
||||
|
@ -784,10 +689,10 @@ double shortcircuit(
|
|||
double i_on_p,
|
||||
double i_on_n_in,
|
||||
double i_on_p_in,
|
||||
double vdd)
|
||||
{
|
||||
double vdd) {
|
||||
|
||||
double p_short_circuit=0, p_short_circuit_discharge;//, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
|
||||
//this is actually energy
|
||||
double p_short_circuit = 0, p_short_circuit_discharge;
|
||||
double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
|
||||
double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
|
||||
|
||||
|
@ -797,10 +702,18 @@ double shortcircuit(
|
|||
beta_ratio = i_on_p / i_on_n;
|
||||
vt_to_vdd_ratio = vt / vdd;
|
||||
e = 2.71828;
|
||||
f_alpha = 1/(velocity_index+2) -velocity_index/(2*(velocity_index+3)) +velocity_index/(velocity_index+4)*(velocity_index/2-1);
|
||||
f_alpha = 1 / (velocity_index + 2) - velocity_index /
|
||||
(2 * (velocity_index + 3)) + velocity_index / (velocity_index + 4) *
|
||||
(velocity_index / 2 - 1);
|
||||
k_v = 0.9 / 0.8 + (vdd - vt) / 0.8 * log(10 * (vdd - vt) / e);
|
||||
g_v_alpha = (velocity_index + 1)*pow((1-velocity_index),velocity_index)*pow((1-velocity_index),velocity_index/2)/f_alpha/pow((1-velocity_index-velocity_index),(velocity_index/2+velocity_index+2));
|
||||
h_v_alpha = pow(2, velocity_index)*(velocity_index+1)*pow((1-velocity_index),velocity_index)/pow((1-velocity_index-velocity_index),(velocity_index+1));
|
||||
g_v_alpha = (velocity_index + 1) *
|
||||
pow((1 - velocity_index), velocity_index) *
|
||||
pow((1 - velocity_index), velocity_index / 2) / f_alpha /
|
||||
pow((1 - velocity_index - velocity_index),
|
||||
(velocity_index / 2 + velocity_index + 2));
|
||||
h_v_alpha = pow(2, velocity_index) * (velocity_index + 1) *
|
||||
pow((1 - velocity_index), velocity_index) /
|
||||
pow((1 - velocity_index - velocity_index), (velocity_index + 1));
|
||||
|
||||
//p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
||||
// p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
||||
|
@ -824,6 +737,8 @@ double shortcircuit(
|
|||
//
|
||||
// p_short_circuit = p_short_circuit_discharge;
|
||||
|
||||
p_short_circuit_discharge = k_v*vdd*vdd*c_in*fo_p*fo_p/((vdd-vt)*g_v_alpha*fanout*beta_ratio/2/k_v + h_v_alpha*fo_p);
|
||||
p_short_circuit_discharge = k_v * vdd * vdd * c_in * fo_p * fo_p /
|
||||
((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v + h_v_alpha *
|
||||
fo_p);
|
||||
return (p_short_circuit);
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -47,8 +48,7 @@
|
|||
using namespace std;
|
||||
|
||||
|
||||
bool mem_array::lt(const mem_array * m1, const mem_array * m2)
|
||||
{
|
||||
bool mem_array::lt(const mem_array * m1, const mem_array * m2) {
|
||||
if (m1->Nspd < m2->Nspd) return true;
|
||||
else if (m1->Nspd > m2->Nspd) return false;
|
||||
else if (m1->Ndwl < m2->Ndwl) return true;
|
||||
|
@ -65,36 +65,31 @@ bool mem_array::lt(const mem_array * m1, const mem_array * m2)
|
|||
|
||||
|
||||
|
||||
void uca_org_t::find_delay()
|
||||
{
|
||||
void uca_org_t::find_delay() {
|
||||
mem_array * data_arr = data_array2;
|
||||
mem_array * tag_arr = tag_array2;
|
||||
|
||||
// check whether it is a regular cache or scratch ram
|
||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
|
||||
{
|
||||
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
|
||||
access_time = data_arr->access_time;
|
||||
}
|
||||
// Both tag and data lookup happen in parallel
|
||||
// and the entire set is sent over the data array h-tree without
|
||||
// waiting for the way-select signal --TODO add the corresponding
|
||||
// power overhead Nav
|
||||
else if (g_ip->fast_access == true)
|
||||
{
|
||||
else if (g_ip->fast_access == true) {
|
||||
access_time = MAX(tag_arr->access_time, data_arr->access_time);
|
||||
}
|
||||
// Tag is accessed first. On a hit, way-select signal along with the
|
||||
// address is sent to read/write the appropriate block in the data
|
||||
// array
|
||||
else if (g_ip->is_seq_acc == true)
|
||||
{
|
||||
else if (g_ip->is_seq_acc == true) {
|
||||
access_time = tag_arr->access_time + data_arr->access_time;
|
||||
}
|
||||
// Normal access: tag array access and data array access happen in parallel.
|
||||
// But, the data array will wait for the way-select and transfer only the
|
||||
// appropriate block over the h-tree.
|
||||
else
|
||||
{
|
||||
else {
|
||||
access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
|
||||
data_arr->delay_before_subarray_output_driver) +
|
||||
data_arr->delay_from_subarray_output_driver_to_output;
|
||||
|
@ -103,9 +98,8 @@ void uca_org_t::find_delay()
|
|||
|
||||
|
||||
|
||||
void uca_org_t::find_energy()
|
||||
{
|
||||
if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
|
||||
void uca_org_t::find_energy() {
|
||||
if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc))
|
||||
power = data_array2->power + tag_array2->power;
|
||||
else
|
||||
power = data_array2->power;
|
||||
|
@ -113,28 +107,21 @@ void uca_org_t::find_energy()
|
|||
|
||||
|
||||
|
||||
void uca_org_t::find_area()
|
||||
{
|
||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
|
||||
{
|
||||
void uca_org_t::find_area() {
|
||||
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
|
||||
cache_ht = data_array2->height;
|
||||
cache_len = data_array2->width;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
cache_ht = MAX(tag_array2->height, data_array2->height);
|
||||
cache_len = tag_array2->width + data_array2->width;
|
||||
}
|
||||
area = cache_ht * cache_len;
|
||||
}
|
||||
|
||||
void uca_org_t::adjust_area()
|
||||
{
|
||||
void uca_org_t::adjust_area() {
|
||||
double area_adjust;
|
||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
|
||||
{
|
||||
if (data_array2->area_efficiency/100.0<0.2)
|
||||
{
|
||||
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
|
||||
if (data_array2->area_efficiency / 100.0 < 0.2) {
|
||||
//area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
|
||||
area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0));
|
||||
cache_ht = cache_ht / area_adjust;
|
||||
|
@ -144,14 +131,10 @@ void uca_org_t::adjust_area()
|
|||
area = cache_ht * cache_len;
|
||||
}
|
||||
|
||||
void uca_org_t::find_cyc()
|
||||
{
|
||||
if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
|
||||
{
|
||||
void uca_org_t::find_cyc() {
|
||||
if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
|
||||
cycle_time = data_array2->cycle_time;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
cycle_time = MAX(tag_array2->cycle_time,
|
||||
data_array2->cycle_time);
|
||||
}
|
||||
|
@ -159,13 +142,11 @@ void uca_org_t::find_cyc()
|
|||
|
||||
uca_org_t :: uca_org_t()
|
||||
: tag_array2(0),
|
||||
data_array2(0)
|
||||
{
|
||||
data_array2(0) {
|
||||
|
||||
}
|
||||
|
||||
void uca_org_t :: cleanup()
|
||||
{
|
||||
void uca_org_t :: cleanup() {
|
||||
if (data_array2 != 0)
|
||||
delete data_array2;
|
||||
if (tag_array2 != 0)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -50,8 +51,7 @@ class mem_array;
|
|||
class uca_org_t;
|
||||
|
||||
|
||||
class powerComponents
|
||||
{
|
||||
class powerComponents {
|
||||
public:
|
||||
double dynamic;
|
||||
double leakage;
|
||||
|
@ -60,9 +60,10 @@ class powerComponents
|
|||
double longer_channel_leakage;
|
||||
|
||||
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
|
||||
powerComponents(const powerComponents & obj) { *this = obj; }
|
||||
powerComponents & operator=(const powerComponents & rhs)
|
||||
{
|
||||
powerComponents(const powerComponents & obj) {
|
||||
*this = obj;
|
||||
}
|
||||
powerComponents & operator=(const powerComponents & rhs) {
|
||||
dynamic = rhs.dynamic;
|
||||
leakage = rhs.leakage;
|
||||
gate_leakage = rhs.gate_leakage;
|
||||
|
@ -70,7 +71,13 @@ class powerComponents
|
|||
longer_channel_leakage = rhs.longer_channel_leakage;
|
||||
return *this;
|
||||
}
|
||||
void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
|
||||
void reset() {
|
||||
dynamic = 0;
|
||||
leakage = 0;
|
||||
gate_leakage = 0;
|
||||
short_circuit = 0;
|
||||
longer_channel_leakage = 0;
|
||||
}
|
||||
|
||||
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
|
||||
friend powerComponents operator*(const powerComponents & x, double const * const y);
|
||||
|
@ -78,22 +85,24 @@ class powerComponents
|
|||
|
||||
|
||||
|
||||
class powerDef
|
||||
{
|
||||
class powerDef {
|
||||
public:
|
||||
powerComponents readOp;
|
||||
powerComponents writeOp;
|
||||
powerComponents searchOp;//Sheng: for CAM and FA
|
||||
|
||||
powerDef() : readOp(), writeOp(), searchOp() { }
|
||||
void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
|
||||
void reset() {
|
||||
readOp.reset();
|
||||
writeOp.reset();
|
||||
searchOp.reset();
|
||||
}
|
||||
|
||||
friend powerDef operator+(const powerDef & x, const powerDef & y);
|
||||
friend powerDef operator*(const powerDef & x, double const * const y);
|
||||
};
|
||||
|
||||
enum Wire_type
|
||||
{
|
||||
enum Wire_type {
|
||||
Global /* gloabl wires with repeaters */,
|
||||
Global_5 /* 5% delay penalty */,
|
||||
Global_10 /* 10% delay penalty */,
|
||||
|
@ -108,12 +117,12 @@ enum Wire_type
|
|||
|
||||
|
||||
|
||||
class InputParameter
|
||||
{
|
||||
class InputParameter {
|
||||
public:
|
||||
void parse_cfg(const string & infile);
|
||||
|
||||
bool error_checking(); // return false if the input parameters are problematic
|
||||
// return false if the input parameters are problematic
|
||||
bool error_checking(string name = "CACTI");
|
||||
void display_ip();
|
||||
|
||||
unsigned int cache_sz; // in bytes
|
||||
|
@ -284,59 +293,6 @@ typedef struct{
|
|||
double routing_area_height_within_bank;
|
||||
double routing_area_width_within_bank;
|
||||
double area_efficiency;
|
||||
// double perc_power_dyn_routing_to_bank;
|
||||
// double perc_power_dyn_addr_horizontal_htree;
|
||||
// double perc_power_dyn_datain_horizontal_htree;
|
||||
// double perc_power_dyn_dataout_horizontal_htree;
|
||||
// double perc_power_dyn_addr_vertical_htree;
|
||||
// double perc_power_dyn_datain_vertical_htree;
|
||||
// double perc_power_dyn_row_predecoder_drivers;
|
||||
// double perc_power_dyn_row_predecoder_blocks;
|
||||
// double perc_power_dyn_row_decoders;
|
||||
// double perc_power_dyn_bit_mux_predecoder_drivers;
|
||||
// double perc_power_dyn_bit_mux_predecoder_blocks;
|
||||
// double perc_power_dyn_bit_mux_decoders;
|
||||
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
|
||||
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
|
||||
// double perc_power_dyn_senseamp_mux_lev_1_decoders;
|
||||
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
|
||||
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
|
||||
// double perc_power_dyn_senseamp_mux_lev_2_decoders;
|
||||
// double perc_power_dyn_bitlines;
|
||||
// double perc_power_dyn_sense_amps;
|
||||
// double perc_power_dyn_prechg_eq_drivers;
|
||||
// double perc_power_dyn_subarray_output_drivers;
|
||||
// double perc_power_dyn_dataout_vertical_htree;
|
||||
// double perc_power_dyn_comparators;
|
||||
// double perc_power_dyn_crossbar;
|
||||
// double perc_power_dyn_spent_outside_mats;
|
||||
// double perc_power_leak_routing_to_bank;
|
||||
// double perc_power_leak_addr_horizontal_htree;
|
||||
// double perc_power_leak_datain_horizontal_htree;
|
||||
// double perc_power_leak_dataout_horizontal_htree;
|
||||
// double perc_power_leak_addr_vertical_htree;
|
||||
// double perc_power_leak_datain_vertical_htree;
|
||||
// double perc_power_leak_row_predecoder_drivers;
|
||||
// double perc_power_leak_row_predecoder_blocks;
|
||||
// double perc_power_leak_row_decoders;
|
||||
// double perc_power_leak_bit_mux_predecoder_drivers;
|
||||
// double perc_power_leak_bit_mux_predecoder_blocks;
|
||||
// double perc_power_leak_bit_mux_decoders;
|
||||
// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
|
||||
// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
|
||||
// double perc_power_leak_senseamp_mux_lev_1_decoders;
|
||||
// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
|
||||
// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
|
||||
// double perc_power_leak_senseamp_mux_lev_2_decoders;
|
||||
// double perc_power_leak_bitlines;
|
||||
// double perc_power_leak_sense_amps;
|
||||
// double perc_power_leak_prechg_eq_drivers;
|
||||
// double perc_power_leak_subarray_output_drivers;
|
||||
// double perc_power_leak_dataout_vertical_htree;
|
||||
// double perc_power_leak_comparators;
|
||||
// double perc_power_leak_crossbar;
|
||||
// double perc_leak_mats;
|
||||
// double perc_active_mats;
|
||||
double refresh_power;
|
||||
double dram_refresh_period;
|
||||
double dram_array_availability;
|
||||
|
@ -352,8 +308,7 @@ typedef struct{
|
|||
} results_mem_array;
|
||||
|
||||
|
||||
class uca_org_t
|
||||
{
|
||||
class uca_org_t {
|
||||
public:
|
||||
mem_array * tag_array2;
|
||||
mem_array * data_array2;
|
||||
|
@ -387,7 +342,8 @@ uca_org_t cacti_interface(const string & infile_name);
|
|||
//McPAT's plain interface, please keep !!!
|
||||
uca_org_t cacti_interface(InputParameter * const local_interface);
|
||||
//McPAT's plain interface, please keep !!!
|
||||
uca_org_t init_interface(InputParameter * const local_interface);
|
||||
uca_org_t init_interface(InputParameter * const local_interface,
|
||||
const string &name);
|
||||
//McPAT's plain interface, please keep !!!
|
||||
uca_org_t cacti_interface(
|
||||
int cache_size,
|
||||
|
@ -442,48 +398,6 @@ uca_org_t cacti_interface(
|
|||
int ndsam1,
|
||||
int ndsam2,
|
||||
int ecc);
|
||||
// int cache_size,
|
||||
// int line_size,
|
||||
// int associativity,
|
||||
// int rw_ports,
|
||||
// int excl_read_ports,
|
||||
// int excl_write_ports,
|
||||
// int single_ended_read_ports,
|
||||
// int banks,
|
||||
// double tech_node,
|
||||
// int output_width,
|
||||
// int specific_tag,
|
||||
// int tag_width,
|
||||
// int access_mode,
|
||||
// int cache,
|
||||
// int main_mem,
|
||||
// int obj_func_delay,
|
||||
// int obj_func_dynamic_power,
|
||||
// int obj_func_leakage_power,
|
||||
// int obj_func_area,
|
||||
// int obj_func_cycle_time,
|
||||
// int dev_func_delay,
|
||||
// int dev_func_dynamic_power,
|
||||
// int dev_func_leakage_power,
|
||||
// int dev_func_area,
|
||||
// int dev_func_cycle_time,
|
||||
// int temp,
|
||||
// int data_arr_ram_cell_tech_flavor_in,
|
||||
// int data_arr_peri_global_tech_flavor_in,
|
||||
// int tag_arr_ram_cell_tech_flavor_in,
|
||||
// int tag_arr_peri_global_tech_flavor_in,
|
||||
// int interconnect_projection_type_in,
|
||||
// int wire_inside_mat_type_in,
|
||||
// int wire_outside_mat_type_in,
|
||||
// int REPEATERS_IN_HTREE_SEGMENTS_in,
|
||||
// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
|
||||
// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
|
||||
//// double MAXAREACONSTRAINT_PERC_in,
|
||||
//// double MAXACCTIMECONSTRAINT_PERC_in,
|
||||
//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
|
||||
// int PAGE_SIZE_BITS_in,
|
||||
// int BURST_LENGTH_in,
|
||||
// int INTERNAL_PREFETCH_WIDTH_in);
|
||||
|
||||
//Naveen's interface
|
||||
uca_org_t cacti_interface(
|
||||
|
@ -542,8 +456,7 @@ uca_org_t cacti_interface(
|
|||
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
|
||||
int p_input);
|
||||
|
||||
class mem_array
|
||||
{
|
||||
class mem_array {
|
||||
public:
|
||||
int Ndcm;
|
||||
int Ndwl;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -45,28 +46,24 @@ using namespace std;
|
|||
|
||||
|
||||
Component::Component()
|
||||
:area(), power(), rt_power(),delay(0)
|
||||
{
|
||||
: area(), power(), rt_power(), delay(0) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
Component::~Component()
|
||||
{
|
||||
Component::~Component() {
|
||||
}
|
||||
|
||||
|
||||
|
||||
double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr)
|
||||
{
|
||||
double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) {
|
||||
double w_poly = g_ip->F_sz_um;
|
||||
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
||||
double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
|
||||
num_stacked_in * w_poly +
|
||||
(num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
||||
|
||||
if (num_folded_tr > 1)
|
||||
{
|
||||
if (num_folded_tr > 1) {
|
||||
total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
|
||||
(num_folded_tr - 1) * num_stacked_in * w_poly +
|
||||
(num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
||||
|
@ -82,10 +79,8 @@ double Component::compute_gate_area(
|
|||
int num_inputs,
|
||||
double w_pmos,
|
||||
double w_nmos,
|
||||
double h_gate)
|
||||
{
|
||||
if (w_pmos <= 0.0 || w_nmos <= 0.0)
|
||||
{
|
||||
double h_gate) {
|
||||
if (w_pmos <= 0.0 || w_nmos <= 0.0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
|
@ -97,8 +92,7 @@ double Component::compute_gate_area(
|
|||
double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
|
||||
double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
|
||||
|
||||
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
|
||||
{
|
||||
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
|
@ -109,8 +103,7 @@ double Component::compute_gate_area(
|
|||
num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
|
||||
num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
|
||||
|
||||
switch (gate_type)
|
||||
{
|
||||
switch (gate_type) {
|
||||
case INV:
|
||||
total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
|
||||
total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
|
||||
|
@ -132,14 +125,11 @@ double Component::compute_gate_area(
|
|||
|
||||
gate.w = MAX(total_ndiff_w, total_pdiff_w);
|
||||
|
||||
if (w_folded_nmos > w_nmos)
|
||||
{
|
||||
if (w_folded_nmos > w_nmos) {
|
||||
//means that the height of the gate can
|
||||
//be made smaller than the input height specified, so calculate the height of the gate.
|
||||
gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
gate.h = h_gate;
|
||||
}
|
||||
return gate.get_area();
|
||||
|
@ -149,11 +139,10 @@ double Component::compute_gate_area(
|
|||
|
||||
double Component::compute_tr_width_after_folding(
|
||||
double input_width,
|
||||
double threshold_folding_width)
|
||||
{//This is actually the width of the cell not the width of a device.
|
||||
double threshold_folding_width) {
|
||||
//This is actually the width of the cell not the width of a device.
|
||||
//The width of a cell and the width of a device is orthogonal.
|
||||
if (input_width <= 0)
|
||||
{
|
||||
if (input_width <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -167,8 +156,7 @@ double Component::compute_tr_width_after_folding(
|
|||
|
||||
|
||||
|
||||
double Component::height_sense_amplifier(double pitch_sense_amp)
|
||||
{
|
||||
double Component::height_sense_amplifier(double pitch_sense_amp) {
|
||||
// compute the height occupied by all PMOS transistors
|
||||
double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
|
||||
compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
|
||||
|
@ -195,8 +183,7 @@ int Component::logical_effort(
|
|||
double p_to_n_sz_ratio,
|
||||
bool is_dram_,
|
||||
bool is_wl_tr_,
|
||||
double max_w_nmos)
|
||||
{
|
||||
double max_w_nmos) {
|
||||
int num_gates = (int) (log(F) / log(fopt));
|
||||
|
||||
// check if num_gates is odd. if so, add 1 to make it even
|
||||
|
@ -211,8 +198,7 @@ int Component::logical_effort(
|
|||
w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
|
||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||
|
||||
if (w_n[i] > max_w_nmos)
|
||||
{
|
||||
if (w_n[i] > max_w_nmos) {
|
||||
double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
|
||||
F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
|
||||
num_gates = (int) (log(F) / log(fopt)) + 1;
|
||||
|
@ -224,8 +210,7 @@ int Component::logical_effort(
|
|||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||
}
|
||||
|
||||
for (i = num_gates - 2; i >= 1; i--)
|
||||
{
|
||||
for (i = num_gates - 2; i >= 1; i--) {
|
||||
w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
|
||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -42,39 +43,30 @@ using namespace std;
|
|||
class Crossbar;
|
||||
class Bank;
|
||||
|
||||
class Component
|
||||
{
|
||||
class Component {
|
||||
public:
|
||||
Component();
|
||||
~Component();
|
||||
|
||||
Area area;
|
||||
// TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE
|
||||
// VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS
|
||||
// MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER
|
||||
// CALCULATIONS. THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS
|
||||
powerDef power, rt_power;
|
||||
double delay;
|
||||
double cycle_time;
|
||||
|
||||
double compute_gate_area(
|
||||
int gate_type,
|
||||
int num_inputs,
|
||||
double w_pmos,
|
||||
double w_nmos,
|
||||
double h_gate);
|
||||
|
||||
double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
|
||||
double compute_gate_area(int gate_type, int num_inputs, double w_pmos,
|
||||
double w_nmos, double h_gate);
|
||||
double compute_tr_width_after_folding(double input_width,
|
||||
double threshold_folding_width);
|
||||
double height_sense_amplifier(double pitch_sense_amp);
|
||||
|
||||
protected:
|
||||
int logical_effort(
|
||||
int num_gates_min,
|
||||
double g,
|
||||
double F,
|
||||
double * w_n,
|
||||
double * w_p,
|
||||
double C_load,
|
||||
double p_to_n_sz_ratio,
|
||||
bool is_dram_,
|
||||
bool is_wl_tr_,
|
||||
double max_w_nmos);
|
||||
int logical_effort(int num_gates_min, double g, double F, double * w_n,
|
||||
double * w_p, double C_load, double p_to_n_sz_ratio,
|
||||
bool is_dram_, bool is_wl_tr_, double max_w_nmos);
|
||||
|
||||
private:
|
||||
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -249,8 +250,7 @@ const double bit_to_byte = 8.0;
|
|||
// v : vertical or velocity
|
||||
|
||||
|
||||
enum ram_cell_tech_type_num
|
||||
{
|
||||
enum ram_cell_tech_type_num {
|
||||
itrs_hp = 0,
|
||||
itrs_lstp = 1,
|
||||
itrs_lop = 2,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -39,8 +40,7 @@ Crossbar::Crossbar(
|
|||
double n_out_,
|
||||
double flit_size_,
|
||||
TechnologyParameter::DeviceType *dt
|
||||
):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
|
||||
{
|
||||
): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||
Vdd = dt->Vdd;
|
||||
CB_ADJ = 1;
|
||||
|
@ -48,14 +48,14 @@ Crossbar::Crossbar(
|
|||
|
||||
Crossbar::~Crossbar() {}
|
||||
|
||||
double Crossbar::output_buffer()
|
||||
{
|
||||
double Crossbar::output_buffer() {
|
||||
|
||||
//Wire winit(4, 4);
|
||||
double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
|
||||
Wire w1(g_ip->wt, l_eff);
|
||||
//double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
|
||||
double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
|
||||
double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
|
||||
l_eff * ADJ / w1.repeater_spacing : ADJ);
|
||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
// the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
|
||||
TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
|
||||
|
@ -78,7 +78,8 @@ double Crossbar::output_buffer()
|
|||
drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
|
||||
drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(TriS2 * min_w_pmos, 0);
|
||||
double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
|
||||
g_tp.cell_h_def) +
|
||||
drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
|
||||
double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
|
||||
|
||||
|
@ -88,21 +89,24 @@ double Crossbar::output_buffer()
|
|||
return input_cap + output_cap + ctr_cap;
|
||||
}
|
||||
|
||||
void Crossbar::compute_power()
|
||||
{
|
||||
void Crossbar::compute_power() {
|
||||
|
||||
Wire winit(4, 4);
|
||||
double tri_cap = output_buffer();
|
||||
assert(tri_cap > 0);
|
||||
//area of a tristate logic
|
||||
double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
|
||||
double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
|
||||
TriS2 * min_w_pmos, g_tp.cell_h_def);
|
||||
g_area *= 2; // to model area of output transistors
|
||||
g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
|
||||
g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
|
||||
g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
|
||||
TriS1 * min_w_pmos, g_tp.cell_h_def);
|
||||
g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_,
|
||||
TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
|
||||
double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
|
||||
// effective no. of tristate buffers that need to be laid side by side
|
||||
int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
|
||||
double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
|
||||
double wire_len = MAX(width * ntri * n_out,
|
||||
flit_size * g_tp.wire_outside_mat.pitch * n_out);
|
||||
Wire w1(g_ip->wt, wire_len);
|
||||
|
||||
area.w = wire_len;
|
||||
|
@ -124,37 +128,51 @@ void Crossbar::compute_power()
|
|||
|
||||
|
||||
|
||||
power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
|
||||
power.readOp.dynamic =
|
||||
(w1.power.readOp.dynamic + w2.power.readOp.dynamic +
|
||||
(tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
|
||||
tri_int_cap) * Vdd * Vdd) * flit_size;
|
||||
power.readOp.leakage = n_inp * n_out * flit_size * (
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
|
||||
1, inv) * Vdd +
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||
2, nand) * Vdd +
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||
2, nor) * Vdd +
|
||||
w1.power.readOp.leakage + w2.power.readOp.leakage);
|
||||
power.readOp.gate_leakage = n_inp * n_out * flit_size * (
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
|
||||
1, inv) * Vdd +
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||
2, nand) * Vdd +
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||
2, nor) * Vdd +
|
||||
w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
|
||||
|
||||
// delay calculation
|
||||
double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
|
||||
Wire wdriver(g_ip->wt, l_eff);
|
||||
double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
|
||||
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
|
||||
delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
||||
double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
|
||||
tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
|
||||
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
|
||||
tri_inp_cap + n_inp * tri_out_cap;
|
||||
delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
|
||||
deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
|
||||
|
||||
Wire wreset();
|
||||
}
|
||||
|
||||
void Crossbar::print_crossbar()
|
||||
{
|
||||
void Crossbar::print_crossbar() {
|
||||
cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
|
||||
cout << "Flit size : " << flit_size << " bits" << endl;
|
||||
cout << "Width : " << area.w << " u" << endl;
|
||||
cout << "Height : " << area.h << " u" << endl;
|
||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
|
||||
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
|
||||
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
|
||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 *
|
||||
MIN(n_inp, n_out) << " (nJ)" << endl;
|
||||
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)"
|
||||
<< endl;
|
||||
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
|
||||
<< " (mW)" << endl;
|
||||
cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -44,8 +45,7 @@
|
|||
#include "parameter.h"
|
||||
#include "wire.h"
|
||||
|
||||
class Crossbar : public Component
|
||||
{
|
||||
class Crossbar : public Component {
|
||||
public:
|
||||
Crossbar(
|
||||
double in,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -58,11 +59,9 @@ Decoder::Decoder(
|
|||
delay(0),
|
||||
//power(),
|
||||
fully_assoc(fully_assoc_), is_dram(is_dram_),
|
||||
is_wl_tr(is_wl_tr_), cell(cell_)
|
||||
{
|
||||
is_wl_tr(is_wl_tr_), cell(cell_) {
|
||||
|
||||
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
|
||||
{
|
||||
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
|
||||
w_dec_n[i] = 0;
|
||||
w_dec_p[i] = 0;
|
||||
}
|
||||
|
@ -74,28 +73,19 @@ Decoder::Decoder(
|
|||
*/
|
||||
int num_addr_bits_dec = _log2(_num_dec_signals);
|
||||
|
||||
if (num_addr_bits_dec < 4)
|
||||
{
|
||||
if (flag_way_select)
|
||||
{
|
||||
if (num_addr_bits_dec < 4) {
|
||||
if (flag_way_select) {
|
||||
exist = true;
|
||||
num_in_signals = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_in_signals = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
exist = true;
|
||||
|
||||
if (flag_way_select)
|
||||
{
|
||||
if (flag_way_select) {
|
||||
num_in_signals = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_in_signals = 2;
|
||||
}
|
||||
}
|
||||
|
@ -112,23 +102,18 @@ Decoder::Decoder(
|
|||
|
||||
|
||||
|
||||
void Decoder::compute_widths()
|
||||
{
|
||||
void Decoder::compute_widths() {
|
||||
double F;
|
||||
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
|
||||
double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
|
||||
double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
|
||||
|
||||
if (exist)
|
||||
{
|
||||
if (num_in_signals == 2 || fully_assoc)
|
||||
{
|
||||
if (exist) {
|
||||
if (num_in_signals == 2 || fully_assoc) {
|
||||
w_dec_n[0] = 2 * g_tp.min_w_nmos_;
|
||||
w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
|
||||
F = gnand2;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
w_dec_n[0] = 3 * g_tp.min_w_nmos_;
|
||||
w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
|
||||
F = gnand3;
|
||||
|
@ -152,32 +137,35 @@ void Decoder::compute_widths()
|
|||
|
||||
|
||||
|
||||
void Decoder::compute_area()
|
||||
{
|
||||
void Decoder::compute_area() {
|
||||
double cumulative_area = 0;
|
||||
double cumulative_curr = 0; // cumulative leakage current
|
||||
double cumulative_curr_Ig = 0; // cumulative leakage current
|
||||
|
||||
if (exist)
|
||||
{ // First check if this decoder exists
|
||||
if (num_in_signals == 2)
|
||||
{
|
||||
cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
|
||||
cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
|
||||
cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
|
||||
}
|
||||
else if (num_in_signals == 3)
|
||||
{
|
||||
cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
|
||||
cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
|
||||
cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
|
||||
if (exist) { // First check if this decoder exists
|
||||
if (num_in_signals == 2) {
|
||||
cumulative_area =
|
||||
compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
|
||||
cumulative_curr =
|
||||
cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
|
||||
cumulative_curr_Ig =
|
||||
cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
|
||||
} else if (num_in_signals == 3) {
|
||||
cumulative_area =
|
||||
compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
|
||||
cumulative_curr =
|
||||
cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);;
|
||||
cumulative_curr_Ig =
|
||||
cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
|
||||
}
|
||||
|
||||
for (int i = 1; i < num_gates; i++)
|
||||
{
|
||||
cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
|
||||
cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
|
||||
cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
|
||||
for (int i = 1; i < num_gates; i++) {
|
||||
cumulative_area +=
|
||||
compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
|
||||
cumulative_curr +=
|
||||
cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
|
||||
cumulative_curr_Ig =
|
||||
cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
|
||||
}
|
||||
power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
|
||||
power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
|
||||
|
@ -188,25 +176,18 @@ void Decoder::compute_area()
|
|||
|
||||
|
||||
|
||||
double Decoder::compute_delays(double inrisetime)
|
||||
{
|
||||
if (exist)
|
||||
{
|
||||
double Decoder::compute_delays(double inrisetime) {
|
||||
if (exist) {
|
||||
double ret_val = 0; // outrisetime
|
||||
int i;
|
||||
double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
|
||||
double Vdd = g_tp.peri_global.Vdd;
|
||||
|
||||
if ((is_wl_tr) && (is_dram))
|
||||
{
|
||||
if ((is_wl_tr) && (is_dram)) {
|
||||
Vpp = g_tp.vpp;
|
||||
}
|
||||
else if (is_wl_tr)
|
||||
{
|
||||
} else if (is_wl_tr) {
|
||||
Vpp = g_tp.sram_cell.Vdd;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
Vpp = g_tp.peri_global.Vdd;
|
||||
}
|
||||
|
||||
|
@ -221,8 +202,7 @@ double Decoder::compute_delays(double inrisetime)
|
|||
inrisetime = this_delay / (1.0 - 0.5);
|
||||
power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
|
||||
|
||||
for (i = 1; i < num_gates - 1; ++i)
|
||||
{
|
||||
for (i = 1; i < num_gates - 1; ++i) {
|
||||
rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
|
||||
c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
|
||||
c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
|
||||
|
@ -247,9 +227,7 @@ double Decoder::compute_delays(double inrisetime)
|
|||
power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
@ -313,8 +291,7 @@ PredecBlk::PredecBlk(
|
|||
power_nand2_path(),
|
||||
power_nand3_path(),
|
||||
power_L2(),
|
||||
is_dram_(is_dram)
|
||||
{
|
||||
is_dram_(is_dram) {
|
||||
int branch_effort_predec_out;
|
||||
double C_ld_dec_gate;
|
||||
int num_addr_bits_dec = _log2(num_dec_signals);
|
||||
|
@ -326,23 +303,17 @@ PredecBlk::PredecBlk(
|
|||
w_L1_nand3_n[0] = 0;
|
||||
w_L1_nand3_p[0] = 0;
|
||||
|
||||
if (is_blk1 == true)
|
||||
{
|
||||
if (num_addr_bits_dec <= 0)
|
||||
{
|
||||
if (is_blk1 == true) {
|
||||
if (num_addr_bits_dec <= 0) {
|
||||
return;
|
||||
}
|
||||
else if (num_addr_bits_dec < 4)
|
||||
{
|
||||
} else if (num_addr_bits_dec < 4) {
|
||||
// Just one predecoder block is required with NAND2 gates. No decoder required.
|
||||
// The first level of predecoding directly drives the decoder output load
|
||||
exist = true;
|
||||
number_input_addr_bits = num_addr_bits_dec;
|
||||
R_wire_predec_blk_out = dec->R_wire_dec_out;
|
||||
C_ld_predec_blk_out = dec->C_ld_dec_out;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
exist = true;
|
||||
number_input_addr_bits = blk1_num_input_addr_bits;
|
||||
branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
|
||||
|
@ -350,11 +321,8 @@ PredecBlk::PredecBlk(
|
|||
R_wire_predec_blk_out = R_wire_predec_blk_out_;
|
||||
C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (num_addr_bits_dec >= 4)
|
||||
{
|
||||
} else {
|
||||
if (num_addr_bits_dec >= 4) {
|
||||
exist = true;
|
||||
number_input_addr_bits = blk2_num_input_addr_bits;
|
||||
branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
|
||||
|
@ -370,8 +338,7 @@ PredecBlk::PredecBlk(
|
|||
|
||||
|
||||
|
||||
void PredecBlk::compute_widths()
|
||||
{
|
||||
void PredecBlk::compute_widths() {
|
||||
double F, c_load_nand3_path, c_load_nand2_path;
|
||||
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
|
||||
double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
|
||||
|
@ -380,8 +347,7 @@ void PredecBlk::compute_widths()
|
|||
if (exist == false) return;
|
||||
|
||||
|
||||
switch (number_input_addr_bits)
|
||||
{
|
||||
switch (number_input_addr_bits) {
|
||||
case 1:
|
||||
flag_two_unique_paths = false;
|
||||
number_inputs_L1_gate = 2;
|
||||
|
@ -439,15 +405,11 @@ void PredecBlk::compute_widths()
|
|||
}
|
||||
|
||||
// find the number of gates and sizing in second level of predecoder (if there is a second level)
|
||||
if (flag_L2_gate)
|
||||
{
|
||||
if (flag_L2_gate == 2)
|
||||
{ // 2nd level is a NAND2 gate
|
||||
if (flag_L2_gate) {
|
||||
if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
|
||||
w_L2_n[0] = 2 * g_tp.min_w_nmos_;
|
||||
F = gnand2;
|
||||
}
|
||||
else
|
||||
{ // 2nd level is a NAND3 gate
|
||||
} else { // 2nd level is a NAND3 gate
|
||||
w_L2_n[0] = 3 * g_tp.min_w_nmos_;
|
||||
F = gnand3;
|
||||
}
|
||||
|
@ -465,9 +427,11 @@ void PredecBlk::compute_widths()
|
|||
g_tp.max_w_nmos_);
|
||||
|
||||
// Now find the number of gates and widths in first level of predecoder
|
||||
if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2))
|
||||
{ // Whenever flag_two_unique_paths is true, it means first level of decoder employs
|
||||
// both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2, it means
|
||||
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
|
||||
// Whenever flag_two_unique_paths is true, it means first level of
|
||||
// decoder employs
|
||||
// both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 2,
|
||||
// it means
|
||||
// a NAND2 gate is used in the first level of the predecoder
|
||||
c_load_nand2_path = branch_effort_nand2_gate_output *
|
||||
(gate_C(w_L2_n[0], 0, is_dram_) +
|
||||
|
@ -490,8 +454,7 @@ void PredecBlk::compute_widths()
|
|||
}
|
||||
|
||||
//Now find widths of gates along path in which first gate is a NAND3
|
||||
if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3))
|
||||
{ // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
|
||||
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs
|
||||
// both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means
|
||||
// a NAND3 gate is used in the first level of the predecoder
|
||||
c_load_nand3_path = branch_effort_nand3_gate_output *
|
||||
|
@ -513,11 +476,8 @@ void PredecBlk::compute_widths()
|
|||
is_dram_, false,
|
||||
g_tp.max_w_nmos_);
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // find number of gates and widths in first level of predecoder block when there is no second level
|
||||
if (number_inputs_L1_gate == 2)
|
||||
{
|
||||
} else { // find number of gates and widths in first level of predecoder block when there is no second level
|
||||
if (number_inputs_L1_gate == 2) {
|
||||
w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
|
||||
w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
|
||||
F = gnand2 * C_ld_predec_blk_out /
|
||||
|
@ -533,9 +493,7 @@ void PredecBlk::compute_widths()
|
|||
p_to_n_sz_ratio,
|
||||
is_dram_, false,
|
||||
g_tp.max_w_nmos_);
|
||||
}
|
||||
else if (number_inputs_L1_gate == 3)
|
||||
{
|
||||
} else if (number_inputs_L1_gate == 3) {
|
||||
w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
|
||||
w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
|
||||
F = gnand3 * C_ld_predec_blk_out /
|
||||
|
@ -557,10 +515,8 @@ void PredecBlk::compute_widths()
|
|||
|
||||
|
||||
|
||||
void PredecBlk::compute_area()
|
||||
{
|
||||
if (exist)
|
||||
{ // First check whether a predecoder block is needed
|
||||
void PredecBlk::compute_area() {
|
||||
if (exist) { // First check whether a predecoder block is needed
|
||||
int num_L1_nand2 = 0;
|
||||
int num_L1_nand3 = 0;
|
||||
int num_L2 = 0;
|
||||
|
@ -575,15 +531,13 @@ void PredecBlk::compute_area()
|
|||
tot_area_L1_nand3 = 0;
|
||||
leak_L1_nand3 = 0;
|
||||
gate_leak_L1_nand3 = 0;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
|
||||
leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
|
||||
gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
|
||||
}
|
||||
|
||||
switch (number_input_addr_bits)
|
||||
{
|
||||
switch (number_input_addr_bits) {
|
||||
case 1: //2 NAND2 gates
|
||||
num_L1_nand2 = 2;
|
||||
num_L2 = 0;
|
||||
|
@ -645,8 +599,7 @@ void PredecBlk::compute_area()
|
|||
break;
|
||||
}
|
||||
|
||||
for (int i = 1; i < number_gates_L1_nand2_path; ++i)
|
||||
{
|
||||
for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
|
||||
tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
|
||||
leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
|
||||
gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
|
||||
|
@ -655,8 +608,7 @@ void PredecBlk::compute_area()
|
|||
leak_L1_nand2 *= num_L1_nand2;
|
||||
gate_leak_L1_nand2 *= num_L1_nand2;
|
||||
|
||||
for (int i = 1; i < number_gates_L1_nand3_path; ++i)
|
||||
{
|
||||
for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
|
||||
tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
|
||||
leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
|
||||
gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
|
||||
|
@ -670,21 +622,17 @@ void PredecBlk::compute_area()
|
|||
double leakage_L2 = 0.0;
|
||||
double gate_leakage_L2 = 0.0;
|
||||
|
||||
if (flag_L2_gate == 2)
|
||||
{
|
||||
if (flag_L2_gate == 2) {
|
||||
cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
|
||||
leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
|
||||
gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
|
||||
}
|
||||
else if (flag_L2_gate == 3)
|
||||
{
|
||||
} else if (flag_L2_gate == 3) {
|
||||
cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
|
||||
leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
|
||||
gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
|
||||
}
|
||||
|
||||
for (int i = 1; i < number_gates_L2; ++i)
|
||||
{
|
||||
for (int i = 1; i < number_gates_L2; ++i) {
|
||||
cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
|
||||
leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
|
||||
gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
|
||||
|
@ -706,8 +654,7 @@ void PredecBlk::compute_area()
|
|||
|
||||
|
||||
pair<double, double> PredecBlk::compute_delays(
|
||||
pair<double, double> inrisetime) // <nand2, nand3>
|
||||
{
|
||||
pair<double, double> inrisetime) { // <nand2, nand3>
|
||||
pair<double, double> ret_val;
|
||||
ret_val.first = 0; // outrisetime_nand2_path
|
||||
ret_val.second = 0; // outrisetime_nand3_path
|
||||
|
@ -720,12 +667,10 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
|
||||
// TODO: following delay calculation part can be greatly simplified.
|
||||
// first check whether a predecoder block is required
|
||||
if (exist)
|
||||
{
|
||||
if (exist) {
|
||||
//Find delay in first level of predecoder block
|
||||
//First find delay in path
|
||||
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2))
|
||||
{
|
||||
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
|
||||
//First gate is a NAND2 gate
|
||||
rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
|
||||
c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
|
||||
|
@ -738,8 +683,7 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
|
||||
|
||||
//Add delays of all but the last inverter in the chain
|
||||
for (i = 1; i < number_gates_L1_nand2_path - 1; ++i)
|
||||
{
|
||||
for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
|
||||
rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
|
||||
c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
|
||||
c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -754,9 +698,10 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
//Add delay of the last inverter
|
||||
i = number_gates_L1_nand2_path - 1;
|
||||
rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
|
||||
if (flag_L2_gate)
|
||||
{
|
||||
c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
|
||||
if (flag_L2_gate) {
|
||||
c_load = branch_effort_nand2_gate_output *
|
||||
(gate_C(w_L2_n[0], 0, is_dram_) +
|
||||
gate_C(w_L2_p[0], 0, is_dram_));
|
||||
c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
|
||||
tf = rd * (c_intrinsic + c_load);
|
||||
|
@ -764,9 +709,7 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
delay_nand2_path += this_delay;
|
||||
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
|
||||
power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
|
||||
}
|
||||
else
|
||||
{ //First level directly drives decoder output load
|
||||
} else { //First level directly drives decoder output load
|
||||
c_load = C_ld_predec_blk_out;
|
||||
c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
|
||||
|
@ -778,8 +721,8 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
}
|
||||
}
|
||||
|
||||
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3))
|
||||
{ //Check if the number of gates in the first level is more than 1.
|
||||
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
|
||||
//Check if the number of gates in the first level is more than 1.
|
||||
//First gate is a NAND3 gate
|
||||
rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
|
||||
c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
|
||||
|
@ -792,8 +735,7 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
|
||||
|
||||
//Add delays of all but the last inverter in the chain
|
||||
for (i = 1; i < number_gates_L1_nand3_path - 1; ++i)
|
||||
{
|
||||
for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
|
||||
rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
|
||||
c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
|
||||
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -808,9 +750,10 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
//Add delay of the last inverter
|
||||
i = number_gates_L1_nand3_path - 1;
|
||||
rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
|
||||
if (flag_L2_gate)
|
||||
{
|
||||
c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
|
||||
if (flag_L2_gate) {
|
||||
c_load = branch_effort_nand3_gate_output *
|
||||
(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
|
||||
is_dram_));
|
||||
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
|
||||
tf = rd * (c_intrinsic + c_load);
|
||||
|
@ -818,9 +761,7 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
delay_nand3_path += this_delay;
|
||||
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
|
||||
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
|
||||
}
|
||||
else
|
||||
{ //First level directly drives decoder output load
|
||||
} else { //First level directly drives decoder output load
|
||||
c_load = C_ld_predec_blk_out;
|
||||
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
|
||||
|
@ -833,10 +774,8 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
}
|
||||
|
||||
// Find delay through second level
|
||||
if (flag_L2_gate)
|
||||
{
|
||||
if (flag_L2_gate == 2)
|
||||
{
|
||||
if (flag_L2_gate) {
|
||||
if (flag_L2_gate == 2) {
|
||||
rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
|
||||
c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
|
||||
c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -846,9 +785,7 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
delay_nand2_path += this_delay;
|
||||
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
|
||||
power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
|
||||
}
|
||||
else
|
||||
{ // flag_L2_gate = 3
|
||||
} else { // flag_L2_gate = 3
|
||||
rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
|
||||
c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
|
||||
c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -860,8 +797,7 @@ pair<double, double> PredecBlk::compute_delays(
|
|||
power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
|
||||
}
|
||||
|
||||
for (i = 1; i < number_gates_L2 - 1; ++i)
|
||||
{
|
||||
for (i = 1; i < number_gates_L2 - 1; ++i) {
|
||||
rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
|
||||
c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
|
||||
c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -1053,10 +989,8 @@ PredecBlkDrv::PredecBlkDrv(
|
|||
power_nand3_path(),
|
||||
blk(blk_), dec(blk->dec),
|
||||
is_dram_(is_dram),
|
||||
way_select(way_select_)
|
||||
{
|
||||
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
|
||||
{
|
||||
way_select(way_select_) {
|
||||
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
|
||||
width_nand2_path_n[i] = 0;
|
||||
width_nand2_path_p[i] = 0;
|
||||
width_nand3_path_n[i] = 0;
|
||||
|
@ -1065,25 +999,18 @@ PredecBlkDrv::PredecBlkDrv(
|
|||
|
||||
number_input_addr_bits = blk->number_input_addr_bits;
|
||||
|
||||
if (way_select > 1)
|
||||
{
|
||||
if (way_select > 1) {
|
||||
flag_driver_exists = 1;
|
||||
number_input_addr_bits = way_select;
|
||||
if (dec->num_in_signals == 2)
|
||||
{
|
||||
if (dec->num_in_signals == 2) {
|
||||
c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
|
||||
num_buffers_driving_2_nand2_load = number_input_addr_bits;
|
||||
}
|
||||
else if (dec->num_in_signals == 3)
|
||||
{
|
||||
} else if (dec->num_in_signals == 3) {
|
||||
c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
|
||||
num_buffers_driving_2_nand3_load = number_input_addr_bits;
|
||||
}
|
||||
}
|
||||
else if (way_select == 0)
|
||||
{
|
||||
if (blk->exist)
|
||||
{
|
||||
} else if (way_select == 0) {
|
||||
if (blk->exist) {
|
||||
flag_driver_exists = 1;
|
||||
}
|
||||
}
|
||||
|
@ -1094,8 +1021,7 @@ PredecBlkDrv::PredecBlkDrv(
|
|||
|
||||
|
||||
|
||||
void PredecBlkDrv::compute_widths()
|
||||
{
|
||||
void PredecBlkDrv::compute_widths() {
|
||||
// The predecode block driver accepts as input the address bits from the h-tree network. For
|
||||
// each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
|
||||
// inversion to generate addrbar and simply treat addrbar as addr.
|
||||
|
@ -1103,61 +1029,54 @@ void PredecBlkDrv::compute_widths()
|
|||
double F;
|
||||
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
|
||||
|
||||
if (flag_driver_exists)
|
||||
{
|
||||
if (flag_driver_exists) {
|
||||
double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
|
||||
double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
|
||||
|
||||
if (way_select == 0)
|
||||
{
|
||||
if (blk->number_input_addr_bits == 1)
|
||||
{ //2 NAND2 gates
|
||||
if (way_select == 0) {
|
||||
if (blk->number_input_addr_bits == 1) {
|
||||
//2 NAND2 gates
|
||||
num_buffers_driving_2_nand2_load = 1;
|
||||
c_load_nand2_path_out = 2 * C_nand2_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 2)
|
||||
{ //4 NAND2 gates one 2-4 decoder
|
||||
} else if (blk->number_input_addr_bits == 2) {
|
||||
//4 NAND2 gates one 2-4 decoder
|
||||
num_buffers_driving_4_nand2_load = 2;
|
||||
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 3)
|
||||
{ //8 NAND3 gates one 3-8 decoder
|
||||
} else if (blk->number_input_addr_bits == 3) {
|
||||
//8 NAND3 gates one 3-8 decoder
|
||||
num_buffers_driving_8_nand3_load = 3;
|
||||
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 4)
|
||||
{ //4 + 4 NAND2 gates two 2-4 decoder
|
||||
} else if (blk->number_input_addr_bits == 4) {
|
||||
//4 + 4 NAND2 gates two 2-4 decoder
|
||||
num_buffers_driving_4_nand2_load = 4;
|
||||
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 5)
|
||||
{ //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder
|
||||
} else if (blk->number_input_addr_bits == 5) {
|
||||
//4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
|
||||
//decoder
|
||||
num_buffers_driving_4_nand2_load = 2;
|
||||
num_buffers_driving_8_nand3_load = 3;
|
||||
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
|
||||
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 6)
|
||||
{ //8 + 8 NAND3 gates two 3-8 decoder
|
||||
} else if (blk->number_input_addr_bits == 6) {
|
||||
//8 + 8 NAND3 gates two 3-8 decoder
|
||||
num_buffers_driving_8_nand3_load = 6;
|
||||
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 7)
|
||||
{ //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder
|
||||
} else if (blk->number_input_addr_bits == 7) {
|
||||
//4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
|
||||
//decoder
|
||||
num_buffers_driving_4_nand2_load = 4;
|
||||
num_buffers_driving_8_nand3_load = 3;
|
||||
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
|
||||
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 8)
|
||||
{ //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder
|
||||
} else if (blk->number_input_addr_bits == 8) {
|
||||
//4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
|
||||
//decoder
|
||||
num_buffers_driving_4_nand2_load = 2;
|
||||
num_buffers_driving_8_nand3_load = 6;
|
||||
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
|
||||
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
|
||||
}
|
||||
else if (blk->number_input_addr_bits == 9)
|
||||
{ //8 + 8 + 8 NAND3 gates three 3-8 decoder
|
||||
} else if (blk->number_input_addr_bits == 9) {
|
||||
//8 + 8 + 8 NAND3 gates three 3-8 decoder
|
||||
num_buffers_driving_8_nand3_load = 9;
|
||||
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
|
||||
}
|
||||
|
@ -1166,8 +1085,8 @@ void PredecBlkDrv::compute_widths()
|
|||
if ((blk->flag_two_unique_paths) ||
|
||||
(blk->number_inputs_L1_gate == 2) ||
|
||||
(number_input_addr_bits == 0) ||
|
||||
((way_select)&&(dec->num_in_signals == 2)))
|
||||
{ //this means that way_select is driving NAND2 in decoder.
|
||||
((way_select) && (dec->num_in_signals == 2))) {
|
||||
//this means that way_select is driving NAND2 in decoder.
|
||||
width_nand2_path_n[0] = g_tp.min_w_nmos_;
|
||||
width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
|
||||
F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
|
||||
|
@ -1184,8 +1103,8 @@ void PredecBlkDrv::compute_widths()
|
|||
|
||||
if ((blk->flag_two_unique_paths) ||
|
||||
(blk->number_inputs_L1_gate == 3) ||
|
||||
((way_select)&&(dec->num_in_signals == 3)))
|
||||
{ //this means that way_select is driving NAND3 in decoder.
|
||||
((way_select) && (dec->num_in_signals == 3))) {
|
||||
//this means that way_select is driving NAND3 in decoder.
|
||||
width_nand3_path_n[0] = g_tp.min_w_nmos_;
|
||||
width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
|
||||
F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
|
||||
|
@ -1204,8 +1123,7 @@ void PredecBlkDrv::compute_widths()
|
|||
|
||||
|
||||
|
||||
void PredecBlkDrv::compute_area()
|
||||
{
|
||||
void PredecBlkDrv::compute_area() {
|
||||
double area_nand2_path = 0;
|
||||
double area_nand3_path = 0;
|
||||
double leak_nand2_path = 0;
|
||||
|
@ -1213,13 +1131,18 @@ void PredecBlkDrv::compute_area()
|
|||
double gate_leak_nand2_path = 0;
|
||||
double gate_leak_nand3_path = 0;
|
||||
|
||||
if (flag_driver_exists)
|
||||
{ // first check whether a predecoder block driver is needed
|
||||
for (int i = 0; i < number_gates_nand2_path; ++i)
|
||||
{
|
||||
area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def);
|
||||
leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
|
||||
gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
|
||||
if (flag_driver_exists) {
|
||||
// first check whether a predecoder block driver is needed
|
||||
for (int i = 0; i < number_gates_nand2_path; ++i) {
|
||||
area_nand2_path +=
|
||||
compute_gate_area(INV, 1, width_nand2_path_p[i],
|
||||
width_nand2_path_n[i], g_tp.cell_h_def);
|
||||
leak_nand2_path +=
|
||||
cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
|
||||
1, inv, is_dram_);
|
||||
gate_leak_nand2_path +=
|
||||
cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
|
||||
1, inv, is_dram_);
|
||||
}
|
||||
area_nand2_path *= (num_buffers_driving_1_nand2_load +
|
||||
num_buffers_driving_2_nand2_load +
|
||||
|
@ -1231,11 +1154,16 @@ void PredecBlkDrv::compute_area()
|
|||
num_buffers_driving_2_nand2_load +
|
||||
num_buffers_driving_4_nand2_load);
|
||||
|
||||
for (int i = 0; i < number_gates_nand3_path; ++i)
|
||||
{
|
||||
area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def);
|
||||
leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
|
||||
gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
|
||||
for (int i = 0; i < number_gates_nand3_path; ++i) {
|
||||
area_nand3_path +=
|
||||
compute_gate_area(INV, 1, width_nand3_path_p[i],
|
||||
width_nand3_path_n[i], g_tp.cell_h_def);
|
||||
leak_nand3_path +=
|
||||
cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
|
||||
1, inv, is_dram_);
|
||||
gate_leak_nand3_path +=
|
||||
cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
|
||||
1, inv, is_dram_);
|
||||
}
|
||||
area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
|
||||
leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
|
||||
|
@ -1253,8 +1181,7 @@ void PredecBlkDrv::compute_area()
|
|||
|
||||
pair<double, double> PredecBlkDrv::compute_delays(
|
||||
double inrisetime_nand2_path,
|
||||
double inrisetime_nand3_path)
|
||||
{
|
||||
double inrisetime_nand3_path) {
|
||||
pair<double, double> ret_val;
|
||||
ret_val.first = 0; // outrisetime_nand2_path
|
||||
ret_val.second = 0; // outrisetime_nand3_path
|
||||
|
@ -1262,10 +1189,8 @@ pair<double, double> PredecBlkDrv::compute_delays(
|
|||
double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
|
||||
double Vdd = g_tp.peri_global.Vdd;
|
||||
|
||||
if (flag_driver_exists)
|
||||
{
|
||||
for (i = 0; i < number_gates_nand2_path - 1; ++i)
|
||||
{
|
||||
if (flag_driver_exists) {
|
||||
for (i = 0; i < number_gates_nand2_path - 1; ++i) {
|
||||
rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
|
||||
c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
|
||||
c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -1278,8 +1203,7 @@ pair<double, double> PredecBlkDrv::compute_delays(
|
|||
}
|
||||
|
||||
// Final inverter drives the predecoder block or the decoder output load
|
||||
if (number_gates_nand2_path != 0)
|
||||
{
|
||||
if (number_gates_nand2_path != 0) {
|
||||
i = number_gates_nand2_path - 1;
|
||||
rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
|
||||
c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -1293,8 +1217,7 @@ pair<double, double> PredecBlkDrv::compute_delays(
|
|||
// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
|
||||
}
|
||||
|
||||
for (i = 0; i < number_gates_nand3_path - 1; ++i)
|
||||
{
|
||||
for (i = 0; i < number_gates_nand3_path - 1; ++i) {
|
||||
rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
|
||||
c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
|
||||
c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -1307,8 +1230,7 @@ pair<double, double> PredecBlkDrv::compute_delays(
|
|||
}
|
||||
|
||||
// Final inverter drives the predecoder block or the decoder output load
|
||||
if (number_gates_nand3_path != 0)
|
||||
{
|
||||
if (number_gates_nand3_path != 0) {
|
||||
i = number_gates_nand3_path - 1;
|
||||
rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
|
||||
c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -1325,8 +1247,7 @@ pair<double, double> PredecBlkDrv::compute_delays(
|
|||
}
|
||||
|
||||
|
||||
double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
|
||||
{
|
||||
double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
|
||||
return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
|
||||
num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
|
||||
}
|
||||
|
@ -1336,8 +1257,7 @@ double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir)
|
|||
Predec::Predec(
|
||||
PredecBlkDrv * drv1_,
|
||||
PredecBlkDrv * drv2_)
|
||||
:blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_)
|
||||
{
|
||||
: blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
|
||||
driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
|
||||
drv1->power_nand3_path.readOp.leakage +
|
||||
drv2->power_nand2_path.readOp.leakage +
|
||||
|
@ -1399,8 +1319,7 @@ void PredecBlkDrv::leakage_feedback(double temperature)
|
|||
}
|
||||
}
|
||||
|
||||
double Predec::compute_delays(double inrisetime)
|
||||
{
|
||||
double Predec::compute_delays(double inrisetime) {
|
||||
// TODO: Jung Ho thinks that the predecoder block driver is located between the decoder and the predecoder block.
|
||||
pair<double, double> tmp_pair1, tmp_pair2;
|
||||
tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
|
||||
|
@ -1429,7 +1348,6 @@ double Predec::compute_delays(double inrisetime)
|
|||
return tmp_pair1.second;
|
||||
}
|
||||
|
||||
|
||||
void Predec::leakage_feedback(double temperature)
|
||||
{
|
||||
drv1->leakage_feedback(temperature);
|
||||
|
@ -1465,8 +1383,7 @@ void Predec::leakage_feedback(double temperature)
|
|||
// returns <delay, risetime>
|
||||
pair<double, double> Predec::get_max_delay_before_decoder(
|
||||
pair<double, double> input_pair1,
|
||||
pair<double, double> input_pair2)
|
||||
{
|
||||
pair<double, double> input_pair2) {
|
||||
pair<double, double> ret_val;
|
||||
double delay;
|
||||
|
||||
|
@ -1474,20 +1391,17 @@ pair<double, double> Predec::get_max_delay_before_decoder(
|
|||
ret_val.first = delay;
|
||||
ret_val.second = input_pair1.first;
|
||||
delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
|
||||
if (ret_val.first < delay)
|
||||
{
|
||||
if (ret_val.first < delay) {
|
||||
ret_val.first = delay;
|
||||
ret_val.second = input_pair1.second;
|
||||
}
|
||||
delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
|
||||
if (ret_val.first < delay)
|
||||
{
|
||||
if (ret_val.first < delay) {
|
||||
ret_val.first = delay;
|
||||
ret_val.second = input_pair2.first;
|
||||
}
|
||||
delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
|
||||
if (ret_val.first < delay)
|
||||
{
|
||||
if (ret_val.first < delay) {
|
||||
ret_val.first = delay;
|
||||
ret_val.second = input_pair2.second;
|
||||
}
|
||||
|
@ -1497,7 +1411,8 @@ pair<double, double> Predec::get_max_delay_before_decoder(
|
|||
|
||||
|
||||
|
||||
Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram)
|
||||
Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
|
||||
bool is_dram)
|
||||
: number_gates(0),
|
||||
min_number_gates(2),
|
||||
c_gate_load(c_gate_load_),
|
||||
|
@ -1505,10 +1420,8 @@ Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bo
|
|||
r_wire_load(r_wire_load_),
|
||||
delay(0),
|
||||
power(),
|
||||
is_dram_(is_dram)
|
||||
{
|
||||
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++)
|
||||
{
|
||||
is_dram_(is_dram) {
|
||||
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
|
||||
width_n[i] = 0;
|
||||
width_p[i] = 0;
|
||||
}
|
||||
|
@ -1517,8 +1430,7 @@ Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bo
|
|||
}
|
||||
|
||||
|
||||
void Driver::compute_widths()
|
||||
{
|
||||
void Driver::compute_widths() {
|
||||
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
|
||||
double c_load = c_gate_load + c_wire_load;
|
||||
width_n[0] = g_tp.min_w_nmos_;
|
||||
|
@ -1539,14 +1451,12 @@ void Driver::compute_widths()
|
|||
|
||||
|
||||
|
||||
double Driver::compute_delay(double inrisetime)
|
||||
{
|
||||
double Driver::compute_delay(double inrisetime) {
|
||||
int i;
|
||||
double rd, c_load, c_intrinsic, tf;
|
||||
double this_delay = 0;
|
||||
|
||||
for (i = 0; i < number_gates - 1; ++i)
|
||||
{
|
||||
for (i = 0; i < number_gates - 1; ++i) {
|
||||
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
|
||||
c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
|
||||
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
|
@ -1555,9 +1465,14 @@ double Driver::compute_delay(double inrisetime)
|
|||
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||||
delay += this_delay;
|
||||
inrisetime = this_delay / (1.0 - 0.5);
|
||||
power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||||
power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd;
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
|
||||
power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
|
||||
g_tp.peri_global.Vdd;
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
|
||||
g_tp.peri_global.Vdd;
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
|
||||
g_tp.peri_global.Vdd;
|
||||
}
|
||||
|
||||
i = number_gates - 1;
|
||||
|
@ -1565,12 +1480,18 @@ double Driver::compute_delay(double inrisetime)
|
|||
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
|
||||
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
|
||||
drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
|
||||
tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load);
|
||||
tf = rd * (c_intrinsic + c_load) + r_wire_load *
|
||||
(c_wire_load / 2 + c_gate_load);
|
||||
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
|
||||
delay += this_delay;
|
||||
power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
|
||||
power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd;
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd;
|
||||
power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
|
||||
g_tp.peri_global.Vdd;
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
|
||||
g_tp.peri_global.Vdd;
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
|
||||
g_tp.peri_global.Vdd;
|
||||
|
||||
return this_delay / (1.0 - 0.5);
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -42,8 +43,7 @@
|
|||
using namespace std;
|
||||
|
||||
|
||||
class Decoder : public Component
|
||||
{
|
||||
class Decoder : public Component {
|
||||
public:
|
||||
Decoder(
|
||||
int _num_dec_signals,
|
||||
|
@ -80,8 +80,7 @@ class Decoder : public Component
|
|||
|
||||
|
||||
|
||||
class PredecBlk : public Component
|
||||
{
|
||||
class PredecBlk : public Component {
|
||||
public:
|
||||
PredecBlk(
|
||||
int num_dec_signals,
|
||||
|
@ -133,8 +132,7 @@ class PredecBlk : public Component
|
|||
};
|
||||
|
||||
|
||||
class PredecBlkDrv : public Component
|
||||
{
|
||||
class PredecBlkDrv : public Component {
|
||||
public:
|
||||
PredecBlkDrv(
|
||||
int way_select,
|
||||
|
@ -180,14 +178,12 @@ class PredecBlkDrv : public Component
|
|||
double inrisetime_nand2_path,
|
||||
double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
|
||||
|
||||
inline int num_addr_bits_nand2_path()
|
||||
{
|
||||
inline int num_addr_bits_nand2_path() {
|
||||
return num_buffers_driving_1_nand2_load +
|
||||
num_buffers_driving_2_nand2_load +
|
||||
num_buffers_driving_4_nand2_load;
|
||||
}
|
||||
inline int num_addr_bits_nand3_path()
|
||||
{
|
||||
inline int num_addr_bits_nand3_path() {
|
||||
return num_buffers_driving_2_nand3_load +
|
||||
num_buffers_driving_8_nand3_load;
|
||||
}
|
||||
|
@ -196,8 +192,7 @@ class PredecBlkDrv : public Component
|
|||
|
||||
|
||||
|
||||
class Predec : public Component
|
||||
{
|
||||
class Predec : public Component {
|
||||
public:
|
||||
Predec(
|
||||
PredecBlkDrv * drv1,
|
||||
|
@ -223,8 +218,7 @@ class Predec : public Component
|
|||
|
||||
|
||||
|
||||
class Driver : public Component
|
||||
{
|
||||
class Driver : public Component {
|
||||
public:
|
||||
Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -39,14 +40,16 @@
|
|||
|
||||
Htree2::Htree2(
|
||||
enum Wire_type wire_model, double mat_w, double mat_h,
|
||||
int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type,
|
||||
int a_bits, int d_inbits, int search_data_in, int d_outbits,
|
||||
int search_data_out, int bl, int wl, enum Htree_type htree_type,
|
||||
bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt)
|
||||
: in_rise_time(0), out_rise_time(0),
|
||||
tree_type(htree_type), mat_width(mat_w), mat_height(mat_h),
|
||||
add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits),
|
||||
add_bits(a_bits), data_in_bits(d_inbits),
|
||||
search_data_in_bits(search_data_in), data_out_bits(d_outbits),
|
||||
search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl),
|
||||
uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt)
|
||||
{
|
||||
uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model),
|
||||
deviceType(dt) {
|
||||
assert(ndbl >= 2 && ndwl >= 2);
|
||||
|
||||
// if (ndbl == 1 && ndwl == 1)
|
||||
|
@ -65,8 +68,7 @@ Htree2::Htree2(
|
|||
min_w_nmos = g_tp.min_w_nmos_;
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
|
||||
|
||||
switch (htree_type)
|
||||
{
|
||||
switch (htree_type) {
|
||||
case Add_htree:
|
||||
wire_bw = init_wire_bw = add_bits;
|
||||
in_htree();
|
||||
|
@ -102,8 +104,7 @@ Htree2::Htree2(
|
|||
|
||||
|
||||
// nand gate sizing calculation
|
||||
void Htree2::input_nand(double s1, double s2, double l_eff)
|
||||
{
|
||||
void Htree2::input_nand(double s1, double s2, double l_eff) {
|
||||
Wire w1(wt, l_eff);
|
||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
// input capacitance of a repeater = input capacitance of nand.
|
||||
|
@ -114,7 +115,8 @@ void Htree2::input_nand(double s1, double s2, double l_eff)
|
|||
(drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
|
||||
2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0));
|
||||
delay += horowitz(w1.out_rise_time, tc,
|
||||
deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
||||
deviceType->Vth / deviceType->Vdd, deviceType->Vth /
|
||||
deviceType->Vdd, RISE);
|
||||
power.readOp.dynamic += 0.5 *
|
||||
(2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)
|
||||
+ drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
|
@ -126,31 +128,40 @@ void Htree2::input_nand(double s1, double s2, double l_eff)
|
|||
+ drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)
|
||||
+ 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) *
|
||||
deviceType->Vdd * deviceType->Vdd * wire_bw ;
|
||||
power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
|
||||
power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd;
|
||||
power.readOp.leakage += (wire_bw *
|
||||
cmos_Isub_leakage(min_w_nmos * (nsize * 2),
|
||||
min_w_pmos * nsize * 2, 2,
|
||||
nand)) * deviceType->Vdd;
|
||||
power.readOp.gate_leakage += (wire_bw *
|
||||
cmos_Ig_leakage(min_w_nmos * (nsize * 2),
|
||||
min_w_pmos * nsize * 2, 2,
|
||||
nand)) * deviceType->Vdd;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// tristate buffer model consisting of not, nand, nor, and driver transistors
|
||||
void Htree2::output_buffer(double s1, double s2, double l_eff)
|
||||
{
|
||||
void Htree2::output_buffer(double s1, double s2, double l_eff) {
|
||||
Wire w1(wt, l_eff);
|
||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||
// input capacitance of repeater = input capacitance of nand + nor.
|
||||
double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
|
||||
double s_eff = //stage eff of a repeater in a wire
|
||||
(gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/
|
||||
(gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6,
|
||||
true)) /
|
||||
gate_C(s2 * (min_w_nmos + min_w_pmos), 0);
|
||||
double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0));
|
||||
double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 /
|
||||
(s_eff * gate_C(min_w_pmos, 0));
|
||||
size = (size < 1) ? 1 : size;
|
||||
|
||||
double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1);
|
||||
double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1);
|
||||
double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
double cap_nand_out =
|
||||
drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
|
||||
gate_C(tr_size * min_w_pmos, 0);
|
||||
double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
double cap_ptrans_out = 2 *
|
||||
(drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) +
|
||||
gate_C(s1 * (min_w_nmos + min_w_pmos), 0);
|
||||
|
||||
|
@ -158,7 +169,8 @@ void Htree2::output_buffer(double s1, double s2, double l_eff)
|
|||
|
||||
|
||||
delay += horowitz(w1.out_rise_time, tc,
|
||||
deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
||||
deviceType->Vth / deviceType->Vdd, deviceType->Vth /
|
||||
deviceType->Vdd, RISE);
|
||||
|
||||
//nand
|
||||
power.readOp.dynamic += 0.5 *
|
||||
|
@ -213,24 +225,49 @@ void Htree2::output_buffer(double s1, double s2, double l_eff)
|
|||
deviceType->Vdd * deviceType->Vdd * init_wire_bw;
|
||||
|
||||
if (uca_tree) {
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
|
||||
2, 1, inv) *
|
||||
deviceType->Vdd * wire_bw;/*inverter + output tr*/
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nand) * deviceType->Vdd * wire_bw;//nand
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nor) * deviceType->Vdd * wire_bw;//nor
|
||||
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
//power.readOp.gate_leakage *=;
|
||||
}
|
||||
else {
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
|
||||
1, inv) *
|
||||
deviceType->Vdd * wire_bw;/*inverter + output tr*/
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nand) * deviceType->Vdd * wire_bw;//nand
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nor) * deviceType->Vdd * wire_bw;//nor
|
||||
} else {
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size *
|
||||
2, 1, inv) *
|
||||
deviceType->Vdd * wire_bw;/*inverter + output tr*/
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nand) * deviceType->Vdd * wire_bw;//nand
|
||||
power.readOp.leakage +=
|
||||
cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nor) * deviceType->Vdd * wire_bw;//nor
|
||||
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand
|
||||
power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor
|
||||
//power.readOp.gate_leakage *=deviceType->Vdd*wire_bw;
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2,
|
||||
1, inv) *
|
||||
deviceType->Vdd * wire_bw;/*inverter + output tr*/
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nand) * deviceType->Vdd * wire_bw;//nand
|
||||
power.readOp.gate_leakage +=
|
||||
cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2,
|
||||
nor) * deviceType->Vdd * wire_bw;//nor
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -251,8 +288,7 @@ void Htree2::output_buffer(double s1, double s2, double l_eff)
|
|||
* links.
|
||||
*/
|
||||
void
|
||||
Htree2::in_htree()
|
||||
{
|
||||
Htree2::in_htree() {
|
||||
//temp var
|
||||
double s1 = 0, s2 = 0, s3 = 0;
|
||||
double l_eff = 0;
|
||||
|
@ -264,45 +300,70 @@ Htree2::in_htree()
|
|||
int v = (int) _log2(ndbl / 2); // vertical nodes
|
||||
double len_temp;
|
||||
double ht_temp;
|
||||
if (uca_tree)
|
||||
{//Sheng: this computation do not consider the wires that route from edge to middle.
|
||||
ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
if (uca_tree) {
|
||||
//Sheng: this computation does not consider the wires that route from
|
||||
//edge to middle.
|
||||
ht_temp = (mat_height * ndbl / 2 +
|
||||
/* since uca_tree models interbank tree,
|
||||
mat_height => bank height */
|
||||
((add_bits + data_in_bits + data_out_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
2 * (1 - pow(0.5, h)))) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
((add_bits + data_in_bits + data_out_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
2 * (1 - pow(0.5, v)))) / 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
if (ndwl == ndbl) {
|
||||
ht_temp = ((mat_height * ndbl / 2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
((add_bits + (search_data_in_bits +
|
||||
search_data_out_bits)) * (ndbl / 2 - 1) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * h)
|
||||
) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
}
|
||||
else if (ndwl > ndbl) {
|
||||
((add_bits + (search_data_in_bits +
|
||||
search_data_out_bits)) * (ndwl / 2 - 1) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * v)) / 2;
|
||||
} else if (ndwl > ndbl) {
|
||||
double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
|
||||
ht_temp = ((mat_height * ndbl / 2) +
|
||||
((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
|
||||
((add_bits + + (search_data_in_bits +
|
||||
search_data_out_bits)) *
|
||||
((ndbl / 2 - 1) + excess_part) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
(2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
}
|
||||
else {
|
||||
((add_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
(ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * v)) / 2;
|
||||
} else {
|
||||
double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
|
||||
ht_temp = ((mat_height * ndbl / 2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
((add_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
((ndwl / 2 - 1) + excess_part) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * h)
|
||||
) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
|
||||
((add_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
((ndwl / 2 - 1) + excess_part) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
(h + 2 * (1 - pow(0.5, v - h)))) / 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -315,14 +376,12 @@ Htree2::in_htree()
|
|||
len = len_temp;
|
||||
ht = ht_temp / 2;
|
||||
|
||||
while (v > 0 || h > 0)
|
||||
{
|
||||
while (v > 0 || h > 0) {
|
||||
if (wtemp1) delete wtemp1;
|
||||
if (wtemp2) delete wtemp2;
|
||||
if (wtemp3) delete wtemp3;
|
||||
|
||||
if (h > v)
|
||||
{
|
||||
if (h > v) {
|
||||
//the iteration considers only one horizontal link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, len / 2); // ver
|
||||
|
@ -331,9 +390,7 @@ Htree2::in_htree()
|
|||
wtemp3 = 0;
|
||||
h--;
|
||||
option = 0;
|
||||
}
|
||||
else if (v>0 && h>0)
|
||||
{
|
||||
} else if (v > 0 && h > 0) {
|
||||
//considers one horizontal link and one vertical link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, ht); // ver
|
||||
|
@ -345,9 +402,7 @@ Htree2::in_htree()
|
|||
v--;
|
||||
h--;
|
||||
option = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
// considers only one vertical link
|
||||
assert(h == 0);
|
||||
wtemp1 = new Wire(wt, ht); // ver
|
||||
|
@ -364,39 +419,32 @@ Htree2::in_htree()
|
|||
power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw;
|
||||
power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
|
||||
power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
|
||||
if ((uca_tree == false && option == 2) || search_tree==true)
|
||||
{
|
||||
if ((uca_tree == false && option == 2) || search_tree == true) {
|
||||
wire_bw *= 2; // wire bandwidth doubles only for vertical branches
|
||||
}
|
||||
|
||||
if (uca_tree == false)
|
||||
{
|
||||
if (len_temp > wtemp1->repeater_spacing)
|
||||
{
|
||||
if (uca_tree == false) {
|
||||
if (len_temp > wtemp1->repeater_spacing) {
|
||||
s1 = wtemp1->repeater_size;
|
||||
l_eff = wtemp1->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
|
||||
} else {
|
||||
s1 = (len_temp / wtemp1->repeater_spacing) *
|
||||
wtemp1->repeater_size;
|
||||
l_eff = len_temp;
|
||||
}
|
||||
|
||||
if (ht_temp > wtemp2->repeater_spacing)
|
||||
{
|
||||
if (ht_temp > wtemp2->repeater_spacing) {
|
||||
s2 = wtemp2->repeater_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
|
||||
} else {
|
||||
s2 = (len_temp / wtemp2->repeater_spacing) *
|
||||
wtemp2->repeater_size;
|
||||
}
|
||||
// first level
|
||||
input_nand(s1, s2, l_eff);
|
||||
}
|
||||
|
||||
|
||||
if (option != 1)
|
||||
{
|
||||
if (option != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -407,25 +455,22 @@ Htree2::in_htree()
|
|||
power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
|
||||
|
||||
if (uca_tree)
|
||||
{
|
||||
if (uca_tree) {
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
}
|
||||
else
|
||||
{
|
||||
power.readOp.gate_leakage +=
|
||||
wtemp2->power.readOp.gate_leakage * wire_bw;
|
||||
} else {
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
power.readOp.gate_leakage +=
|
||||
wtemp2->power.readOp.gate_leakage * wire_bw;
|
||||
wire_bw *= 2;
|
||||
|
||||
if (ht_temp > wtemp3->repeater_spacing)
|
||||
{
|
||||
if (ht_temp > wtemp3->repeater_spacing) {
|
||||
s3 = wtemp3->repeater_size;
|
||||
l_eff = wtemp3->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
|
||||
} else {
|
||||
s3 = (len_temp / wtemp3->repeater_spacing) *
|
||||
wtemp3->repeater_size;
|
||||
l_eff = ht_temp;
|
||||
}
|
||||
|
||||
|
@ -452,8 +497,7 @@ Htree2::in_htree()
|
|||
* hor. links left. After this it goes through the remaining vertical
|
||||
* links.
|
||||
*/
|
||||
void Htree2::out_htree()
|
||||
{
|
||||
void Htree2::out_htree() {
|
||||
//temp var
|
||||
double s1 = 0, s2 = 0, s3 = 0;
|
||||
double l_eff = 0;
|
||||
|
@ -465,46 +509,69 @@ void Htree2::out_htree()
|
|||
int v = (int) _log2(ndbl / 2);
|
||||
double len_temp;
|
||||
double ht_temp;
|
||||
if (uca_tree)
|
||||
{
|
||||
ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
if (uca_tree) {
|
||||
ht_temp = (mat_height * ndbl / 2 +
|
||||
/* since uca_tree models interbank tree,
|
||||
mat_height => bank height */
|
||||
((add_bits + data_in_bits + data_out_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
2 * (1 - pow(0.5, h)))) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch *
|
||||
((add_bits + data_in_bits + data_out_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
2 * (1 - pow(0.5, v)))) / 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
if (ndwl == ndbl) {
|
||||
ht_temp = ((mat_height * ndbl / 2) +
|
||||
((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
((add_bits + (search_data_in_bits +
|
||||
search_data_out_bits)) *
|
||||
(ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * h)
|
||||
) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
((add_bits + (search_data_in_bits +
|
||||
search_data_out_bits)) * (ndwl / 2 - 1) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * v)) / 2;
|
||||
|
||||
}
|
||||
else if (ndwl > ndbl) {
|
||||
} else if (ndwl > ndbl) {
|
||||
double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2));
|
||||
ht_temp = ((mat_height * ndbl / 2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch *
|
||||
((add_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
((ndbl / 2 - 1) + excess_part) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
(2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2;
|
||||
}
|
||||
else {
|
||||
((add_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
(ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * v)) / 2;
|
||||
} else {
|
||||
double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2));
|
||||
ht_temp = ((mat_height * ndbl / 2) +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h)
|
||||
((add_bits +
|
||||
(search_data_in_bits + search_data_out_bits)) *
|
||||
((ndwl / 2 - 1) + excess_part) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
((data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch * h)
|
||||
) / 2;
|
||||
len_temp = (mat_width * ndwl / 2 +
|
||||
((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2;
|
||||
((add_bits + (search_data_in_bits +
|
||||
search_data_out_bits)) *
|
||||
((ndwl / 2 - 1) + excess_part) *
|
||||
g_tp.wire_outside_mat.pitch) +
|
||||
(data_in_bits + data_out_bits) *
|
||||
g_tp.wire_outside_mat.pitch *
|
||||
(h + 2 * (1 - pow(0.5, v - h)))) / 2;
|
||||
}
|
||||
}
|
||||
area.h = ht_temp * 2;
|
||||
|
@ -517,8 +584,7 @@ void Htree2::out_htree()
|
|||
len = len_temp;
|
||||
ht = ht_temp / 2;
|
||||
|
||||
while (v > 0 || h > 0)
|
||||
{ //finds delay/power of each link in the tree
|
||||
while (v > 0 || h > 0) { //finds delay/power of each link in the tree
|
||||
if (wtemp1) delete wtemp1;
|
||||
if (wtemp2) delete wtemp2;
|
||||
if (wtemp3) delete wtemp3;
|
||||
|
@ -532,8 +598,7 @@ void Htree2::out_htree()
|
|||
wtemp3 = 0;
|
||||
h--;
|
||||
option = 0;
|
||||
}
|
||||
else if (v>0 && h>0) {
|
||||
} else if (v > 0 && h > 0) {
|
||||
//considers one horizontal link and one vertical link
|
||||
wtemp1 = new Wire(wt, len); // hor
|
||||
wtemp2 = new Wire(wt, ht); // ver
|
||||
|
@ -545,8 +610,7 @@ void Htree2::out_htree()
|
|||
v--;
|
||||
h--;
|
||||
option = 1;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// considers only one vertical link
|
||||
assert(h == 0);
|
||||
wtemp1 = new Wire(wt, ht); // hor
|
||||
|
@ -562,39 +626,31 @@ void Htree2::out_htree()
|
|||
power.searchOp.dynamic += wtemp1->power.readOp.dynamic * init_wire_bw;
|
||||
power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw;
|
||||
power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw;
|
||||
//cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
|
||||
if ((uca_tree == false && option == 2) || search_tree==true)
|
||||
{
|
||||
if ((uca_tree == false && option == 2) || search_tree == true) {
|
||||
wire_bw *= 2;
|
||||
}
|
||||
|
||||
if (uca_tree == false)
|
||||
{
|
||||
if (len_temp > wtemp1->repeater_spacing)
|
||||
{
|
||||
if (uca_tree == false) {
|
||||
if (len_temp > wtemp1->repeater_spacing) {
|
||||
s1 = wtemp1->repeater_size;
|
||||
l_eff = wtemp1->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size;
|
||||
} else {
|
||||
s1 = (len_temp / wtemp1->repeater_spacing) *
|
||||
wtemp1->repeater_size;
|
||||
l_eff = len_temp;
|
||||
}
|
||||
if (ht_temp > wtemp2->repeater_spacing)
|
||||
{
|
||||
if (ht_temp > wtemp2->repeater_spacing) {
|
||||
s2 = wtemp2->repeater_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size;
|
||||
} else {
|
||||
s2 = (len_temp / wtemp2->repeater_spacing) *
|
||||
wtemp2->repeater_size;
|
||||
}
|
||||
// first level
|
||||
output_buffer(s1, s2, l_eff);
|
||||
}
|
||||
|
||||
|
||||
if (option != 1)
|
||||
{
|
||||
if (option != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -605,25 +661,22 @@ void Htree2::out_htree()
|
|||
power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw;
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw;
|
||||
//cout<<"power.readOp.gate_leakage"<<power.readOp.gate_leakage<<endl;
|
||||
if (uca_tree)
|
||||
{
|
||||
if (uca_tree) {
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
}
|
||||
else
|
||||
{
|
||||
power.readOp.gate_leakage +=
|
||||
wtemp2->power.readOp.gate_leakage * wire_bw;
|
||||
} else {
|
||||
power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw);
|
||||
power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw;
|
||||
power.readOp.gate_leakage +=
|
||||
wtemp2->power.readOp.gate_leakage * wire_bw;
|
||||
wire_bw *= 2;
|
||||
|
||||
if (ht_temp > wtemp3->repeater_spacing)
|
||||
{
|
||||
if (ht_temp > wtemp3->repeater_spacing) {
|
||||
s3 = wtemp3->repeater_size;
|
||||
l_eff = wtemp3->repeater_spacing;
|
||||
}
|
||||
else
|
||||
{
|
||||
s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size;
|
||||
} else {
|
||||
s3 = (len_temp / wtemp3->repeater_spacing) *
|
||||
wtemp3->repeater_size;
|
||||
l_eff = ht_temp;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -44,8 +45,7 @@
|
|||
// leakage power includes entire htree in a bank (when uca_tree == false)
|
||||
// leakage power includes only part to one bank when uca_tree == true
|
||||
|
||||
class Htree2 : public Component
|
||||
{
|
||||
class Htree2 : public Component {
|
||||
public:
|
||||
Htree2(enum Wire_type wire_model,
|
||||
double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
|
||||
|
@ -64,8 +64,7 @@ class Htree2 : public Component
|
|||
|
||||
double in_rise_time, out_rise_time;
|
||||
|
||||
void set_in_rise_time(double rt)
|
||||
{
|
||||
void set_in_rise_time(double rt) {
|
||||
in_rise_time = rt;
|
||||
}
|
||||
|
||||
|
@ -81,7 +80,11 @@ class Htree2 : public Component
|
|||
double htree_vnodes;
|
||||
double mat_width;
|
||||
double mat_height;
|
||||
int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
|
||||
int add_bits;
|
||||
int data_in_bits;
|
||||
int search_data_in_bits;
|
||||
int data_out_bits;
|
||||
int search_data_out_bits;
|
||||
int ndbl, ndwl;
|
||||
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
|
||||
bool search_tree;
|
||||
|
|
File diff suppressed because it is too large
Load diff
758
ext/mcpat/cacti/mat.cc
Executable file → Normal file
758
ext/mcpat/cacti/mat.cc
Executable file → Normal file
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -39,8 +40,7 @@
|
|||
#include "subarray.h"
|
||||
#include "wire.h"
|
||||
|
||||
class Mat : public Component
|
||||
{
|
||||
class Mat : public Component {
|
||||
public:
|
||||
Mat(const DynamicParameter & dyn_p);
|
||||
~Mat();
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -45,14 +46,12 @@ int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cyc
|
|||
|
||||
Nuca::Nuca(
|
||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
|
||||
):deviceType(dt)
|
||||
{
|
||||
): deviceType(dt) {
|
||||
init_cont();
|
||||
}
|
||||
|
||||
void
|
||||
Nuca::init_cont()
|
||||
{
|
||||
Nuca::init_cont() {
|
||||
FILE *cont;
|
||||
char line[5000];
|
||||
char jk[5000];
|
||||
|
@ -68,7 +67,8 @@ Nuca::init_cont()
|
|||
for (int l = 0; l < 7; l++) {
|
||||
int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
|
||||
assert(fscanf(cont, "%[^\n]\n", line) != EOF);
|
||||
sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d",jk, &temp[0], &temp[1], &temp[2], &temp[3],
|
||||
sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
|
||||
&temp[0], &temp[1], &temp[2], &temp[3],
|
||||
&temp[4], &temp[5], &temp[6], &temp[7]);
|
||||
}
|
||||
}
|
||||
|
@ -78,8 +78,7 @@ Nuca::init_cont()
|
|||
}
|
||||
|
||||
void
|
||||
Nuca::print_cont_stats()
|
||||
{
|
||||
Nuca::print_cont_stats() {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
for (int j = 2; j < 5; j++) {
|
||||
for (int k = 0; k < ROUTER_TYPES; k++) {
|
||||
|
@ -104,8 +103,7 @@ Nuca::~Nuca(){
|
|||
|
||||
/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
|
||||
int
|
||||
Nuca::calc_cycles(double lat, double oper_freq)
|
||||
{
|
||||
Nuca::calc_cycles(double lat, double oper_freq) {
|
||||
//TODO: convert latch delay to FO4 */
|
||||
double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/
|
||||
cycle_time -= LATCH_DELAY;
|
||||
|
@ -138,8 +136,7 @@ nuca_org_t::~nuca_org_t() {
|
|||
* NUCA configuration
|
||||
*/
|
||||
void
|
||||
Nuca::sim_nuca()
|
||||
{
|
||||
Nuca::sim_nuca() {
|
||||
/* temp variables */
|
||||
int it, ro, wr;
|
||||
int num_cyc;
|
||||
|
@ -196,7 +193,10 @@ Nuca::sim_nuca()
|
|||
if (g_ip->cores <= 4) core_in = 2;
|
||||
else if (g_ip->cores <= 8) core_in = 3;
|
||||
else if (g_ip->cores <= 16) core_in = 4;
|
||||
else {cout << "Number of cores should be <= 16!\n"; exit(0);}
|
||||
else {
|
||||
cout << "Number of cores should be <= 16!\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
|
||||
// set the lower bound to an appropriate value. this depends on cache associativity
|
||||
|
@ -210,18 +210,15 @@ Nuca::sim_nuca()
|
|||
|
||||
iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
|
||||
|
||||
if (g_ip->force_wiretype)
|
||||
{
|
||||
if (g_ip->force_wiretype) {
|
||||
if (g_ip->wt == Low_swing) {
|
||||
wt_min = Low_swing;
|
||||
wt_max = Low_swing;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
wt_min = Global;
|
||||
wt_max = Low_swing - 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
wt_min = Global;
|
||||
wt_max = Low_swing;
|
||||
}
|
||||
|
@ -229,14 +226,16 @@ Nuca::sim_nuca()
|
|||
if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
|
||||
g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
|
||||
g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
|
||||
fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n");
|
||||
fprintf(stderr, "Incorrect bank count value! Please fix the ",
|
||||
"value in cache.cfg\n");
|
||||
}
|
||||
bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
|
||||
iterations = bank_start + 1;
|
||||
g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
|
||||
}
|
||||
cout << "Simulating various NUCA configurations\n";
|
||||
for (it=bank_start; it<iterations; it++) { /* different bank count values */
|
||||
for (it = bank_start; it < iterations; it++) {
|
||||
/* different bank count values */
|
||||
ures.tag_array2 = &tag;
|
||||
ures.data_array2 = &data;
|
||||
/*
|
||||
|
@ -249,8 +248,7 @@ Nuca::sim_nuca()
|
|||
|
||||
for (wr = wt_min; wr <= wt_max; wr++) {
|
||||
|
||||
for (ro=0; ro<ROUTER_TYPES; ro++)
|
||||
{
|
||||
for (ro = 0; ro < ROUTER_TYPES; ro++) {
|
||||
flit_width = (int) router_s[ro]->flit_size; //initialize router
|
||||
nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
|
||||
|
||||
|
@ -264,10 +262,14 @@ Nuca::sim_nuca()
|
|||
wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
|
||||
|
||||
|
||||
hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001));
|
||||
ver_hop_lat = calc_cycles(wire_vertical[wr]->delay,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001));
|
||||
hor_hop_lat =
|
||||
calc_cycles(wire_horizontal[wr]->delay,
|
||||
1 /(nuca_list.back()->nuca_pda.cycle_time *
|
||||
.001));
|
||||
ver_hop_lat =
|
||||
calc_cycles(wire_vertical[wr]->delay,
|
||||
1 / (nuca_list.back()->nuca_pda.cycle_time *
|
||||
.001));
|
||||
|
||||
/*
|
||||
* assume a grid like topology and explore for optimal network
|
||||
|
@ -310,9 +312,12 @@ Nuca::sim_nuca()
|
|||
avg_vhop = totno_vhops / bank_count;
|
||||
|
||||
/* net access latency */
|
||||
curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) +
|
||||
curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
|
||||
avg_hop) +
|
||||
calc_cycles(ures.access_time,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001));
|
||||
1 /
|
||||
(nuca_list.back()->nuca_pda.cycle_time *
|
||||
.001));
|
||||
|
||||
/* avg access lat of nuca */
|
||||
avg_dyn_power =
|
||||
|
@ -364,7 +369,9 @@ Nuca::sim_nuca()
|
|||
nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
|
||||
|
||||
num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
|
||||
1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/));
|
||||
1 /
|
||||
(nuca_list.back()->nuca_pda.cycle_time *
|
||||
.001/*GHz*/));
|
||||
if (num_cyc % 2 != 0) num_cyc++;
|
||||
if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
|
||||
|
||||
|
@ -373,8 +380,7 @@ Nuca::sim_nuca()
|
|||
cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
|
||||
nuca_list.back()->contention =
|
||||
cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
nuca_list.back()->nuca_pda.delay = opt_acclat +
|
||||
cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
|
||||
nuca_list.back()->contention =
|
||||
|
@ -405,14 +411,12 @@ Nuca::sim_nuca()
|
|||
g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
|
||||
|
||||
list<nuca_org_t *>::iterator niter;
|
||||
for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter)
|
||||
{
|
||||
for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
|
||||
delete *niter;
|
||||
}
|
||||
nuca_list.clear();
|
||||
|
||||
for(int i=0; i < ROUTER_TYPES; i++)
|
||||
{
|
||||
for (int i = 0; i < ROUTER_TYPES; i++) {
|
||||
delete router_s[i];
|
||||
}
|
||||
g_ip->display_ip();
|
||||
|
@ -428,8 +432,7 @@ Nuca::sim_nuca()
|
|||
|
||||
|
||||
void
|
||||
Nuca::print_nuca (nuca_org_t *fr)
|
||||
{
|
||||
Nuca::print_nuca (nuca_org_t *fr) {
|
||||
printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
|
||||
"----------\n\n");
|
||||
printf("Optimal number of banks - %d\n", fr->bank_count);
|
||||
|
@ -447,24 +450,19 @@ Nuca::print_nuca (nuca_org_t *fr)
|
|||
if (fr->h_wire->wt == Global) {
|
||||
printf("\tWire type - Full swing global wires with least "
|
||||
"possible delay\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_5) {
|
||||
} else if (fr->h_wire->wt == Global_5) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"5%% delay penalty\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_10) {
|
||||
} else if (fr->h_wire->wt == Global_10) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"10%% delay penalty\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_20) {
|
||||
} else if (fr->h_wire->wt == Global_20) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"20%% delay penalty\n");
|
||||
}
|
||||
else if (fr->h_wire->wt == Global_30) {
|
||||
} else if (fr->h_wire->wt == Global_30) {
|
||||
printf("\tWire type - Full swing global wires with "
|
||||
"30%% delay penalty\n");
|
||||
}
|
||||
else if(fr->h_wire->wt == Low_swing) {
|
||||
} else if (fr->h_wire->wt == Low_swing) {
|
||||
printf("\tWire type - Low swing wires\n");
|
||||
}
|
||||
|
||||
|
@ -489,8 +487,7 @@ Nuca::print_nuca (nuca_org_t *fr)
|
|||
|
||||
|
||||
nuca_org_t *
|
||||
Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
|
||||
{
|
||||
Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
|
||||
double cost = 0;
|
||||
double min_cost = BIGNUM;
|
||||
nuca_org_t *res = NULL;
|
||||
|
@ -528,8 +525,7 @@ Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
|
|||
min_cost = cost;
|
||||
res = ((*niter));
|
||||
}
|
||||
}
|
||||
else if (g_ip->ed == 2) {
|
||||
} else if (g_ip->ed == 2) {
|
||||
cost = ((*niter)->nuca_pda.delay / minval->min_delay) *
|
||||
((*niter)->nuca_pda.delay / minval->min_delay) *
|
||||
((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn);
|
||||
|
@ -537,8 +533,7 @@ Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
|
|||
min_cost = cost;
|
||||
res = ((*niter));
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
/*
|
||||
* check whether the current organization
|
||||
* meets the input deviation constraints
|
||||
|
@ -549,17 +544,19 @@ Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
|
|||
if (v) {
|
||||
cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) +
|
||||
c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) +
|
||||
dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) +
|
||||
lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) +
|
||||
a * ((*niter)->nuca_pda.area.get_area()/minval->min_area));
|
||||
dp * ((*niter)->nuca_pda.power.readOp.dynamic /
|
||||
minval->min_dyn) +
|
||||
lp * ((*niter)->nuca_pda.power.readOp.leakage /
|
||||
minval->min_leakage) +
|
||||
a * ((*niter)->nuca_pda.area.get_area() /
|
||||
minval->min_area));
|
||||
fprintf(stderr, "cost = %g\n", cost);
|
||||
|
||||
if (min_cost > cost) {
|
||||
min_cost = cost;
|
||||
res = ((*niter));
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
niter = n->erase(niter);
|
||||
if (niter != n->begin())
|
||||
niter --;
|
||||
|
@ -570,16 +567,18 @@ Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval)
|
|||
}
|
||||
|
||||
int
|
||||
Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
|
||||
{
|
||||
if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) {
|
||||
Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
|
||||
if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
|
||||
g_ip->delay_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 >
|
||||
if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
|
||||
minval->min_dyn)*100 >
|
||||
g_ip->dynamic_power_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 >
|
||||
if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
|
||||
minval->min_leakage)*100 >
|
||||
g_ip->leakage_power_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -587,7 +586,8 @@ Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
|
|||
g_ip->cycle_time_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 >
|
||||
if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
|
||||
100 >
|
||||
g_ip->area_dev_nuca) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -595,8 +595,7 @@ Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval)
|
|||
}
|
||||
|
||||
void
|
||||
Nuca::calculate_nuca_area (nuca_org_t *nuca)
|
||||
{
|
||||
Nuca::calculate_nuca_area (nuca_org_t *nuca) {
|
||||
nuca->nuca_pda.area.h =
|
||||
nuca->rows * ((nuca->h_wire->wire_width +
|
||||
nuca->h_wire->wire_spacing)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -71,8 +72,7 @@ class nuca_org_t {
|
|||
|
||||
|
||||
|
||||
class Nuca : public Component
|
||||
{
|
||||
class Nuca : public Component {
|
||||
public:
|
||||
Nuca(
|
||||
TechnologyParameter::DeviceType *dt);
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -46,8 +47,7 @@ TechnologyParameter g_tp;
|
|||
|
||||
|
||||
|
||||
void TechnologyParameter::DeviceType::display(uint32_t indent)
|
||||
{
|
||||
void TechnologyParameter::DeviceType::display(uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl;
|
||||
|
@ -71,8 +71,7 @@ void TechnologyParameter::DeviceType::display(uint32_t indent)
|
|||
|
||||
|
||||
|
||||
void TechnologyParameter::InterconnectType::display(uint32_t indent)
|
||||
{
|
||||
void TechnologyParameter::InterconnectType::display(uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl;
|
||||
|
@ -80,16 +79,14 @@ void TechnologyParameter::InterconnectType::display(uint32_t indent)
|
|||
cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl;
|
||||
}
|
||||
|
||||
void TechnologyParameter::ScalingFactor::display(uint32_t indent)
|
||||
{
|
||||
void TechnologyParameter::ScalingFactor::display(uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl;
|
||||
cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl;
|
||||
}
|
||||
|
||||
void TechnologyParameter::MemoryType::display(uint32_t indent)
|
||||
{
|
||||
void TechnologyParameter::MemoryType::display(uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl;
|
||||
|
@ -102,8 +99,7 @@ void TechnologyParameter::MemoryType::display(uint32_t indent)
|
|||
|
||||
|
||||
|
||||
void TechnologyParameter::display(uint32_t indent)
|
||||
{
|
||||
void TechnologyParameter::display(uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl;
|
||||
|
@ -185,8 +181,7 @@ void TechnologyParameter::display(uint32_t indent)
|
|||
|
||||
|
||||
DynamicParameter::DynamicParameter():
|
||||
use_inp_params(0), cell(), is_valid(true)
|
||||
{
|
||||
use_inp_params(0), cell(), is_valid(true) {
|
||||
}
|
||||
|
||||
|
||||
|
@ -202,11 +197,11 @@ DynamicParameter::DynamicParameter(
|
|||
unsigned int Ndsam_lev_1_,
|
||||
unsigned int Ndsam_lev_2_,
|
||||
bool is_main_mem_):
|
||||
is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_),
|
||||
is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0),
|
||||
Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_),
|
||||
Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_),
|
||||
number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0),
|
||||
is_main_mem(is_main_mem_), cell(), is_valid(false)
|
||||
{
|
||||
is_main_mem(is_main_mem_), cell(), is_valid(false) {
|
||||
ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type;
|
||||
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
|
||||
|
||||
|
@ -214,44 +209,37 @@ DynamicParameter::DynamicParameter(
|
|||
const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local;
|
||||
fully_assoc = (g_ip->fully_assoc) ? true : false;
|
||||
|
||||
if (fully_assoc || pure_cam)
|
||||
{ // fully-assocative cache -- ref: CACTi 2.0 report
|
||||
// fully-assocative cache -- ref: CACTi 2.0 report
|
||||
if (fully_assoc || pure_cam) {
|
||||
if (Ndwl != 1 || //Ndwl is fixed to 1 for FA
|
||||
Ndcm != 1 || //Ndcm is fixed to 1 for FA
|
||||
Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA
|
||||
Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one
|
||||
Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one
|
||||
Ndbl < 2)
|
||||
{
|
||||
Ndbl < 2) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if ((is_dram) && (!is_tag) && (Ndcm > 1))
|
||||
{
|
||||
if ((is_dram) && (!is_tag) && (Ndcm > 1)) {
|
||||
return; // For a DRAM array, each bitline has its own sense-amp
|
||||
}
|
||||
|
||||
// If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be
|
||||
// at least two because an array is assumed to have at least one mat. And a mat
|
||||
// is formed out of two horizontal subarrays and two vertical subarrays
|
||||
if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1))
|
||||
{
|
||||
if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
//***********compute row, col of an subarray
|
||||
if (!(fully_assoc || pure_cam))//Not fully_asso nor cam
|
||||
{
|
||||
if (!(fully_assoc || pure_cam)) {
|
||||
//Not fully_asso nor cam
|
||||
// if data array, let tagbits = 0
|
||||
if (is_tag)
|
||||
{
|
||||
if (g_ip->specific_tag)
|
||||
{
|
||||
if (is_tag) {
|
||||
if (g_ip->specific_tag) {
|
||||
tagbits = g_ip->tag_w;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) +
|
||||
_log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks);
|
||||
|
||||
|
@ -262,9 +250,7 @@ DynamicParameter::DynamicParameter(
|
|||
g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON);
|
||||
num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON);
|
||||
//burst_length = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks *
|
||||
g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON);
|
||||
num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON);
|
||||
|
@ -279,23 +265,21 @@ DynamicParameter::DynamicParameter(
|
|||
|
||||
}
|
||||
|
||||
else
|
||||
{//either fully-asso or cam
|
||||
if (pure_cam)
|
||||
{
|
||||
if (g_ip->specific_tag)
|
||||
{
|
||||
else {//either fully-asso or cam
|
||||
if (pure_cam) {
|
||||
if (g_ip->specific_tag) {
|
||||
tagbits = int(ceil(g_ip->tag_w / 8.0) * 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8);
|
||||
// cout<<"Pure CAM needs tag width to be specified"<<endl;
|
||||
// exit(0);
|
||||
}
|
||||
//tagbits = (((tagbits + 3) >> 2) << 2);
|
||||
|
||||
tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries.
|
||||
//TODO: error check input of tagbits and blocksize
|
||||
//TODO: for pure CAM, g_ip->block should be number of entries.
|
||||
tag_num_r_subarray = (int)ceil(capacity_per_die /
|
||||
(g_ip->nbanks * tagbits / 8.0 * Ndbl));
|
||||
//tag_num_c_subarray = (int)(tagbits + EPSILON);
|
||||
tag_num_c_subarray = tagbits;
|
||||
if (tag_num_r_subarray == 0) return;
|
||||
|
@ -303,20 +287,16 @@ DynamicParameter::DynamicParameter(
|
|||
if (tag_num_c_subarray < MINSUBARRAYCOLS) return;
|
||||
if (tag_num_c_subarray > MAXSUBARRAYCOLS) return;
|
||||
num_r_subarray = tag_num_r_subarray;
|
||||
}
|
||||
else //fully associative
|
||||
{
|
||||
if (g_ip->specific_tag)
|
||||
{
|
||||
} else { //fully associative
|
||||
if (g_ip->specific_tag) {
|
||||
tagbits = g_ip->tag_w;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem.
|
||||
}
|
||||
tagbits = (((tagbits + 3) >> 2) << 2);
|
||||
|
||||
tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl));
|
||||
tag_num_r_subarray = (int)(capacity_per_die /
|
||||
(g_ip->nbanks * g_ip->block_sz * Ndbl));
|
||||
tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON);
|
||||
if (tag_num_r_subarray == 0) return;
|
||||
if (tag_num_r_subarray > MAXSUBARRAYROWS) return;
|
||||
|
@ -337,37 +317,37 @@ DynamicParameter::DynamicParameter(
|
|||
//****************end of computation of row, col of an subarray
|
||||
|
||||
// calculate wire parameters
|
||||
if (fully_assoc || pure_cam)
|
||||
{
|
||||
cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
|
||||
+ 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
|
||||
cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
|
||||
+ 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports;
|
||||
if (fully_assoc || pure_cam) {
|
||||
cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch *
|
||||
(g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
|
||||
+ 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
|
||||
wire_local.pitch * g_ip->num_se_rd_ports;
|
||||
cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch *
|
||||
(g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports)
|
||||
+ 2 * wire_local.pitch * (g_ip->num_search_ports - 1) +
|
||||
wire_local.pitch * g_ip->num_se_rd_ports;
|
||||
|
||||
cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports)
|
||||
cell.h = g_tp.sram.b_h + 2 * wire_local.pitch *
|
||||
(g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports)
|
||||
+ 2 * wire_local.pitch * (g_ip->num_search_ports - 1);
|
||||
cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)
|
||||
+ g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(is_tag)
|
||||
{
|
||||
cell.w = g_tp.sram.b_w + 2 * wire_local.pitch *
|
||||
(g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports -
|
||||
g_ip->num_se_rd_ports)
|
||||
+ g_ip->num_wr_ports) + g_tp.wire_local.pitch *
|
||||
g_ip->num_se_rd_ports + 2 * wire_local.pitch *
|
||||
(g_ip->num_search_ports - 1);
|
||||
} else {
|
||||
if (is_tag) {
|
||||
cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports +
|
||||
g_ip->num_wr_ports);
|
||||
cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports +
|
||||
(g_ip->num_rd_ports - g_ip->num_se_rd_ports)) +
|
||||
wire_local.pitch * g_ip->num_se_rd_ports;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_dram)
|
||||
{
|
||||
} else {
|
||||
if (is_dram) {
|
||||
cell.h = g_tp.dram.b_h;
|
||||
cell.w = g_tp.dram.b_w;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports +
|
||||
g_ip->num_rw_ports - 1 + g_ip->num_rd_ports);
|
||||
cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 +
|
||||
|
@ -380,30 +360,25 @@ DynamicParameter::DynamicParameter(
|
|||
double c_b_metal = cell.h * wire_local.C_per_um;
|
||||
double C_bl;
|
||||
|
||||
if (!(fully_assoc || pure_cam))
|
||||
{
|
||||
if (is_dram)
|
||||
{
|
||||
if (!(fully_assoc || pure_cam)) {
|
||||
if (is_dram) {
|
||||
deg_bl_muxing = 1;
|
||||
if (ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
if (ram_cell_tech_type == comm_dram) {
|
||||
C_bl = num_r_subarray * c_b_metal;
|
||||
V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl);
|
||||
if (V_b_sense < VBITSENSEMIN)
|
||||
{
|
||||
V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
|
||||
(g_tp.dram_cell_C + C_bl);
|
||||
if (V_b_sense < VBITSENSEMIN) {
|
||||
return;
|
||||
}
|
||||
V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
|
||||
dram_refresh_period = 64e-3;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0;
|
||||
C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
|
||||
V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl);
|
||||
V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C /
|
||||
(g_tp.dram_cell_C + C_bl);
|
||||
|
||||
if (V_b_sense < VBITSENSEMIN)
|
||||
{
|
||||
if (V_b_sense < VBITSENSEMIN) {
|
||||
return; //Sense amp input signal is smaller that minimum allowable sense amp input signal
|
||||
}
|
||||
V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value
|
||||
|
@ -411,9 +386,7 @@ DynamicParameter::DynamicParameter(
|
|||
//dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp;
|
||||
dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ //SRAM
|
||||
} else { //SRAM
|
||||
V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
|
||||
deg_bl_muxing = Ndcm;
|
||||
// "/ 2.0" below is due to the fact that two adjacent access transistors share drain
|
||||
|
@ -422,9 +395,7 @@ DynamicParameter::DynamicParameter(
|
|||
C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal);
|
||||
dram_refresh_period = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM
|
||||
V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN;
|
||||
deg_bl_muxing = 1;//FA fix as 1
|
||||
|
@ -441,8 +412,7 @@ DynamicParameter::DynamicParameter(
|
|||
// for CAM, si=di, but so = matching address. do = data out = di (for normal read/write)
|
||||
// so/si needs broadcase while do/di do not
|
||||
|
||||
if (fully_assoc || pure_cam)
|
||||
{
|
||||
if (fully_assoc || pure_cam) {
|
||||
switch (Ndbl) {
|
||||
case (0):
|
||||
cout << " Invalid Ndbl \n" << endl;
|
||||
|
@ -462,82 +432,61 @@ DynamicParameter::DynamicParameter(
|
|||
}
|
||||
num_mats = num_mats_h_dir * num_mats_v_dir;
|
||||
|
||||
if (fully_assoc)
|
||||
{
|
||||
if (fully_assoc) {
|
||||
num_so_b_mat = data_num_c_subarray;
|
||||
num_do_b_mat = data_num_c_subarray + tagbits;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
|
||||
num_do_b_mat = tagbits;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_mats_h_dir = MAX(Ndwl / 2, 1);
|
||||
num_mats_v_dir = MAX(Ndbl / 2, 1);
|
||||
num_mats = num_mats_h_dir * num_mats_v_dir;
|
||||
num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
|
||||
num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray /
|
||||
(deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1);
|
||||
}
|
||||
|
||||
if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats)))
|
||||
{
|
||||
if (!(fully_assoc || pure_cam) && (num_do_b_mat <
|
||||
(num_subarrays / num_mats))) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
int deg_sa_mux_l1_non_assoc;
|
||||
//TODO:the i/o for subbank is not necessary and should be removed.
|
||||
if (!(fully_assoc || pure_cam))
|
||||
{
|
||||
if (!is_tag)
|
||||
{
|
||||
if (is_main_mem == true)
|
||||
{
|
||||
if (!(fully_assoc || pure_cam)) {
|
||||
if (!is_tag) {
|
||||
if (is_main_mem == true) {
|
||||
num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w;
|
||||
deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (g_ip->fast_access == true)
|
||||
{
|
||||
} else {
|
||||
if (g_ip->fast_access == true) {
|
||||
num_do_b_subbank = g_ip->out_w * g_ip->data_assoc;
|
||||
deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
|
||||
num_do_b_subbank = g_ip->out_w;
|
||||
deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc;
|
||||
if (deg_sa_mux_l1_non_assoc < 1)
|
||||
{
|
||||
if (deg_sa_mux_l1_non_assoc < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_do_b_subbank = tagbits * g_ip->tag_assoc;
|
||||
if (num_do_b_mat < tagbits)
|
||||
{
|
||||
if (num_do_b_mat < tagbits) {
|
||||
return;
|
||||
}
|
||||
deg_sa_mux_l1_non_assoc = Ndsam_lev_1;
|
||||
//num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fully_assoc)
|
||||
{
|
||||
} else {
|
||||
if (fully_assoc) {
|
||||
num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa
|
||||
num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data
|
||||
num_do_b_subbank = tag_num_c_subarray;
|
||||
}
|
||||
|
@ -547,81 +496,62 @@ DynamicParameter::DynamicParameter(
|
|||
|
||||
deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc;
|
||||
|
||||
if (fully_assoc || pure_cam)
|
||||
{
|
||||
if (fully_assoc || pure_cam) {
|
||||
num_act_mats_hor_dir = 1;
|
||||
num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat;
|
||||
if (num_act_mats_hor_dir == 0)
|
||||
{
|
||||
if (num_act_mats_hor_dir == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//compute num_do_mat for tag
|
||||
if (is_tag)
|
||||
{
|
||||
if (!(fully_assoc || pure_cam))
|
||||
{
|
||||
if (is_tag) {
|
||||
if (!(fully_assoc || pure_cam)) {
|
||||
num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir;
|
||||
num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat;
|
||||
}
|
||||
}
|
||||
|
||||
if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram))
|
||||
{
|
||||
if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits)
|
||||
{
|
||||
if ((g_ip->is_cache == false && is_main_mem == true) ||
|
||||
(PAGE_MODE == 1 && is_dram)) {
|
||||
if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 !=
|
||||
(int)g_ip->page_sz_bits) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also apply to RAM arrays
|
||||
if (is_tag == false && g_ip->is_main_mem == true &&
|
||||
num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc))
|
||||
{
|
||||
num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 <
|
||||
((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (num_act_mats_hor_dir > num_mats_h_dir)
|
||||
{
|
||||
if (num_act_mats_hor_dir > num_mats_h_dir) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
//compute di for mat subbank and bank
|
||||
if (!(fully_assoc ||pure_cam))
|
||||
{
|
||||
if(!is_tag)
|
||||
{
|
||||
if(g_ip->fast_access == true)
|
||||
{
|
||||
if (!(fully_assoc || pure_cam)) {
|
||||
if (!is_tag) {
|
||||
if (g_ip->fast_access == true) {
|
||||
num_di_b_mat = num_do_b_mat / g_ip->data_assoc;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_di_b_mat = num_do_b_mat;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_di_b_mat = tagbits;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fully_assoc)
|
||||
{
|
||||
} else {
|
||||
if (fully_assoc) {
|
||||
num_di_b_mat = num_do_b_mat;
|
||||
//*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache,
|
||||
//but inside the mat wire tracks need to be reserved for search data bus
|
||||
num_si_b_mat = tagbits;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_di_b_mat = tagbits;
|
||||
num_si_b_mat = tagbits;//*num_subarrays/num_mats;
|
||||
}
|
||||
|
@ -643,41 +573,29 @@ DynamicParameter::DynamicParameter(
|
|||
num_se_rd_ports = g_ip->num_se_rd_ports;
|
||||
num_search_ports = g_ip->num_search_ports;
|
||||
|
||||
if (is_dram && is_main_mem)
|
||||
{
|
||||
if (is_dram && is_main_mem) {
|
||||
number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec,
|
||||
_log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2));
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) +
|
||||
_log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2);
|
||||
}
|
||||
|
||||
if (!(fully_assoc ||pure_cam))
|
||||
{
|
||||
if (is_tag)
|
||||
{
|
||||
if (!(fully_assoc || pure_cam)) {
|
||||
if (is_tag) {
|
||||
num_di_b_bank_per_port = tagbits;
|
||||
num_do_b_bank_per_port = g_ip->data_assoc;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc;
|
||||
num_do_b_bank_per_port = g_ip->out_w;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (fully_assoc)
|
||||
{
|
||||
} else {
|
||||
if (fully_assoc) {
|
||||
num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz?
|
||||
num_si_b_bank_per_port = tagbits;
|
||||
num_do_b_bank_per_port = g_ip->out_w + tagbits;
|
||||
num_so_b_bank_per_port = g_ip->out_w;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_di_b_bank_per_port = tagbits;
|
||||
num_si_b_bank_per_port = tagbits;
|
||||
num_do_b_bank_per_port = tagbits;
|
||||
|
@ -685,14 +603,12 @@ DynamicParameter::DynamicParameter(
|
|||
}
|
||||
}
|
||||
|
||||
if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access))
|
||||
{
|
||||
if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) {
|
||||
number_way_select_signals_mat = g_ip->data_assoc;
|
||||
}
|
||||
|
||||
// add ECC adjustment to all data signals that traverse on H-trees.
|
||||
if (g_ip->add_ecc_b_ == true)
|
||||
{
|
||||
if (g_ip->add_ecc_b_ == true) {
|
||||
num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_));
|
||||
num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_));
|
||||
num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_));
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -40,11 +41,9 @@
|
|||
#include "io.h"
|
||||
|
||||
// parameters which are functions of certain device technology
|
||||
class TechnologyParameter
|
||||
{
|
||||
class TechnologyParameter {
|
||||
public:
|
||||
class DeviceType
|
||||
{
|
||||
class DeviceType {
|
||||
public:
|
||||
double C_g_ideal;
|
||||
double C_fringe;
|
||||
|
@ -71,10 +70,11 @@ class TechnologyParameter
|
|||
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
|
||||
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
|
||||
Vdd(0), Vth(0),
|
||||
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
|
||||
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { };
|
||||
void reset()
|
||||
{
|
||||
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0),
|
||||
I_g_on_p(0),
|
||||
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0),
|
||||
long_channel_leakage_reduction(0) { };
|
||||
void reset() {
|
||||
C_g_ideal = 0;
|
||||
C_fringe = 0;
|
||||
C_overlap = 0;
|
||||
|
@ -99,8 +99,7 @@ class TechnologyParameter
|
|||
|
||||
void display(uint32_t indent = 0);
|
||||
};
|
||||
class InterconnectType
|
||||
{
|
||||
class InterconnectType {
|
||||
public:
|
||||
double pitch;
|
||||
double R_per_um;
|
||||
|
@ -113,8 +112,7 @@ class TechnologyParameter
|
|||
|
||||
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
|
||||
|
||||
void reset()
|
||||
{
|
||||
void reset() {
|
||||
pitch = 0;
|
||||
R_per_um = 0;
|
||||
C_per_um = 0;
|
||||
|
@ -127,8 +125,7 @@ class TechnologyParameter
|
|||
|
||||
void display(uint32_t indent = 0);
|
||||
};
|
||||
class MemoryType
|
||||
{
|
||||
class MemoryType {
|
||||
public:
|
||||
double b_w;
|
||||
double b_h;
|
||||
|
@ -137,8 +134,7 @@ class TechnologyParameter
|
|||
double cell_nmos_w;
|
||||
double Vbitpre;
|
||||
|
||||
void reset()
|
||||
{
|
||||
void reset() {
|
||||
b_w = 0;
|
||||
b_h = 0;
|
||||
cell_a_w = 0;
|
||||
|
@ -150,8 +146,7 @@ class TechnologyParameter
|
|||
void display(uint32_t indent = 0);
|
||||
};
|
||||
|
||||
class ScalingFactor
|
||||
{
|
||||
class ScalingFactor {
|
||||
public:
|
||||
double logic_scaling_co_eff;
|
||||
double core_tx_density;
|
||||
|
@ -160,8 +155,7 @@ class TechnologyParameter
|
|||
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
|
||||
long_channel_leakage_reduction(0) { };
|
||||
|
||||
void reset()
|
||||
{
|
||||
void reset() {
|
||||
logic_scaling_co_eff = 0;
|
||||
core_tx_density = 0;
|
||||
long_channel_leakage_reduction = 0;
|
||||
|
@ -240,8 +234,7 @@ class TechnologyParameter
|
|||
|
||||
void display(uint32_t indent = 0);
|
||||
|
||||
void reset()
|
||||
{
|
||||
void reset() {
|
||||
dram_cell_Vdd = 0;
|
||||
dram_cell_I_on = 0;
|
||||
dram_cell_C = 0;
|
||||
|
@ -282,8 +275,7 @@ class TechnologyParameter
|
|||
|
||||
|
||||
|
||||
class DynamicParameter
|
||||
{
|
||||
class DynamicParameter {
|
||||
public:
|
||||
bool is_tag;
|
||||
bool pure_ram;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -45,8 +46,7 @@ Router::Router(
|
|||
deviceType(dt),
|
||||
I(I_),
|
||||
O(O_),
|
||||
M(M_)
|
||||
{
|
||||
M(M_) {
|
||||
vc_buffer_size = vc_buf;
|
||||
vc_count = vc_c;
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||
|
@ -138,8 +138,7 @@ Router::tr_crossbar_power() {
|
|||
crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2;
|
||||
}
|
||||
|
||||
void Router::buffer_stats()
|
||||
{
|
||||
void Router::buffer_stats() {
|
||||
DynamicParameter dyn_p;
|
||||
dyn_p.is_tag = false;
|
||||
dyn_p.pure_cam = false;
|
||||
|
@ -203,8 +202,7 @@ void Router::buffer_stats()
|
|||
|
||||
|
||||
void
|
||||
Router::cb_stats ()
|
||||
{
|
||||
Router::cb_stats () {
|
||||
if (1) {
|
||||
Crossbar c_b(I, O, flit_size);
|
||||
c_b.compute_power();
|
||||
|
@ -214,8 +212,7 @@ Router::cb_stats ()
|
|||
crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
|
||||
crossbar.area = c_b.area;
|
||||
// c_b.print_crossbar();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
crossbar.power.readOp.dynamic = tr_crossbar_power();
|
||||
crossbar.power.readOp.leakage = flit_size * I * O *
|
||||
cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
|
||||
|
@ -225,8 +222,7 @@ Router::cb_stats ()
|
|||
}
|
||||
|
||||
void
|
||||
Router::get_router_power()
|
||||
{
|
||||
Router::get_router_power() {
|
||||
/* calculate buffer stats */
|
||||
buffer_stats();
|
||||
|
||||
|
@ -246,17 +242,18 @@ Router::get_router_power()
|
|||
cbarb.power.readOp.gate_leakage * O;
|
||||
|
||||
// arb_stats();
|
||||
power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
|
||||
power.readOp.dynamic = ((buffer.power.readOp.dynamic +
|
||||
buffer.power.writeOp.dynamic) +
|
||||
crossbar.power.readOp.dynamic +
|
||||
arbiter.power.readOp.dynamic) * MIN(I, O) * M;
|
||||
double pppm_t[4] = {1, I, I, 1};
|
||||
power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
|
||||
power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) *
|
||||
pppm_lkg;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
Router::get_router_delay ()
|
||||
{
|
||||
Router::get_router_delay () {
|
||||
FREQUENCY = 5; // move this to config file --TODO
|
||||
cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps
|
||||
delay = 4;
|
||||
|
@ -268,15 +265,13 @@ Router::get_router_delay ()
|
|||
}
|
||||
|
||||
void
|
||||
Router::get_router_area()
|
||||
{
|
||||
Router::get_router_area() {
|
||||
area.h = I * buffer.area.h;
|
||||
area.w = buffer.area.w + crossbar.area.w;
|
||||
}
|
||||
|
||||
void
|
||||
Router::calc_router_parameters()
|
||||
{
|
||||
Router::calc_router_parameters() {
|
||||
/* calculate router frequency and pipeline cycles */
|
||||
get_router_delay();
|
||||
|
||||
|
@ -288,24 +283,33 @@ Router::calc_router_parameters()
|
|||
}
|
||||
|
||||
void
|
||||
Router::print_router()
|
||||
{
|
||||
Router::print_router() {
|
||||
cout << "\n\nRouter stats:\n";
|
||||
cout << "\tRouter Area - " << area.get_area()*1e-6 << "(mm^2)\n";
|
||||
cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
|
||||
cout << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3
|
||||
<< "GHz\n";
|
||||
cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n";
|
||||
cout << "\tNo. of Virtual channels - " << vc_count << "\n";
|
||||
cout << "\tNo. of pipeline stages - " << delay << endl;
|
||||
cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
|
||||
cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
|
||||
cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
|
||||
cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
|
||||
cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
|
||||
cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
|
||||
cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
|
||||
cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
|
||||
cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
|
||||
cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
|
||||
cout << "\tNo. of buffer entries per virtual channel - "
|
||||
<< vc_buffer_size << "\n";
|
||||
cout << "\tSimple buffer Area - " << buffer.area.get_area()*1e-6
|
||||
<< "(mm^2)\n";
|
||||
cout << "\tSimple buffer access (Read) - "
|
||||
<< buffer.power.readOp.dynamic * 1e9 << " (nJ)\n";
|
||||
cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3
|
||||
<< " (mW)\n";
|
||||
cout << "\tCrossbar Area - " << crossbar.area.get_area()*1e-6
|
||||
<< "(mm^2)\n";
|
||||
cout << "\tCross bar access energy - "
|
||||
<< crossbar.power.readOp.dynamic * 1e9 << " (nJ)\n";
|
||||
cout << "\tCross bar leakage power - "
|
||||
<< crossbar.power.readOp.leakage * 1e3 << " (mW)\n";
|
||||
cout << "\tArbiter access energy (VC arb + Crossbar arb) - "
|
||||
<< arbiter.power.readOp.dynamic * 1e9 << " (nJ)\n";
|
||||
cout << "\tArbiter leakage (VC arb + Crossbar arb) - "
|
||||
<< arbiter.power.readOp.leakage * 1e3 << " (mW)\n";
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -47,8 +48,7 @@
|
|||
#include "parameter.h"
|
||||
#include "wire.h"
|
||||
|
||||
class Router : public Component
|
||||
{
|
||||
class Router : public Component {
|
||||
public:
|
||||
Router(
|
||||
double flit_size_,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -41,13 +42,13 @@
|
|||
Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
|
||||
dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
|
||||
num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
|
||||
cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_)
|
||||
{
|
||||
cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
|
||||
//num_cols=7;
|
||||
//cout<<"num_cols ="<< num_cols <<endl;
|
||||
if (!(is_fa || dp.pure_cam))
|
||||
{
|
||||
num_cols +=(g_ip->add_ecc_b_ ? (int)ceil(num_cols / num_bits_per_ecc_b_) : 0); // ECC overhead
|
||||
if (!(is_fa || dp.pure_cam)) {
|
||||
// ECC overhead
|
||||
num_cols += (g_ip->add_ecc_b_ ? (int)ceil(num_cols /
|
||||
num_bits_per_ecc_b_) : 0);
|
||||
uint32_t ram_num_cells_wl_stitching =
|
||||
(dp.ram_cell_tech_type == lp_dram) ? dram_num_cells_wl_stitching_ :
|
||||
(dp.ram_cell_tech_type == comm_dram) ? comm_dram_num_cells_wl_stitching_ : sram_num_cells_wl_stitching_;
|
||||
|
@ -56,19 +57,14 @@ Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
|
|||
|
||||
area.w = cell.w * num_cols +
|
||||
ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_; // stitching overhead
|
||||
}
|
||||
else //cam fa
|
||||
{
|
||||
} else { //cam fa
|
||||
|
||||
//should not add dummy row here since the dummy row do not need decoder
|
||||
if (is_fa)// fully associative cache
|
||||
{
|
||||
if (is_fa) { // fully associative cache
|
||||
num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
|
||||
num_cols_fa_ram += (g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_) : 0);
|
||||
num_cols = num_cols_fa_cam + num_cols_fa_ram;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
num_cols_fa_cam += g_ip->add_ecc_b_ ? (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_) : 0;
|
||||
num_cols_fa_ram = 0;
|
||||
num_cols = num_cols_fa_cam;
|
||||
|
@ -76,9 +72,13 @@ Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
|
|||
|
||||
area.h = cam_cell.h * (num_rows + 1);//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
|
||||
area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
|
||||
+ ceil((num_cols_fa_cam + num_cols_fa_ram) / sram_num_cells_wl_stitching_)*g_tp.ram_wl_stitching_overhead_
|
||||
+ 16*g_tp.wire_local.pitch //the overhead for the NAND gate to connect the two halves
|
||||
+ 128*g_tp.wire_local.pitch;//the overhead for the drivers from matchline to wordline of RAM
|
||||
+ ceil((num_cols_fa_cam + num_cols_fa_ram) /
|
||||
sram_num_cells_wl_stitching_) *
|
||||
g_tp.ram_wl_stitching_overhead_
|
||||
//the overhead for the NAND gate to connect the two halves
|
||||
+ 16 * g_tp.wire_local.pitch
|
||||
//the overhead for the drivers from matchline to wordline of RAM
|
||||
+ 128 * g_tp.wire_local.pitch;
|
||||
}
|
||||
|
||||
assert(area.h > 0);
|
||||
|
@ -88,14 +88,12 @@ Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
|
|||
|
||||
|
||||
|
||||
Subarray::~Subarray()
|
||||
{
|
||||
Subarray::~Subarray() {
|
||||
}
|
||||
|
||||
|
||||
|
||||
double Subarray::get_total_cell_area()
|
||||
{
|
||||
double Subarray::get_total_cell_area() {
|
||||
// return (is_fa==false? cell.get_area() * num_rows * num_cols
|
||||
// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
|
||||
// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
|
||||
|
@ -103,78 +101,75 @@ double Subarray::get_total_cell_area()
|
|||
|
||||
if (!(is_fa || dp.pure_cam))
|
||||
return (cell.get_area() * num_rows * num_cols);
|
||||
else if (is_fa)
|
||||
{ //for FA, this area includes the dummy cells in SRAM arrays.
|
||||
else if (is_fa) {
|
||||
//for FA, this area includes the dummy cells in SRAM arrays.
|
||||
//return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
|
||||
//cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
|
||||
return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
|
||||
}
|
||||
else
|
||||
return (cam_cell.h * (num_rows + 1) *
|
||||
(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
|
||||
} else {
|
||||
return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam );
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Subarray::compute_C()
|
||||
{
|
||||
void Subarray::compute_C() {
|
||||
double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
|
||||
double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
|
||||
double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
|
||||
double C_b_row_drain_C;
|
||||
|
||||
if (dp.is_dram)
|
||||
{
|
||||
if (dp.is_dram) {
|
||||
C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
|
||||
|
||||
if (dp.ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
if (dp.ram_cell_tech_type == comm_dram) {
|
||||
C_bl = num_rows * C_b_metal;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
|
||||
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!(is_fa ||dp.pure_cam))
|
||||
{
|
||||
C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
|
||||
} else {
|
||||
if (!(is_fa || dp.pure_cam)) {
|
||||
C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
|
||||
(g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
|
||||
false, true) * 2 +
|
||||
c_w_metal) * num_cols;
|
||||
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
|
||||
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
//Following is wordline not matchline
|
||||
//CAM portion
|
||||
c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
|
||||
r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
|
||||
C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
|
||||
C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
|
||||
(g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
|
||||
2.0, false, true) * 2 +
|
||||
c_w_metal) * num_cols_fa_cam;
|
||||
R_wl_cam = (r_w_metal) * num_cols_fa_cam;
|
||||
|
||||
if (!dp.pure_cam)
|
||||
{
|
||||
if (!dp.pure_cam) {
|
||||
//RAM portion
|
||||
c_w_metal = cell.w * g_tp.wire_local.C_per_um;
|
||||
r_w_metal = cell.w * g_tp.wire_local.R_per_um;
|
||||
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
|
||||
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
|
||||
(g_tp.sram.b_w - 2 *
|
||||
g_tp.sram.cell_a_w) / 2.0, false,
|
||||
true) * 2 +
|
||||
c_w_metal) * num_cols_fa_ram;
|
||||
R_wl_ram = (r_w_metal) * num_cols_fa_ram;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
C_wl_ram = R_wl_ram = 0;
|
||||
}
|
||||
C_wl = C_wl_cam + C_wl_ram;
|
||||
C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
|
||||
C_wl += (16 + 128) * g_tp.wire_local.pitch *
|
||||
g_tp.wire_local.C_per_um;
|
||||
|
||||
R_wl = R_wl_cam + R_wl_ram;
|
||||
R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
|
||||
R_wl += (16 + 128) * g_tp.wire_local.pitch *
|
||||
g_tp.wire_local.R_per_um;
|
||||
|
||||
//there are two ways to write to a FA,
|
||||
//1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -41,8 +42,7 @@
|
|||
using namespace std;
|
||||
|
||||
|
||||
class Subarray : public Component
|
||||
{
|
||||
class Subarray : public Component {
|
||||
public:
|
||||
Subarray(const DynamicParameter & dp, bool is_fa_);
|
||||
~Subarray();
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -34,18 +35,22 @@
|
|||
|
||||
#include "parameter.h"
|
||||
|
||||
double wire_resistance(double resistivity, double wire_width, double wire_thickness,
|
||||
double barrier_thickness, double dishing_thickness, double alpha_scatter)
|
||||
{
|
||||
double wire_resistance(double resistivity, double wire_width,
|
||||
double wire_thickness,
|
||||
double barrier_thickness, double dishing_thickness,
|
||||
double alpha_scatter) {
|
||||
double resistance;
|
||||
resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness));
|
||||
resistance = alpha_scatter * resistivity /
|
||||
((wire_thickness - barrier_thickness - dishing_thickness) *
|
||||
(wire_width - 2 * barrier_thickness));
|
||||
return(resistance);
|
||||
}
|
||||
|
||||
double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing,
|
||||
double ild_thickness, double miller_value, double horiz_dielectric_constant,
|
||||
double vert_dielectric_constant, double fringe_cap)
|
||||
{
|
||||
double wire_capacitance(double wire_width, double wire_thickness,
|
||||
double wire_spacing,
|
||||
double ild_thickness, double miller_value,
|
||||
double horiz_dielectric_constant,
|
||||
double vert_dielectric_constant, double fringe_cap) {
|
||||
double vertical_cap, sidewall_cap, total_cap;
|
||||
vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness;
|
||||
sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing;
|
||||
|
@ -54,8 +59,7 @@ double wire_capacitance(double wire_width, double wire_thickness, double wire_sp
|
|||
}
|
||||
|
||||
|
||||
void init_tech_params(double technology, bool is_tag)
|
||||
{
|
||||
void init_tech_params(double technology, bool is_tag) {
|
||||
int iter, tech, tech_lo, tech_hi;
|
||||
double curr_alpha, curr_vpp;
|
||||
double wire_width, wire_thickness, wire_spacing,
|
||||
|
@ -97,68 +101,42 @@ void init_tech_params(double technology, bool is_tag)
|
|||
double curr_macro_layout_overhead = 0;
|
||||
double curr_sckt_co_eff = 0;
|
||||
|
||||
if (technology < 181 && technology > 179)
|
||||
{
|
||||
if (technology < 181 && technology > 179) {
|
||||
tech_lo = 180;
|
||||
tech_hi = 180;
|
||||
}
|
||||
else if (technology < 91 && technology > 89)
|
||||
{
|
||||
} else if (technology < 91 && technology > 89) {
|
||||
tech_lo = 90;
|
||||
tech_hi = 90;
|
||||
}
|
||||
else if (technology < 66 && technology > 64)
|
||||
{
|
||||
} else if (technology < 66 && technology > 64) {
|
||||
tech_lo = 65;
|
||||
tech_hi = 65;
|
||||
}
|
||||
else if (technology < 46 && technology > 44)
|
||||
{
|
||||
} else if (technology < 46 && technology > 44) {
|
||||
tech_lo = 45;
|
||||
tech_hi = 45;
|
||||
}
|
||||
else if (technology < 33 && technology > 31)
|
||||
{
|
||||
} else if (technology < 33 && technology > 31) {
|
||||
tech_lo = 32;
|
||||
tech_hi = 32;
|
||||
}
|
||||
else if (technology < 23 && technology > 21)
|
||||
{
|
||||
} else if (technology < 23 && technology > 21) {
|
||||
tech_lo = 22;
|
||||
tech_hi = 22;
|
||||
if (ram_cell_tech_type == 3 )
|
||||
{
|
||||
cout<<"current version does not support eDRAM technologies at 22nm"<<endl;
|
||||
if (ram_cell_tech_type == 3 ) {
|
||||
cout << "current version does not support eDRAM technologies at "
|
||||
<< "22nm" << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
// else if (technology < 17 && technology > 15)
|
||||
// {
|
||||
// tech_lo = 16;
|
||||
// tech_hi = 16;
|
||||
// }
|
||||
else if (technology < 180 && technology > 90)
|
||||
{
|
||||
} else if (technology < 180 && technology > 90) {
|
||||
tech_lo = 180;
|
||||
tech_hi = 90;
|
||||
}
|
||||
else if (technology < 90 && technology > 65)
|
||||
{
|
||||
} else if (technology < 90 && technology > 65) {
|
||||
tech_lo = 90;
|
||||
tech_hi = 65;
|
||||
}
|
||||
else if (technology < 65 && technology > 45)
|
||||
{
|
||||
} else if (technology < 65 && technology > 45) {
|
||||
tech_lo = 65;
|
||||
tech_hi = 45;
|
||||
}
|
||||
else if (technology < 45 && technology > 32)
|
||||
{
|
||||
} else if (technology < 45 && technology > 32) {
|
||||
tech_lo = 45;
|
||||
tech_hi = 32;
|
||||
}
|
||||
else if (technology < 32 && technology > 22)
|
||||
{
|
||||
} else if (technology < 32 && technology > 22) {
|
||||
tech_lo = 32;
|
||||
tech_hi = 22;
|
||||
}
|
||||
|
@ -167,8 +145,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
// tech_lo = 22;
|
||||
// tech_hi = 16;
|
||||
// }
|
||||
else
|
||||
{
|
||||
else {
|
||||
cout << "Invalid technology nodes" << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
@ -191,41 +168,28 @@ void init_tech_params(double technology, bool is_tag)
|
|||
double n_to_p_eff_curr_drv_ratio[NUMBER_TECH_FLAVORS];
|
||||
double I_off_n[NUMBER_TECH_FLAVORS][101];
|
||||
double I_g_on_n[NUMBER_TECH_FLAVORS][101];
|
||||
//double I_off_p[NUMBER_TECH_FLAVORS][101];
|
||||
double gmp_to_gmn_multiplier[NUMBER_TECH_FLAVORS];
|
||||
//double curr_sckt_co_eff[NUMBER_TECH_FLAVORS];
|
||||
double long_channel_leakage_reduction[NUMBER_TECH_FLAVORS];
|
||||
|
||||
for (iter = 0; iter <= 1; ++iter)
|
||||
{
|
||||
for (iter = 0; iter <= 1; ++iter) {
|
||||
// linear interpolation
|
||||
if (iter == 0)
|
||||
{
|
||||
if (iter == 0) {
|
||||
tech = tech_lo;
|
||||
if (tech_lo == tech_hi)
|
||||
{
|
||||
if (tech_lo == tech_hi) {
|
||||
curr_alpha = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
tech = tech_hi;
|
||||
if (tech_lo == tech_hi)
|
||||
{
|
||||
if (tech_lo == tech_hi) {
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi);
|
||||
}
|
||||
}
|
||||
|
||||
if (tech == 180)
|
||||
{
|
||||
if (tech == 180) {
|
||||
//180nm technology-node. Corresponds to year 1999 in ITRS
|
||||
//Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king
|
||||
//MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input
|
||||
|
@ -297,8 +261,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
|
||||
}
|
||||
|
||||
if (tech == 90)
|
||||
{
|
||||
if (tech == 90) {
|
||||
SENSE_AMP_D = .28e-9; // s
|
||||
SENSE_AMP_P = 14.7e-15; // J
|
||||
//90nm technology-node. Corresponds to year 2004 in ITRS
|
||||
|
@ -435,8 +398,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[2][90] = 4.31e-8;
|
||||
I_g_on_n[2][100] = 4.31e-8;
|
||||
|
||||
if (ram_cell_tech_type == lp_dram)
|
||||
{
|
||||
if (ram_cell_tech_type == lp_dram) {
|
||||
//LP-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.2;
|
||||
Lphy[3] = 0.12;
|
||||
|
@ -481,9 +443,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_off_n[3][80] = 2.57e-10;
|
||||
I_off_n[3][90] = 3.14e-10;
|
||||
I_off_n[3][100] = 3.85e-10;
|
||||
}
|
||||
else if (ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
} else if (ram_cell_tech_type == comm_dram) {
|
||||
//COMM-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.6;
|
||||
Lphy[3] = 0.09;
|
||||
|
@ -552,8 +512,8 @@ void init_tech_params(double technology, bool is_tag)
|
|||
|
||||
}
|
||||
|
||||
if (tech == 65)
|
||||
{ //65nm technology-node. Corresponds to year 2007 in ITRS
|
||||
if (tech == 65) {
|
||||
//65nm technology-node. Corresponds to year 2007 in ITRS
|
||||
//ITRS HP device type
|
||||
SENSE_AMP_D = .2e-9; // s
|
||||
SENSE_AMP_P = 5.7e-15; // J
|
||||
|
@ -689,8 +649,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[2][90] = 9.61e-9;
|
||||
I_g_on_n[2][100] = 9.61e-9;
|
||||
|
||||
if (ram_cell_tech_type == lp_dram)
|
||||
{
|
||||
if (ram_cell_tech_type == lp_dram) {
|
||||
//LP-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.2;
|
||||
Lphy[3] = 0.12;
|
||||
|
@ -735,9 +694,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_off_n[3][80] = 3.63e-10;
|
||||
I_off_n[3][90] = 4.41e-10;
|
||||
I_off_n[3][100] = 5.36e-10;
|
||||
}
|
||||
else if (ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
} else if (ram_cell_tech_type == comm_dram) {
|
||||
//COMM-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.3;
|
||||
Lphy[3] = 0.065;
|
||||
|
@ -804,8 +761,8 @@ void init_tech_params(double technology, bool is_tag)
|
|||
curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
|
||||
}
|
||||
|
||||
if (tech == 45)
|
||||
{ //45nm technology-node. Corresponds to year 2010 in ITRS
|
||||
if (tech == 45) {
|
||||
//45nm technology-node. Corresponds to year 2010 in ITRS
|
||||
//ITRS HP device type
|
||||
SENSE_AMP_D = .04e-9; // s
|
||||
SENSE_AMP_P = 2.7e-15; // J
|
||||
|
@ -829,7 +786,9 @@ void init_tech_params(double technology, bool is_tag)
|
|||
gmp_to_gmn_multiplier[0] = 1.38;
|
||||
Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];
|
||||
Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];
|
||||
long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
|
||||
//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%,
|
||||
//Ioff(Lgate normal)/Ioff(Lgate long)= 3.74
|
||||
long_channel_leakage_reduction[0] = 1 / 3.546;
|
||||
I_off_n[0][0] = 2.8e-7;
|
||||
I_off_n[0][10] = 3.28e-7;
|
||||
I_off_n[0][20] = 3.81e-7;
|
||||
|
@ -942,8 +901,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[2][90] = 1.43e-7;
|
||||
I_g_on_n[2][100] = 1.54e-7;
|
||||
|
||||
if (ram_cell_tech_type == lp_dram)
|
||||
{
|
||||
if (ram_cell_tech_type == lp_dram) {
|
||||
//LP-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.1;
|
||||
Lphy[3] = 0.078;
|
||||
|
@ -988,9 +946,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_off_n[3][80] = 4.26e-10;
|
||||
I_off_n[3][90] = 5.27e-10;
|
||||
I_off_n[3][100] = 6.46e-10;
|
||||
}
|
||||
else if (ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
} else if (ram_cell_tech_type == comm_dram) {
|
||||
//COMM-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.1;
|
||||
Lphy[3] = 0.045;
|
||||
|
@ -1058,8 +1014,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb
|
||||
}
|
||||
|
||||
if (tech == 32)
|
||||
{
|
||||
if (tech == 32) {
|
||||
SENSE_AMP_D = .03e-9; // s
|
||||
SENSE_AMP_P = 2.16e-15; // J
|
||||
//For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm
|
||||
|
@ -1110,19 +1065,6 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[0][90] = 6.55e-8;
|
||||
I_g_on_n[0][100] = 6.55e-8;
|
||||
|
||||
// 32 DG
|
||||
// I_g_on_n[0][0] = 2.71e-9;//A/micron
|
||||
// I_g_on_n[0][10] = 2.71e-9;
|
||||
// I_g_on_n[0][20] = 2.71e-9;
|
||||
// I_g_on_n[0][30] = 2.71e-9;
|
||||
// I_g_on_n[0][40] = 2.71e-9;
|
||||
// I_g_on_n[0][50] = 2.71e-9;
|
||||
// I_g_on_n[0][60] = 2.71e-9;
|
||||
// I_g_on_n[0][70] = 2.71e-9;
|
||||
// I_g_on_n[0][80] = 2.71e-9;
|
||||
// I_g_on_n[0][90] = 2.71e-9;
|
||||
// I_g_on_n[0][100] = 2.71e-9;
|
||||
|
||||
//LSTP device type
|
||||
vdd[1] = 1;
|
||||
Lphy[1] = 0.020;
|
||||
|
@ -1167,7 +1109,6 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[1][90] = 3.73e-11;
|
||||
I_g_on_n[1][100] = 3.73e-11;
|
||||
|
||||
|
||||
//LOP device type
|
||||
vdd[2] = 0.6;
|
||||
Lphy[2] = 0.016;
|
||||
|
@ -1212,8 +1153,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[2][90] = 2.93e-9;
|
||||
I_g_on_n[2][100] = 2.93e-9;
|
||||
|
||||
if (ram_cell_tech_type == lp_dram)
|
||||
{
|
||||
if (ram_cell_tech_type == lp_dram) {
|
||||
//LP-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.0;
|
||||
Lphy[3] = 0.056;
|
||||
|
@ -1258,9 +1198,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_off_n[3][80] = 5.87e-10;
|
||||
I_off_n[3][90] = 7.29e-10;
|
||||
I_off_n[3][100] = 8.87e-10;
|
||||
}
|
||||
else if (ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
} else if (ram_cell_tech_type == comm_dram) {
|
||||
//COMM-DRAM cell access transistor technology parameters
|
||||
curr_vdd_dram_cell = 1.0;
|
||||
Lphy[3] = 0.032;
|
||||
|
@ -1353,7 +1291,9 @@ void init_tech_params(double technology, bool is_tag)
|
|||
Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron
|
||||
Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron
|
||||
long_channel_leakage_reduction[0] = 1 / 3.274;
|
||||
I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there.
|
||||
//From 22nm, leakage current are directly from ITRS report rather
|
||||
//than MASTAR, since MASTAR has serious bugs there.
|
||||
I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2;
|
||||
I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2;
|
||||
I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2;
|
||||
I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2;
|
||||
|
@ -1468,10 +1408,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
|
||||
|
||||
|
||||
if (ram_cell_tech_type == 3)
|
||||
{}
|
||||
else if (ram_cell_tech_type == 4)
|
||||
{
|
||||
if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) {
|
||||
//22 nm commodity DRAM cell access transistor technology parameters.
|
||||
//parameters
|
||||
curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
|
||||
|
@ -1522,9 +1459,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_off_n[3][90] = 1.18e-11;
|
||||
I_off_n[3][100] = 1.72e-11;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
//some error handler
|
||||
}
|
||||
|
||||
|
@ -1596,55 +1531,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_g_on_n[0][90] = 1.07e-9;
|
||||
I_g_on_n[0][100] = 1.07e-9;
|
||||
|
||||
// //16 nm LSTP DG
|
||||
// vdd[1] = 0.8;
|
||||
// Lphy[1] = 0.014;
|
||||
// Lelec[1] = 0.008;//Lelec is the electrical gate-length.
|
||||
// t_ox[1] = 1.1e-3;//micron
|
||||
// v_th[1] = 0.40126;//V
|
||||
// c_ox[1] = 2.30e-14;//F/micron2
|
||||
// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs
|
||||
// Vdsat[1] = 6.64e-2; //V/micron
|
||||
// c_g_ideal[1] = 3.22e-16;//F/micron
|
||||
// c_fringe[1] = 0.008e-15;
|
||||
// c_junc[1] = 0;//F/micron2
|
||||
// I_on_n[1] = 727.6e-6;//A/micron
|
||||
// I_on_p[1] = I_on_n[1] / 2;
|
||||
// nmos_effective_resistance_multiplier = 1.99;
|
||||
// n_to_p_eff_curr_drv_ratio[1] = 2;
|
||||
// gmp_to_gmn_multiplier[1] = 0.99;
|
||||
// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron
|
||||
// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron
|
||||
// I_off_n[1][0] = 2.43e-11;
|
||||
// I_off_n[1][10] = 4.85e-11;
|
||||
// I_off_n[1][20] = 9.68e-11;
|
||||
// I_off_n[1][30] = 1.94e-10;
|
||||
// I_off_n[1][40] = 3.87e-10;
|
||||
// I_off_n[1][50] = 7.73e-10;
|
||||
// I_off_n[1][60] = 3.55e-10;
|
||||
// I_off_n[1][70] = 3.09e-9;
|
||||
// I_off_n[1][80] = 6.19e-9;
|
||||
// I_off_n[1][90] = 1.24e-8;
|
||||
// I_off_n[1][100]= 2.48e-8;
|
||||
//
|
||||
// // for 22nm LSTP HP
|
||||
// I_g_on_n[1][0] = 4.51e-10;//A/micron
|
||||
// I_g_on_n[1][10] = 4.51e-10;
|
||||
// I_g_on_n[1][20] = 4.51e-10;
|
||||
// I_g_on_n[1][30] = 4.51e-10;
|
||||
// I_g_on_n[1][40] = 4.51e-10;
|
||||
// I_g_on_n[1][50] = 4.51e-10;
|
||||
// I_g_on_n[1][60] = 4.51e-10;
|
||||
// I_g_on_n[1][70] = 4.51e-10;
|
||||
// I_g_on_n[1][80] = 4.51e-10;
|
||||
// I_g_on_n[1][90] = 4.51e-10;
|
||||
// I_g_on_n[1][100] = 4.51e-10;
|
||||
|
||||
|
||||
if (ram_cell_tech_type == 3)
|
||||
{}
|
||||
else if (ram_cell_tech_type == 4)
|
||||
{
|
||||
if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) {
|
||||
//22 nm commodity DRAM cell access transistor technology parameters.
|
||||
//parameters
|
||||
curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In
|
||||
|
@ -1695,9 +1582,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
I_off_n[3][90] = 1.18e-11;
|
||||
I_off_n[3][100] = 1.72e-11;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
//some error handler
|
||||
}
|
||||
|
||||
|
@ -1865,20 +1750,22 @@ void init_tech_params(double technology, bool is_tag)
|
|||
|
||||
g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2;
|
||||
g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um;
|
||||
g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process
|
||||
g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process
|
||||
g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process
|
||||
g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process
|
||||
//was 10 micron for the 0.8 micron process
|
||||
g_tp.w_iso = 12.5 * g_ip->F_sz_um;
|
||||
// sense amplifier N-trans; was 3 micron for the 0.8 micron process
|
||||
g_tp.w_sense_n = 3.75 * g_ip->F_sz_um;
|
||||
// sense amplifier P-trans; was 6 micron for the 0.8 micron process
|
||||
g_tp.w_sense_p = 7.5 * g_ip->F_sz_um;
|
||||
// Sense enable transistor of the sense amplifier; was 4 micron for the
|
||||
//0.8 micron process
|
||||
g_tp.w_sense_en = 5 * g_ip->F_sz_um;
|
||||
g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_;
|
||||
g_tp.w_nmos_sa_mux= 6 * g_tp.min_w_nmos_;
|
||||
|
||||
if (ram_cell_tech_type == comm_dram)
|
||||
{
|
||||
if (ram_cell_tech_type == comm_dram) {
|
||||
g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um;
|
||||
g_tp.h_dec = 8; // in the unit of memory cell height
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
g_tp.max_w_nmos_dec = g_tp.max_w_nmos_;
|
||||
g_tp.h_dec = 4; // in the unit of memory cell height
|
||||
}
|
||||
|
@ -1921,36 +1808,25 @@ void init_tech_params(double technology, bool is_tag)
|
|||
miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES],
|
||||
ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES];
|
||||
|
||||
for (iter=0; iter<=1; ++iter)
|
||||
{
|
||||
for (iter = 0; iter <= 1; ++iter) {
|
||||
// linear interpolation
|
||||
if (iter == 0)
|
||||
{
|
||||
if (iter == 0) {
|
||||
tech = tech_lo;
|
||||
if (tech_lo == tech_hi)
|
||||
{
|
||||
if (tech_lo == tech_hi) {
|
||||
curr_alpha = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
tech = tech_hi;
|
||||
if (tech_lo == tech_hi)
|
||||
{
|
||||
if (tech_lo == tech_hi) {
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi);
|
||||
}
|
||||
}
|
||||
|
||||
if (tech == 180)
|
||||
{
|
||||
if (tech == 180) {
|
||||
//Aggressive projections
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
|
||||
aspect_ratio[0][0] = 2.0;
|
||||
|
@ -2060,9 +1936,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.18;
|
||||
wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18);
|
||||
wire_r_per_micron[1][3] = 12 / 0.18;
|
||||
}
|
||||
else if (tech == 90)
|
||||
{
|
||||
} else if (tech == 90) {
|
||||
//Aggressive projections
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron
|
||||
aspect_ratio[0][0] = 2.4;
|
||||
|
@ -2171,9 +2045,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.09;
|
||||
wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09);
|
||||
wire_r_per_micron[1][3] = 12 / 0.09;
|
||||
}
|
||||
else if (tech == 65)
|
||||
{
|
||||
} else if (tech == 65) {
|
||||
//Aggressive projections
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
|
||||
aspect_ratio[0][0] = 2.7;
|
||||
|
@ -2279,9 +2151,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.065;
|
||||
wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065);
|
||||
wire_r_per_micron[1][3] = 12 / 0.065;
|
||||
}
|
||||
else if (tech == 45)
|
||||
{
|
||||
} else if (tech == 45) {
|
||||
//Aggressive projections.
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
|
||||
aspect_ratio[0][0] = 3.0;
|
||||
|
@ -2387,9 +2257,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.045;
|
||||
wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045);
|
||||
wire_r_per_micron[1][3] = 12 / 0.045;
|
||||
}
|
||||
else if (tech == 32)
|
||||
{
|
||||
} else if (tech == 32) {
|
||||
//Aggressive projections.
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;
|
||||
aspect_ratio[0][0] = 3.0;
|
||||
|
@ -2494,9 +2362,7 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.032;//micron
|
||||
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron
|
||||
wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron
|
||||
}
|
||||
else if (tech == 22)
|
||||
{
|
||||
} else if (tech == 22) {
|
||||
//Aggressive projections.
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
|
||||
aspect_ratio[0][0] = 3.0;
|
||||
|
@ -2547,44 +2413,6 @@ void init_tech_params(double technology, bool is_tag)
|
|||
ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
|
||||
fringe_cap);
|
||||
|
||||
// //*************************
|
||||
// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
|
||||
// aspect_ratio = 3.0;
|
||||
// wire_width = wire_pitch[0][4] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[0][4] - wire_width;
|
||||
// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.3;
|
||||
// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
|
||||
// aspect_ratio = 3.0;
|
||||
// wire_width = wire_pitch[0][5] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[0][5] - wire_width;
|
||||
// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.3;
|
||||
// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
|
||||
// aspect_ratio = 3.0;
|
||||
// wire_width = wire_pitch[0][6] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[0][6] - wire_width;
|
||||
// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.3;
|
||||
// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//*************************
|
||||
|
||||
//Conservative projections
|
||||
wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
|
||||
aspect_ratio[1][0] = 2.0;
|
||||
|
@ -2639,50 +2467,9 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.022;//micron
|
||||
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron
|
||||
wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron
|
||||
|
||||
//******************
|
||||
// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
|
||||
// aspect_ratio = 2.2;
|
||||
// wire_width = wire_pitch[1][4] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[1][4] - wire_width;
|
||||
// dishing_thickness = 0.1 * wire_thickness;
|
||||
// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.275;
|
||||
// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
|
||||
// aspect_ratio = 2.2;
|
||||
// wire_width = wire_pitch[1][5] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[1][5] - wire_width;
|
||||
// dishing_thickness = 0.1 * wire_thickness;
|
||||
// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.275;
|
||||
// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
|
||||
// aspect_ratio = 2.2;
|
||||
// wire_width = wire_pitch[1][6] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[1][6] - wire_width;
|
||||
// dishing_thickness = 0.1 * wire_thickness;
|
||||
// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.275;
|
||||
// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
}
|
||||
|
||||
else if (tech == 16)
|
||||
{
|
||||
else if (tech == 16) {
|
||||
//Aggressive projections.
|
||||
wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local
|
||||
aspect_ratio[0][0] = 3.0;
|
||||
|
@ -2733,44 +2520,6 @@ void init_tech_params(double technology, bool is_tag)
|
|||
ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2],
|
||||
fringe_cap);
|
||||
|
||||
// //*************************
|
||||
// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global
|
||||
// aspect_ratio = 3.0;
|
||||
// wire_width = wire_pitch[0][4] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[0][4] - wire_width;
|
||||
// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.3;
|
||||
// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global
|
||||
// aspect_ratio = 3.0;
|
||||
// wire_width = wire_pitch[0][5] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[0][5] - wire_width;
|
||||
// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.3;
|
||||
// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global
|
||||
// aspect_ratio = 3.0;
|
||||
// wire_width = wire_pitch[0][6] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[0][6] - wire_width;
|
||||
// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.3;
|
||||
// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//*************************
|
||||
|
||||
//Conservative projections
|
||||
wire_pitch[1][0] = 2.5 * g_ip->F_sz_um;
|
||||
aspect_ratio[1][0] = 2.0;
|
||||
|
@ -2825,83 +2574,75 @@ void init_tech_params(double technology, bool is_tag)
|
|||
wire_pitch[1][3] = 2 * 0.016;//micron
|
||||
wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron
|
||||
wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron
|
||||
|
||||
//******************
|
||||
// wire_pitch[1][4] = 16 * g_ip.F_sz_um;
|
||||
// aspect_ratio = 2.2;
|
||||
// wire_width = wire_pitch[1][4] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[1][4] - wire_width;
|
||||
// dishing_thickness = 0.1 * wire_thickness;
|
||||
// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.275;
|
||||
// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[1][5] = 24 * g_ip.F_sz_um;
|
||||
// aspect_ratio = 2.2;
|
||||
// wire_width = wire_pitch[1][5] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[1][5] - wire_width;
|
||||
// dishing_thickness = 0.1 * wire_thickness;
|
||||
// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.275;
|
||||
// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
//
|
||||
// wire_pitch[1][6] = 32 * g_ip.F_sz_um;
|
||||
// aspect_ratio = 2.2;
|
||||
// wire_width = wire_pitch[1][6] / 2;
|
||||
// wire_thickness = aspect_ratio * wire_width;
|
||||
// wire_spacing = wire_pitch[1][6] - wire_width;
|
||||
// dishing_thickness = 0.1 * wire_thickness;
|
||||
// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width,
|
||||
// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);
|
||||
// ild_thickness = 0.275;
|
||||
// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing,
|
||||
// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant,
|
||||
// fringe_cap);
|
||||
}
|
||||
g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0];
|
||||
g_tp.wire_local.pitch += curr_alpha *
|
||||
wire_pitch[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.R_per_um += curr_alpha *
|
||||
wire_r_per_micron[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.C_per_um += curr_alpha *
|
||||
wire_c_per_micron[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.aspect_ratio += curr_alpha *
|
||||
aspect_ratio[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.ild_thickness += curr_alpha *
|
||||
ild_thickness[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.miller_value += curr_alpha *
|
||||
miller_value[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.horiz_dielectric_constant += curr_alpha *
|
||||
horiz_dielectric_constant[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
g_tp.wire_local.vert_dielectric_constant += curr_alpha *
|
||||
vert_dielectric_constant[g_ip->ic_proj_type]
|
||||
[(ram_cell_tech_type == comm_dram) ? 3 : 0];
|
||||
|
||||
g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.pitch += curr_alpha *
|
||||
wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.R_per_um += curr_alpha *
|
||||
wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.C_per_um += curr_alpha *
|
||||
wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.aspect_ratio += curr_alpha *
|
||||
aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.ild_thickness += curr_alpha *
|
||||
ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.miller_value += curr_alpha *
|
||||
miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha *
|
||||
horiz_dielectric_constant[g_ip->ic_proj_type]
|
||||
[g_ip->wire_is_mat_type];
|
||||
g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha *
|
||||
vert_dielectric_constant [g_ip->ic_proj_type]
|
||||
[g_ip->wire_is_mat_type];
|
||||
|
||||
g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.pitch += curr_alpha *
|
||||
wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.R_per_um += curr_alpha *
|
||||
wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.C_per_um += curr_alpha *
|
||||
wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.aspect_ratio += curr_alpha *
|
||||
aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.ild_thickness += curr_alpha *
|
||||
ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.miller_value += curr_alpha *
|
||||
miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha *
|
||||
horiz_dielectric_constant[g_ip->ic_proj_type]
|
||||
[g_ip->wire_os_mat_type];
|
||||
g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha *
|
||||
vert_dielectric_constant [g_ip->ic_proj_type]
|
||||
[g_ip->wire_os_mat_type];
|
||||
|
||||
g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2;
|
||||
g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um *
|
||||
g_tp.wire_inside_mat.C_per_um / 2;
|
||||
|
||||
g_tp.sense_delay += curr_alpha * SENSE_AMP_D;
|
||||
g_tp.sense_dy_power += curr_alpha * SENSE_AMP_P;
|
||||
// g_tp.horiz_dielectric_constant += horiz_dielectric_constant;
|
||||
// g_tp.vert_dielectric_constant += vert_dielectric_constant;
|
||||
// g_tp.aspect_ratio += aspect_ratio;
|
||||
// g_tp.miller_value += miller_value;
|
||||
// g_tp.ild_thickness += ild_thickness;
|
||||
|
||||
}
|
||||
g_tp.fringe_cap = fringe_cap;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -37,61 +38,78 @@
|
|||
#include "uca.h"
|
||||
|
||||
UCA::UCA(const DynamicParameter & dyn_p)
|
||||
:dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0)
|
||||
{
|
||||
int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2));
|
||||
: dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) {
|
||||
int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)
|
||||
/ 2 : (_log2(nbanks) - _log2(nbanks) / 2));
|
||||
int num_banks_hor_dir = nbanks / num_banks_ver_dir;
|
||||
|
||||
if (dp.use_inp_params)
|
||||
{
|
||||
if (dp.use_inp_params) {
|
||||
RWP = dp.num_rw_ports;
|
||||
ERP = dp.num_rd_ports;
|
||||
EWP = dp.num_wr_ports;
|
||||
SCHP = dp.num_search_ports;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
RWP = g_ip->num_rw_ports;
|
||||
ERP = g_ip->num_rd_ports;
|
||||
EWP = g_ip->num_wr_ports;
|
||||
SCHP = g_ip->num_search_ports;
|
||||
}
|
||||
|
||||
num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
|
||||
num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) *
|
||||
(RWP + ERP + EWP);
|
||||
num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
|
||||
num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
|
||||
num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
|
||||
num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
|
||||
|
||||
if (!dp.fully_assoc && !dp.pure_cam)
|
||||
{
|
||||
if (!dp.fully_assoc && !dp.pure_cam) {
|
||||
|
||||
if (g_ip->fast_access && dp.is_tag == false)
|
||||
{
|
||||
if (g_ip->fast_access && dp.is_tag == false) {
|
||||
num_do_b_bank *= g_ip->data_assoc;
|
||||
}
|
||||
|
||||
htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank, 0,
|
||||
num_do_b_bank, 0, num_banks_ver_dir * 2,
|
||||
num_banks_hor_dir * 2, Add_htree, true);
|
||||
htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank, 0,
|
||||
num_do_b_bank, 0, num_banks_ver_dir * 2,
|
||||
num_banks_hor_dir * 2, Data_in_htree, true);
|
||||
htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank, 0,
|
||||
num_do_b_bank, 0, num_banks_ver_dir * 2,
|
||||
num_banks_hor_dir * 2, Data_out_htree, true);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
else {
|
||||
|
||||
htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank,
|
||||
num_si_b_bank, num_do_b_bank, num_so_b_bank,
|
||||
num_banks_ver_dir * 2, num_banks_hor_dir * 2,
|
||||
Add_htree, true);
|
||||
htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank,
|
||||
num_si_b_bank, num_do_b_bank, num_so_b_bank,
|
||||
num_banks_ver_dir * 2, num_banks_hor_dir * 2,
|
||||
Data_in_htree, true);
|
||||
htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank,
|
||||
num_si_b_bank, num_do_b_bank,
|
||||
num_so_b_bank, num_banks_ver_dir * 2,
|
||||
num_banks_hor_dir * 2, Data_out_htree, true);
|
||||
htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank,
|
||||
num_si_b_bank, num_do_b_bank,
|
||||
num_so_b_bank, num_banks_ver_dir * 2,
|
||||
num_banks_hor_dir * 2, Data_in_htree, true);
|
||||
htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
|
||||
num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true);
|
||||
num_addr_b_bank, num_di_b_bank,
|
||||
num_si_b_bank, num_do_b_bank,
|
||||
num_so_b_bank, num_banks_ver_dir * 2,
|
||||
num_banks_hor_dir * 2, Data_out_htree,
|
||||
true);
|
||||
}
|
||||
|
||||
area.w = htree_in_data->area.w;
|
||||
|
@ -108,8 +126,7 @@ UCA::UCA(const DynamicParameter & dyn_p)
|
|||
|
||||
|
||||
|
||||
UCA::~UCA()
|
||||
{
|
||||
UCA::~UCA() {
|
||||
delete htree_in_add;
|
||||
delete htree_in_data;
|
||||
delete htree_out_data;
|
||||
|
@ -117,8 +134,7 @@ UCA::~UCA()
|
|||
|
||||
|
||||
|
||||
double UCA::compute_delays(double inrisetime)
|
||||
{
|
||||
double UCA::compute_delays(double inrisetime) {
|
||||
double outrisetime = bank.compute_delays(inrisetime);
|
||||
|
||||
double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
|
||||
|
@ -141,8 +157,7 @@ double UCA::compute_delays(double inrisetime)
|
|||
access_time = bank.mat.delay_comparator;
|
||||
|
||||
double ram_delay_inside_mat;
|
||||
if (dp.fully_assoc)
|
||||
{
|
||||
if (dp.fully_assoc) {
|
||||
//delay of FA contains both CAM tag and RAM data
|
||||
{ //delay of CAM
|
||||
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
|
||||
|
@ -150,14 +165,11 @@ double UCA::compute_delays(double inrisetime)
|
|||
//delay of fully-associative data array
|
||||
access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
|
||||
}
|
||||
|
||||
if (dp.is_main_mem)
|
||||
{
|
||||
if (dp.is_main_mem) {
|
||||
double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
|
||||
double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
|
||||
delay_from_subarray_out_drv_to_out;
|
||||
|
@ -166,11 +178,9 @@ double UCA::compute_delays(double inrisetime)
|
|||
|
||||
double temp;
|
||||
|
||||
if (!dp.fully_assoc)
|
||||
{
|
||||
if (!dp.fully_assoc) {
|
||||
temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
|
||||
if (dp.is_dram)
|
||||
{
|
||||
if (dp.is_dram) {
|
||||
temp += bank.mat.delay_writeback; // temp stores random cycle time
|
||||
}
|
||||
|
||||
|
@ -179,9 +189,7 @@ double UCA::compute_delays(double inrisetime)
|
|||
temp = MAX(temp, bank.mat.b_mux_predec->delay);
|
||||
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
|
||||
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
|
||||
temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
|
||||
+ bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
|
||||
|
@ -192,8 +200,7 @@ double UCA::compute_delays(double inrisetime)
|
|||
}
|
||||
|
||||
// The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
|
||||
if (g_ip->rpters_in_htree == false)
|
||||
{
|
||||
if (g_ip->rpters_in_htree == false) {
|
||||
temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
|
||||
}
|
||||
cycle_time = temp;
|
||||
|
@ -202,22 +209,18 @@ double UCA::compute_delays(double inrisetime)
|
|||
double delay_rep_network = delay_from_subarray_out_drv_to_out;
|
||||
multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
|
||||
|
||||
if (dp.is_main_mem)
|
||||
{
|
||||
if (dp.is_main_mem) {
|
||||
multisubbank_interleave_cycle_time = htree_in_add->delay;
|
||||
precharge_delay = htree_in_add->delay +
|
||||
bank.htree_in_add->delay + bank.mat.delay_writeback +
|
||||
bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
|
||||
cycle_time = access_time + precharge_delay;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
precharge_delay = 0;
|
||||
}
|
||||
|
||||
double dram_array_availability = 0;
|
||||
if (dp.is_dram)
|
||||
{
|
||||
if (dp.is_dram) {
|
||||
dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
|
||||
}
|
||||
|
||||
|
@ -227,25 +230,27 @@ double UCA::compute_delays(double inrisetime)
|
|||
|
||||
|
||||
// note: currently, power numbers are for a bank of an array
|
||||
void UCA::compute_power_energy()
|
||||
{
|
||||
void UCA::compute_power_energy() {
|
||||
bank.compute_power_energy();
|
||||
power = bank.power;
|
||||
|
||||
power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
|
||||
power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
|
||||
if (dp.fully_assoc || dp.pure_cam)
|
||||
power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
|
||||
power_routing_to_bank.searchOp.dynamic =
|
||||
htree_in_search->power.searchOp.dynamic +
|
||||
htree_out_search->power.searchOp.dynamic;
|
||||
|
||||
power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
|
||||
power_routing_to_bank.readOp.leakage +=
|
||||
htree_in_add->power.readOp.leakage +
|
||||
htree_in_data->power.readOp.leakage +
|
||||
htree_out_data->power.readOp.leakage;
|
||||
|
||||
power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
|
||||
power_routing_to_bank.readOp.gate_leakage +=
|
||||
htree_in_add->power.readOp.gate_leakage +
|
||||
htree_in_data->power.readOp.gate_leakage +
|
||||
htree_out_data->power.readOp.gate_leakage;
|
||||
if (dp.fully_assoc || dp.pure_cam)
|
||||
{
|
||||
if (dp.fully_assoc || dp.pure_cam) {
|
||||
power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
|
||||
power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
|
||||
}
|
||||
|
@ -264,8 +269,7 @@ void UCA::compute_power_energy()
|
|||
+ bank.htree_in_data->power.readOp.dynamic
|
||||
- bank.htree_out_data->power.readOp.dynamic;
|
||||
|
||||
if (dp.is_dram == false)
|
||||
{
|
||||
if (dp.is_dram == false) {
|
||||
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||
}
|
||||
|
||||
|
@ -370,15 +374,15 @@ void UCA::compute_power_energy()
|
|||
bank.htree_in_data->power.readOp.gate_leakage +
|
||||
bank.htree_out_data->power.readOp.gate_leakage;
|
||||
|
||||
if (dp.fully_assoc || dp.pure_cam)
|
||||
{
|
||||
if (dp.fully_assoc || dp.pure_cam) {
|
||||
leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
|
||||
leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
|
||||
}
|
||||
|
||||
|
||||
if (dp.is_dram)
|
||||
{ // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
|
||||
// if DRAM, add contribution of power spent in row predecoder drivers,
|
||||
// blocks and decoders to refresh power
|
||||
if (dp.is_dram) {
|
||||
refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
|
||||
bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
|
||||
refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
|
||||
|
@ -388,8 +392,7 @@ void UCA::compute_power_energy()
|
|||
}
|
||||
|
||||
|
||||
if (dp.is_tag == false)
|
||||
{
|
||||
if (dp.is_tag == false) {
|
||||
power.readOp.dynamic = dyn_read_energy_from_closed_page;
|
||||
power.writeOp.dynamic = dyn_read_energy_from_closed_page
|
||||
- dyn_read_energy_remaining_words_in_burst
|
||||
|
@ -401,15 +404,13 @@ void UCA::compute_power_energy()
|
|||
bank.htree_in_data->power.readOp.dynamic) *
|
||||
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
|
||||
|
||||
if (dp.is_dram == false)
|
||||
{
|
||||
if (dp.is_dram == false) {
|
||||
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||
}
|
||||
}
|
||||
|
||||
// if DRAM, add refresh power to total leakage
|
||||
if (dp.is_dram)
|
||||
{
|
||||
if (dp.is_dram) {
|
||||
power.readOp.leakage += refresh_power;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -40,8 +41,7 @@
|
|||
#include "htree2.h"
|
||||
#include "parameter.h"
|
||||
|
||||
class UCA : public Component
|
||||
{
|
||||
class UCA : public Component {
|
||||
public:
|
||||
UCA(const DynamicParameter & dyn_p);
|
||||
~UCA();
|
||||
|
@ -66,7 +66,10 @@ class UCA : public Component
|
|||
int num_do_b_bank;
|
||||
int num_si_b_bank;
|
||||
int num_so_b_bank;
|
||||
int RWP, ERP, EWP,SCHP;
|
||||
int RWP;
|
||||
int ERP;
|
||||
int EWP;
|
||||
int SCHP;
|
||||
double area_all_dataramcells;
|
||||
|
||||
double dyn_read_energy_from_closed_page;
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -41,9 +42,9 @@ Wire::Wire(
|
|||
enum Wire_placement wp,
|
||||
double resistivity,
|
||||
TechnologyParameter::DeviceType *dt
|
||||
):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s),
|
||||
resistivity(resistivity), deviceType(dt)
|
||||
{
|
||||
): wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s),
|
||||
s_scale(s_s),
|
||||
resistivity(resistivity), deviceType(dt) {
|
||||
wire_placement = wp;
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||
in_rise_time = 0;
|
||||
|
@ -78,8 +79,8 @@ Wire::Wire(
|
|||
double Wire::wire_spacing_init;
|
||||
|
||||
|
||||
Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt)
|
||||
{
|
||||
Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis,
|
||||
TechnologyParameter::DeviceType *dt) {
|
||||
w_scale = w_s;
|
||||
s_scale = s_s;
|
||||
deviceType = dt;
|
||||
|
@ -89,11 +90,16 @@ Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, Technol
|
|||
in_rise_time = 0;
|
||||
out_rise_time = 0;
|
||||
|
||||
switch (wire_placement)
|
||||
{
|
||||
case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break;
|
||||
case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break;
|
||||
default: wire_width = g_tp.wire_local.pitch; break;
|
||||
switch (wire_placement) {
|
||||
case outside_mat:
|
||||
wire_width = g_tp.wire_outside_mat.pitch;
|
||||
break;
|
||||
case inside_mat :
|
||||
wire_width = g_tp.wire_inside_mat.pitch;
|
||||
break;
|
||||
default:
|
||||
wire_width = g_tp.wire_local.pitch;
|
||||
break;
|
||||
}
|
||||
|
||||
wire_spacing = wire_width;
|
||||
|
@ -113,23 +119,19 @@ Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, Technol
|
|||
|
||||
|
||||
|
||||
Wire::~Wire()
|
||||
{
|
||||
Wire::~Wire() {
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
Wire::calculate_wire_stats()
|
||||
{
|
||||
Wire::calculate_wire_stats() {
|
||||
|
||||
if (wire_placement == outside_mat) {
|
||||
wire_width = g_tp.wire_outside_mat.pitch;
|
||||
}
|
||||
else if (wire_placement == inside_mat) {
|
||||
} else if (wire_placement == inside_mat) {
|
||||
wire_width = g_tp.wire_inside_mat.pitch;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
wire_width = g_tp.wire_local.pitch;
|
||||
}
|
||||
|
||||
|
@ -152,9 +154,9 @@ Wire::calculate_wire_stats()
|
|||
repeater_size = global.area.h;
|
||||
area.set_area((wire_length / repeater_spacing) *
|
||||
compute_gate_area(INV, 1, min_w_pmos * repeater_size,
|
||||
g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
|
||||
}
|
||||
else if (wt == Global_5) {
|
||||
g_tp.min_w_nmos_ * repeater_size,
|
||||
g_tp.cell_h_def));
|
||||
} else if (wt == Global_5) {
|
||||
delay = global_5.delay * wire_length;
|
||||
power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length;
|
||||
power.readOp.leakage = global_5.power.readOp.leakage * wire_length;
|
||||
|
@ -163,9 +165,9 @@ Wire::calculate_wire_stats()
|
|||
repeater_size = global_5.area.h;
|
||||
area.set_area((wire_length / repeater_spacing) *
|
||||
compute_gate_area(INV, 1, min_w_pmos * repeater_size,
|
||||
g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
|
||||
}
|
||||
else if (wt == Global_10) {
|
||||
g_tp.min_w_nmos_ * repeater_size,
|
||||
g_tp.cell_h_def));
|
||||
} else if (wt == Global_10) {
|
||||
delay = global_10.delay * wire_length;
|
||||
power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length;
|
||||
power.readOp.leakage = global_10.power.readOp.leakage * wire_length;
|
||||
|
@ -174,9 +176,9 @@ Wire::calculate_wire_stats()
|
|||
repeater_size = global_10.area.h;
|
||||
area.set_area((wire_length / repeater_spacing) *
|
||||
compute_gate_area(INV, 1, min_w_pmos * repeater_size,
|
||||
g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
|
||||
}
|
||||
else if (wt == Global_20) {
|
||||
g_tp.min_w_nmos_ * repeater_size,
|
||||
g_tp.cell_h_def));
|
||||
} else if (wt == Global_20) {
|
||||
delay = global_20.delay * wire_length;
|
||||
power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length;
|
||||
power.readOp.leakage = global_20.power.readOp.leakage * wire_length;
|
||||
|
@ -185,9 +187,9 @@ Wire::calculate_wire_stats()
|
|||
repeater_size = global_20.area.h;
|
||||
area.set_area((wire_length / repeater_spacing) *
|
||||
compute_gate_area(INV, 1, min_w_pmos * repeater_size,
|
||||
g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
|
||||
}
|
||||
else if (wt == Global_30) {
|
||||
g_tp.min_w_nmos_ * repeater_size,
|
||||
g_tp.cell_h_def));
|
||||
} else if (wt == Global_30) {
|
||||
delay = global_30.delay * wire_length;
|
||||
power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length;
|
||||
power.readOp.leakage = global_30.power.readOp.leakage * wire_length;
|
||||
|
@ -196,16 +198,15 @@ Wire::calculate_wire_stats()
|
|||
repeater_size = global_30.area.h;
|
||||
area.set_area((wire_length / repeater_spacing) *
|
||||
compute_gate_area(INV, 1, min_w_pmos * repeater_size,
|
||||
g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def));
|
||||
g_tp.min_w_nmos_ * repeater_size,
|
||||
g_tp.cell_h_def));
|
||||
}
|
||||
out_rise_time = delay * repeater_spacing / deviceType->Vth;
|
||||
}
|
||||
else if (wt == Low_swing) {
|
||||
} else if (wt == Low_swing) {
|
||||
low_swing_model ();
|
||||
repeater_spacing = wire_length;
|
||||
repeater_size = 1;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
@ -219,8 +220,7 @@ Wire::calculate_wire_stats()
|
|||
* section 6.1.3)
|
||||
*/
|
||||
double
|
||||
Wire::signal_fall_time ()
|
||||
{
|
||||
Wire::signal_fall_time () {
|
||||
|
||||
/* rise time of inverter 1's output */
|
||||
double rt;
|
||||
|
@ -232,19 +232,21 @@ Wire::signal_fall_time ()
|
|||
drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
|
||||
tr_R_on(min_w_pmos, PCH, 1);
|
||||
rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
|
||||
rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd,
|
||||
deviceType->Vth / deviceType->Vdd, FALL) /
|
||||
(deviceType->Vdd - deviceType->Vth);
|
||||
timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
|
||||
tr_R_on(g_tp.min_w_nmos_, NCH, 1);
|
||||
ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
|
||||
ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd,
|
||||
deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth;
|
||||
return ft;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double Wire::signal_rise_time ()
|
||||
{
|
||||
double Wire::signal_rise_time () {
|
||||
|
||||
/* rise time of inverter 1's output */
|
||||
double ft;
|
||||
|
@ -256,12 +258,15 @@ double Wire::signal_rise_time ()
|
|||
drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
|
||||
tr_R_on(g_tp.min_w_nmos_, NCH, 1);
|
||||
rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth;
|
||||
rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd,
|
||||
deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth;
|
||||
timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) *
|
||||
tr_R_on(min_w_pmos, PCH, 1);
|
||||
ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth);
|
||||
ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd,
|
||||
deviceType->Vth / deviceType->Vdd, FALL) /
|
||||
(deviceType->Vdd - deviceType->Vth);
|
||||
return ft; //sec
|
||||
}
|
||||
|
||||
|
@ -281,18 +286,19 @@ double Wire::signal_rise_time ()
|
|||
*
|
||||
*/
|
||||
|
||||
double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
||||
{
|
||||
double Wire::wire_cap (double len /* in m */, bool call_from_outside) {
|
||||
//TODO: this should be consistent with the wire_res in technology file
|
||||
double sidewall, adj, tot_cap;
|
||||
double wire_height;
|
||||
double epsilon0 = 8.8542e-12;
|
||||
double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness;
|
||||
double aspect_ratio;
|
||||
double horiz_dielectric_constant;
|
||||
double vert_dielectric_constant;
|
||||
double miller_value;
|
||||
double ild_thickness;
|
||||
|
||||
switch (wire_placement)
|
||||
{
|
||||
case outside_mat:
|
||||
{
|
||||
switch (wire_placement) {
|
||||
case outside_mat: {
|
||||
aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
|
||||
horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant;
|
||||
vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant;
|
||||
|
@ -300,8 +306,7 @@ double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
|||
ild_thickness = g_tp.wire_outside_mat.ild_thickness;
|
||||
break;
|
||||
}
|
||||
case inside_mat :
|
||||
{
|
||||
case inside_mat : {
|
||||
aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
|
||||
horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant;
|
||||
vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant;
|
||||
|
@ -309,8 +314,7 @@ double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
|||
ild_thickness = g_tp.wire_inside_mat.ild_thickness;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
default: {
|
||||
aspect_ratio = g_tp.wire_local.aspect_ratio;
|
||||
horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant;
|
||||
vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant;
|
||||
|
@ -320,8 +324,7 @@ double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
|||
}
|
||||
}
|
||||
|
||||
if (call_from_outside)
|
||||
{
|
||||
if (call_from_outside) {
|
||||
wire_width *= 1e-6;
|
||||
wire_spacing *= 1e-6;
|
||||
}
|
||||
|
@ -335,7 +338,8 @@ double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
|||
// sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
|
||||
// * epsilon0;
|
||||
|
||||
sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing)
|
||||
sidewall = miller_value * horiz_dielectric_constant *
|
||||
(wire_height / wire_spacing)
|
||||
* epsilon0;
|
||||
|
||||
|
||||
|
@ -343,14 +347,14 @@ double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
|||
//adj = miller_value * vert_dielectric_constant *w_scale * epsilon0;
|
||||
//adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
|
||||
|
||||
adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0;
|
||||
adj = miller_value * vert_dielectric_constant * wire_width /
|
||||
(ild_thickness * 1e-6) * epsilon0;
|
||||
//Change ild_thickness from micron to M
|
||||
|
||||
//tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m
|
||||
tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m
|
||||
|
||||
if (call_from_outside)
|
||||
{
|
||||
if (call_from_outside) {
|
||||
wire_width *= 1e6;
|
||||
wire_spacing *= 1e6;
|
||||
}
|
||||
|
@ -359,32 +363,32 @@ double Wire::wire_cap (double len /* in m */, bool call_from_outside)
|
|||
|
||||
|
||||
double
|
||||
Wire::wire_res (double len /*(in m)*/)
|
||||
{
|
||||
Wire::wire_res (double len /*(in m)*/) {
|
||||
|
||||
double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0;
|
||||
double aspect_ratio;
|
||||
double alpha_scatter = 1.05;
|
||||
double dishing_thickness = 0;
|
||||
double barrier_thickness = 0;
|
||||
//TODO: this should be consistent with the wire_res in technology file
|
||||
//The whole computation should be consistent with the wire_res in technology.cc too!
|
||||
|
||||
switch (wire_placement)
|
||||
{
|
||||
case outside_mat:
|
||||
{
|
||||
switch (wire_placement) {
|
||||
case outside_mat: {
|
||||
aspect_ratio = g_tp.wire_outside_mat.aspect_ratio;
|
||||
break;
|
||||
}
|
||||
case inside_mat :
|
||||
{
|
||||
case inside_mat : {
|
||||
aspect_ratio = g_tp.wire_inside_mat.aspect_ratio;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
default: {
|
||||
aspect_ratio = g_tp.wire_local.aspect_ratio;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)*
|
||||
return (alpha_scatter * resistivity * 1e-6 * len /
|
||||
((aspect_ratio*wire_width / w_scale - dishing_thickness -
|
||||
barrier_thickness)*
|
||||
(wire_width - 2*barrier_thickness)));
|
||||
}
|
||||
|
||||
|
@ -396,8 +400,7 @@ Wire::wire_res (double len /*(in m)*/)
|
|||
* (ref: Technical report 6)
|
||||
*/
|
||||
void
|
||||
Wire::low_swing_model()
|
||||
{
|
||||
Wire::low_swing_model() {
|
||||
double len = wire_length;
|
||||
double beta = pmos_to_nmos_sz_ratio();
|
||||
|
||||
|
@ -426,8 +429,7 @@ Wire::low_swing_model()
|
|||
nsize = MIN(nsize, g_tp.max_w_nmos_);
|
||||
nsize = MAX(nsize, g_tp.min_w_nmos_);
|
||||
|
||||
if(rwire*cwire > 8*g_tp.FO4)
|
||||
{
|
||||
if (rwire*cwire > 8*g_tp.FO4) {
|
||||
nsize = g_tp.max_w_nmos_;
|
||||
}
|
||||
|
||||
|
@ -435,10 +437,12 @@ Wire::low_swing_model()
|
|||
// Note - In order to minimize leakage, we are not adding a set of inverters to
|
||||
// bring down delay. Instead, we are sizing the single gate
|
||||
// based on the logical effort.
|
||||
double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0)
|
||||
double st_eff = sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) /
|
||||
(gate_C(2 * g_tp.min_w_nmos_, 0)
|
||||
+ gate_C(2 * min_w_pmos, 0)));
|
||||
double req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff;
|
||||
double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0));
|
||||
double inv_size = req_cin / (gate_C(min_w_pmos, 0) +
|
||||
gate_C(g_tp.min_w_nmos_, 0));
|
||||
inv_size = MAX(inv_size, 1);
|
||||
|
||||
/* nand gate delay */
|
||||
|
@ -473,7 +477,8 @@ Wire::low_swing_model()
|
|||
|
||||
|
||||
transmitter.delay = delay;
|
||||
transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. model*/
|
||||
/* since it is a diff. model*/
|
||||
transmitter.power.readOp.dynamic = temp_power * 2;
|
||||
transmitter.power.readOp.leakage = deviceType->Vdd *
|
||||
(4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) +
|
||||
4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv));
|
||||
|
@ -504,7 +509,8 @@ Wire::low_swing_model()
|
|||
* swing wires, the net time constant is less
|
||||
* than the actual value
|
||||
*/
|
||||
delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0);
|
||||
delay += horowitz(inputrise, timeconst, deviceType->Vth /
|
||||
deviceType->Vdd, .25, 0);
|
||||
#define VOL_SWING .1
|
||||
temp_power += cap_eq * VOL_SWING * .400; /* .4v is the over drive voltage */
|
||||
temp_power *= 2; /* differential wire */
|
||||
|
@ -538,8 +544,7 @@ Wire::low_swing_model()
|
|||
}
|
||||
|
||||
double
|
||||
Wire::sense_amp_input_cap()
|
||||
{
|
||||
Wire::sense_amp_input_cap() {
|
||||
return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) +
|
||||
gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) +
|
||||
drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) +
|
||||
|
@ -547,8 +552,7 @@ Wire::sense_amp_input_cap()
|
|||
}
|
||||
|
||||
|
||||
void Wire::delay_optimal_wire ()
|
||||
{
|
||||
void Wire::delay_optimal_wire () {
|
||||
double len = wire_length;
|
||||
//double min_wire_width = wire_width; //m
|
||||
double beta = pmos_to_nmos_sz_ratio();
|
||||
|
@ -571,7 +575,8 @@ void Wire::delay_optimal_wire ()
|
|||
double wc = wire_cap(len);
|
||||
|
||||
// size the repeater such that the delay of the wire is minimum
|
||||
double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel
|
||||
// len will cancel
|
||||
double repeater_scaling = sqrt(out_res * wc / (wr * input_cap));
|
||||
|
||||
// calc the optimum spacing between the repeaters (m)
|
||||
|
||||
|
@ -580,7 +585,8 @@ void Wire::delay_optimal_wire ()
|
|||
repeater_size = repeater_scaling;
|
||||
|
||||
switching = (repeater_scaling * (input_cap + out_cap) +
|
||||
repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
|
||||
repeater_spacing * (wc / len)) * deviceType->Vdd *
|
||||
deviceType->Vdd;
|
||||
|
||||
tc = out_res * (input_cap + out_cap) +
|
||||
out_res * wc / len * repeater_spacing / repeater_scaling +
|
||||
|
@ -595,14 +601,21 @@ void Wire::delay_optimal_wire ()
|
|||
|
||||
area.set_area((len / repeater_spacing) *
|
||||
compute_gate_area(INV, 1, min_w_pmos * repeater_scaling,
|
||||
g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def));
|
||||
g_tp.min_w_nmos_ * repeater_scaling,
|
||||
g_tp.cell_h_def));
|
||||
power.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt));
|
||||
power.readOp.leakage = ((len / repeater_spacing) *
|
||||
deviceType->Vdd *
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_ *
|
||||
repeater_scaling, beta *
|
||||
g_tp.min_w_nmos_ *
|
||||
repeater_scaling, 1, inv));
|
||||
power.readOp.gate_leakage = ((len / repeater_spacing) *
|
||||
deviceType->Vdd *
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv));
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_ *
|
||||
repeater_scaling, beta *
|
||||
g_tp.min_w_nmos_ *
|
||||
repeater_scaling, 1, inv));
|
||||
}
|
||||
|
||||
|
||||
|
@ -652,8 +665,7 @@ Wire::init_wire(){
|
|||
|
||||
|
||||
|
||||
void Wire::update_fullswing()
|
||||
{
|
||||
void Wire::update_fullswing() {
|
||||
|
||||
list<Component>::iterator citer;
|
||||
double del[4];
|
||||
|
@ -668,34 +680,30 @@ void Wire::update_fullswing()
|
|||
while (i > 0) {
|
||||
threshold = del[i-1];
|
||||
cost = BIGNUM;
|
||||
for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++)
|
||||
{
|
||||
for (citer = repeated_wire.begin(); citer != repeated_wire.end();
|
||||
citer++) {
|
||||
if (citer->delay > threshold) {
|
||||
citer = repeated_wire.erase(citer);
|
||||
citer --;
|
||||
}
|
||||
else {
|
||||
ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic +
|
||||
} else {
|
||||
ncost = citer->power.readOp.dynamic /
|
||||
global.power.readOp.dynamic +
|
||||
citer->power.readOp.leakage / global.power.readOp.leakage;
|
||||
if(ncost < cost)
|
||||
{
|
||||
if (ncost < cost) {
|
||||
cost = ncost;
|
||||
if (i == 4) {
|
||||
global_30.delay = citer->delay;
|
||||
global_30.power = citer->power;
|
||||
global_30.area = citer->area;
|
||||
}
|
||||
else if (i==3) {
|
||||
} else if (i == 3) {
|
||||
global_20.delay = citer->delay;
|
||||
global_20.power = citer->power;
|
||||
global_20.area = citer->area;
|
||||
}
|
||||
else if(i==2) {
|
||||
} else if (i == 2) {
|
||||
global_10.delay = citer->delay;
|
||||
global_10.power = citer->power;
|
||||
global_10.area = citer->area;
|
||||
}
|
||||
else if(i==1) {
|
||||
} else if (i == 1) {
|
||||
global_5.delay = citer->delay;
|
||||
global_5.power = citer->power;
|
||||
global_5.area = citer->area;
|
||||
|
@ -709,8 +717,7 @@ void Wire::update_fullswing()
|
|||
|
||||
|
||||
|
||||
powerDef Wire::wire_model (double space, double size, double *delay)
|
||||
{
|
||||
powerDef Wire::wire_model (double space, double size, double *delay) {
|
||||
powerDef ptemp;
|
||||
double len = 1;
|
||||
//double min_wire_width = wire_width; //m
|
||||
|
@ -741,7 +748,8 @@ powerDef Wire::wire_model (double space, double size, double *delay)
|
|||
repeater_size = size;
|
||||
|
||||
switching = (repeater_size * (input_cap + out_cap) +
|
||||
repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd;
|
||||
repeater_spacing * (wc / len)) * deviceType->Vdd *
|
||||
deviceType->Vdd;
|
||||
|
||||
tc = out_res * (input_cap + out_cap) +
|
||||
out_res * wc / len * repeater_spacing / repeater_size +
|
||||
|
@ -757,18 +765,23 @@ powerDef Wire::wire_model (double space, double size, double *delay)
|
|||
ptemp.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt));
|
||||
ptemp.readOp.leakage = ((len / repeater_spacing) *
|
||||
deviceType->Vdd *
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
|
||||
cmos_Isub_leakage(g_tp.min_w_nmos_ *
|
||||
repeater_size, beta *
|
||||
g_tp.min_w_nmos_ *
|
||||
repeater_size, 1, inv));
|
||||
|
||||
ptemp.readOp.gate_leakage = ((len / repeater_spacing) *
|
||||
deviceType->Vdd *
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv));
|
||||
cmos_Ig_leakage(g_tp.min_w_nmos_ *
|
||||
repeater_size, beta *
|
||||
g_tp.min_w_nmos_ *
|
||||
repeater_size, 1, inv));
|
||||
|
||||
return ptemp;
|
||||
}
|
||||
|
||||
void
|
||||
Wire::print_wire()
|
||||
{
|
||||
Wire::print_wire() {
|
||||
|
||||
cout << "\nWire Properties:\n\n";
|
||||
cout << " Delay Optimal\n\tRepeater size - " << global.area.h <<
|
||||
|
@ -776,7 +789,8 @@ Wire::print_wire()
|
|||
" \n\tDelay - " << global.delay*1e6 << " (ns/mm)"
|
||||
" \n\tPowerD - " << global.power.readOp.dynamic *1e6 << " (nJ/mm)"
|
||||
" \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)"
|
||||
" \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n";
|
||||
" \n\tPowerLgate - " << global.power.readOp.gate_leakage <<
|
||||
" (mW/mm)\n";
|
||||
cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
|
||||
cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
|
||||
cout << endl;
|
||||
|
@ -786,7 +800,8 @@ Wire::print_wire()
|
|||
" \n\tDelay - " << global_5.delay *1e6 << " (ns/mm)"
|
||||
" \n\tPowerD - " << global_5.power.readOp.dynamic *1e6 << " (nJ/mm)"
|
||||
" \n\tPowerL - " << global_5.power.readOp.leakage << " (mW/mm)"
|
||||
" \n\tPowerLgate - " << global_5.power.readOp.gate_leakage << " (mW/mm)\n";
|
||||
" \n\tPowerLgate - " << global_5.power.readOp.gate_leakage <<
|
||||
" (mW/mm)\n";
|
||||
cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
|
||||
cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
|
||||
cout << endl;
|
||||
|
@ -795,7 +810,8 @@ Wire::print_wire()
|
|||
" \n\tDelay - " << global_10.delay *1e6 << " (ns/mm)"
|
||||
" \n\tPowerD - " << global_10.power.readOp.dynamic *1e6 << " (nJ/mm)"
|
||||
" \n\tPowerL - " << global_10.power.readOp.leakage << " (mW/mm)"
|
||||
" \n\tPowerLgate - " << global_10.power.readOp.gate_leakage << " (mW/mm)\n";
|
||||
" \n\tPowerLgate - " << global_10.power.readOp.gate_leakage <<
|
||||
" (mW/mm)\n";
|
||||
cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
|
||||
cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
|
||||
cout << endl;
|
||||
|
@ -804,7 +820,8 @@ Wire::print_wire()
|
|||
" \n\tDelay - " << global_20.delay *1e6 << " (ns/mm)"
|
||||
" \n\tPowerD - " << global_20.power.readOp.dynamic *1e6 << " (nJ/mm)"
|
||||
" \n\tPowerL - " << global_20.power.readOp.leakage << " (mW/mm)"
|
||||
" \n\tPowerLgate - " << global_20.power.readOp.gate_leakage << " (mW/mm)\n";
|
||||
" \n\tPowerLgate - " << global_20.power.readOp.gate_leakage <<
|
||||
" (mW/mm)\n";
|
||||
cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
|
||||
cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
|
||||
cout << endl;
|
||||
|
@ -813,18 +830,23 @@ Wire::print_wire()
|
|||
" \n\tDelay - " << global_30.delay *1e6 << " (ns/mm)"
|
||||
" \n\tPowerD - " << global_30.power.readOp.dynamic *1e6 << " (nJ/mm)"
|
||||
" \n\tPowerL - " << global_30.power.readOp.leakage << " (mW/mm)"
|
||||
" \n\tPowerLgate - " << global_30.power.readOp.gate_leakage << " (mW/mm)\n";
|
||||
" \n\tPowerLgate - " << global_30.power.readOp.gate_leakage <<
|
||||
" (mW/mm)\n";
|
||||
cout << "\tWire width - " << wire_width_init*1e6 << " microns\n";
|
||||
cout << "\tWire spacing - " << wire_spacing_init*1e6 << " microns\n";
|
||||
cout << endl;
|
||||
cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\tdelay and power "
|
||||
"values of low-swing wires do not\n\thave a linear relationship with length." <<
|
||||
cout << " Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\t" <<
|
||||
"delay and power values of low-swing wires do not\n\t" <<
|
||||
"have a linear relationship with length." <<
|
||||
" \n\tdelay - " << low_swing.delay *1e9 << " (ns)"
|
||||
" \n\tpowerD - " << low_swing.power.readOp.dynamic *1e9 << " (nJ)"
|
||||
" \n\tPowerL - " << low_swing.power.readOp.leakage << " (mW)"
|
||||
" \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage << " (mW)\n";
|
||||
cout << "\tWire width - " <<wire_width_init * 2 /* differential */<< " microns\n";
|
||||
cout << "\tWire spacing - " <<wire_spacing_init * 2 /* differential */<< " microns\n";
|
||||
" \n\tPowerLgate - " << low_swing.power.readOp.gate_leakage <<
|
||||
" (mW)\n";
|
||||
cout << "\tWire width - " << wire_width_init * 2 /* differential */ <<
|
||||
" microns\n";
|
||||
cout << "\tWire spacing - " << wire_spacing_init * 2 /* differential */ <<
|
||||
" microns\n";
|
||||
cout << endl;
|
||||
cout << endl;
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT/CACTI
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -43,8 +44,7 @@
|
|||
#include "component.h"
|
||||
#include "parameter.h"
|
||||
|
||||
class Wire : public Component
|
||||
{
|
||||
class Wire : public Component {
|
||||
public:
|
||||
Wire(enum Wire_type wire_model, double len /* in u*/,
|
||||
int nsense = 1/* no. of sense amps connected to the low-swing wire */,
|
||||
|
@ -81,8 +81,7 @@ class Wire : public Component
|
|||
double wire_length;
|
||||
double in_rise_time, out_rise_time;
|
||||
|
||||
void set_in_rise_time(double rt)
|
||||
{
|
||||
void set_in_rise_time(double rt) {
|
||||
in_rise_time = rt;
|
||||
}
|
||||
static Component global;
|
||||
|
|
65
ext/mcpat/common.h
Normal file
65
ext/mcpat/common.h
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Author: Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef __COMMON_H__
|
||||
#define __COMMON_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "xmlParser.h"
|
||||
|
||||
// Macro definitions to do string comparison to specific parameter/stat.
|
||||
// Note: These macros assume node_name and value variables of type XMLCSTR
|
||||
// to exist already.
|
||||
// Expands to an `else if` branch that is taken when var and str compare
// equal under strcmp(). Because the expansion begins with `else`, it can
// only be used directly after the body of a preceding `if`/`else if`.
#define STRCMP(var, str) else if (strcmp(var, str) == 0)
|
||||
|
||||
// When node_name equals str, converts value with atoi() and stores the
// result in lhs. Expands through STRCMP, so it continues an existing
// if/else-if chain.
#define ASSIGN_INT_IF(str, lhs) STRCMP(node_name, str) \
|
||||
lhs = atoi(value)
|
||||
|
||||
// When node_name equals str, converts value with atof() and stores the
// result in lhs. Expands through STRCMP, so it continues an existing
// if/else-if chain.
#define ASSIGN_FP_IF(str, lhs) STRCMP(node_name, str) \
|
||||
lhs = atof(value)
|
||||
|
||||
// When node_name equals str, stores string(value) in lhs. Expands through
// STRCMP, so it continues an existing if/else-if chain.
#define ASSIGN_STR_IF(str, lhs) STRCMP(node_name, str) \
|
||||
lhs = string(value)
|
||||
|
||||
// When node_name equals str, converts value with atoi() and casts the
// result to etype before storing it in lhs. Expands through STRCMP, so it
// continues an existing if/else-if chain.
#define ASSIGN_ENUM_IF(str, lhs, etype) STRCMP(node_name, str) \
|
||||
lhs = (etype)atoi(value)
|
||||
|
||||
|
||||
// Constants shared across many system components
|
||||
#define BITS_PER_BYTE 8.0
|
||||
#define MIN_BUFFER_SIZE 64
|
||||
// CAM structures do not have any associativity
|
||||
#define CAM_ASSOC 0
|
||||
|
||||
#endif // __COMMON_H__
|
6602
ext/mcpat/core.cc
6602
ext/mcpat/core.cc
File diff suppressed because it is too large
Load diff
312
ext/mcpat/core.h
312
ext/mcpat/core.h
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -33,94 +34,131 @@
|
|||
#ifndef CORE_H_
|
||||
#define CORE_H_
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "array.h"
|
||||
#include "basic_components.h"
|
||||
#include "cacheunit.h"
|
||||
#include "interconnect.h"
|
||||
#include "logic.h"
|
||||
#include "parameter.h"
|
||||
#include "sharedcache.h"
|
||||
|
||||
class BranchPredictor :public Component {
|
||||
// Macros used in the various core-related classes
|
||||
#define NUM_SOURCE_OPERANDS 2
|
||||
#define NUM_INT_INST_SOURCE_OPERANDS 2
|
||||
|
||||
class BranchPredictorParameters {
|
||||
public:
|
||||
int assoc;
|
||||
int nbanks;
|
||||
int local_l1_predictor_size;
|
||||
int local_l2_predictor_size;
|
||||
int local_predictor_entries;
|
||||
int global_predictor_bits;
|
||||
int global_predictor_entries;
|
||||
int chooser_predictor_bits;
|
||||
int chooser_predictor_entries;
|
||||
};
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
class BranchPredictor : public McPATComponent {
|
||||
public:
|
||||
ArrayST* globalBPT;
|
||||
ArrayST* localBPT;
|
||||
ArrayST* L1_localBPT;
|
||||
ArrayST* L2_localBPT;
|
||||
ArrayST* chooser;
|
||||
ArrayST* RAS;
|
||||
|
||||
InputParameter interface_ip;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
BranchPredictorParameters branch_pred_params;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
bool exist;
|
||||
|
||||
BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats,
|
||||
bool exsit = true);
|
||||
void set_params_stats();
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~BranchPredictor();
|
||||
};
|
||||
|
||||
|
||||
class InstFetchU :public Component {
|
||||
class InstFetchParameters {
|
||||
public:
|
||||
int btb_size;
|
||||
int btb_block_size;
|
||||
int btb_assoc;
|
||||
int btb_num_banks;
|
||||
int btb_latency;
|
||||
int btb_throughput;
|
||||
int btb_rw_ports;
|
||||
};
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
enum Cache_policy cache_p;
|
||||
InstCache icache;
|
||||
class InstFetchStatistics {
|
||||
public:
|
||||
double btb_read_accesses;
|
||||
double btb_write_accesses;
|
||||
};
|
||||
|
||||
class InstFetchU : public McPATComponent {
|
||||
public:
|
||||
CacheUnit* icache;
|
||||
ArrayST* IB;
|
||||
ArrayST* BTB;
|
||||
BranchPredictor* BPT;
|
||||
inst_decoder * ID_inst;
|
||||
inst_decoder * ID_operand;
|
||||
inst_decoder * ID_misc;
|
||||
InstructionDecoder* ID_inst;
|
||||
InstructionDecoder* ID_operand;
|
||||
InstructionDecoder* ID_misc;
|
||||
|
||||
InputParameter interface_ip;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
InstFetchParameters inst_fetch_params;
|
||||
InstFetchStatistics inst_fetch_stats;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
enum Cache_policy cache_p;
|
||||
bool exist;
|
||||
|
||||
InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats,
|
||||
bool exsit = true);
|
||||
void set_params_stats();
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~InstFetchU();
|
||||
};
|
||||
|
||||
|
||||
class SchedulerU :public Component {
|
||||
class SchedulerU : public McPATComponent {
|
||||
public:
|
||||
static int ROB_STATUS_BITS;
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double Iw_height, fp_Iw_height,ROB_height;
|
||||
ArrayST* int_inst_window;
|
||||
ArrayST* fp_inst_window;
|
||||
ArrayST* ROB;
|
||||
selection_logic * instruction_selection;
|
||||
selection_logic* int_instruction_selection;
|
||||
selection_logic* fp_instruction_selection;
|
||||
|
||||
InputParameter interface_ip;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double Iw_height, fp_Iw_height, ROB_height;
|
||||
bool exist;
|
||||
|
||||
SchedulerU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats,
|
||||
bool exist_ = true);
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~SchedulerU();
|
||||
};
|
||||
|
||||
class RENAMINGU :public Component {
|
||||
class RENAMINGU : public McPATComponent {
|
||||
public:
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
double clockRate,executionTime;
|
||||
CoreDynParam coredynp;
|
||||
ArrayST* iFRAT;
|
||||
ArrayST* fFRAT;
|
||||
ArrayST* iRRAT;
|
||||
|
@ -129,118 +167,149 @@ class RENAMINGU :public Component {
|
|||
ArrayST* ffreeL;
|
||||
dep_resource_conflict_check* idcl;
|
||||
dep_resource_conflict_check* fdcl;
|
||||
ArrayST * RAHT;//register alias history table Used to store GC
|
||||
ArrayST* RAHT;
|
||||
|
||||
InputParameter interface_ip;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
bool exist;
|
||||
|
||||
|
||||
RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats,
|
||||
bool exist_ = true);
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~RENAMINGU();
|
||||
};
|
||||
|
||||
class LoadStoreU :public Component {
|
||||
class LoadStoreU : public McPATComponent {
|
||||
public:
|
||||
CacheUnit* dcache;
|
||||
ArrayST* LSQ;
|
||||
ArrayST* LoadQ;
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
enum Cache_policy cache_p;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double lsq_height;
|
||||
DataCache dcache;
|
||||
ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
|
||||
ArrayST * LoadQ;
|
||||
bool exist;
|
||||
|
||||
LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats,
|
||||
bool exist_ = true);
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~LoadStoreU();
|
||||
};
|
||||
|
||||
class MemManU :public Component {
|
||||
class MemoryManagementParams {
|
||||
public:
|
||||
int itlb_number_entries;
|
||||
double itlb_latency;
|
||||
double itlb_throughput;
|
||||
int itlb_assoc;
|
||||
int itlb_nbanks;
|
||||
int dtlb_number_entries;
|
||||
double dtlb_latency;
|
||||
double dtlb_throughput;
|
||||
int dtlb_assoc;
|
||||
int dtlb_nbanks;
|
||||
};
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
class MemoryManagementStats {
|
||||
public:
|
||||
double itlb_total_accesses;
|
||||
double itlb_total_misses;
|
||||
double itlb_conflicts;
|
||||
double dtlb_read_accesses;
|
||||
double dtlb_read_misses;
|
||||
double dtlb_write_accesses;
|
||||
double dtlb_write_misses;
|
||||
double dtlb_conflicts;
|
||||
};
|
||||
|
||||
class MemManU : public McPATComponent {
|
||||
public:
|
||||
ArrayST* itlb;
|
||||
ArrayST* dtlb;
|
||||
|
||||
InputParameter interface_ip;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
MemoryManagementParams mem_man_params;
|
||||
MemoryManagementStats mem_man_stats;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
bool exist;
|
||||
|
||||
MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats, bool exist_ = true);
|
||||
void set_params_stats();
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~MemManU();
|
||||
};
|
||||
|
||||
class RegFU :public Component {
|
||||
class RegFU : public McPATComponent {
|
||||
public:
|
||||
static int RFWIN_ACCESS_MULTIPLIER;
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double int_regfile_height, fp_regfile_height;
|
||||
ArrayST* IRF;
|
||||
ArrayST* FRF;
|
||||
ArrayST* RFWIN;
|
||||
|
||||
InputParameter interface_ip;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double int_regfile_height, fp_regfile_height;
|
||||
bool exist;
|
||||
|
||||
RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
RegFU(XMLNode* _xml_data,
|
||||
InputParameter* interface_ip_, const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats,
|
||||
bool exist_ = true);
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~RegFU();
|
||||
};
|
||||
|
||||
class EXECU :public Component {
|
||||
class EXECU : public McPATComponent {
|
||||
public:
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double lsq_height;
|
||||
CoreDynParam coredynp;
|
||||
RegFU* rfu;
|
||||
SchedulerU* scheu;
|
||||
FunctionalUnit* fp_u;
|
||||
FunctionalUnit* exeu;
|
||||
FunctionalUnit* mul;
|
||||
interconnect * int_bypass;
|
||||
interconnect * intTagBypass;
|
||||
interconnect * int_mul_bypass;
|
||||
interconnect * intTag_mul_Bypass;
|
||||
interconnect * fp_bypass;
|
||||
interconnect * fpTagBypass;
|
||||
Interconnect* int_bypass;
|
||||
Interconnect* intTagBypass;
|
||||
Interconnect* int_mul_bypass;
|
||||
Interconnect* intTag_mul_Bypass;
|
||||
Interconnect* fp_bypass;
|
||||
Interconnect* fpTagBypass;
|
||||
|
||||
Component bypass;
|
||||
InputParameter interface_ip;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
double lsq_height;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
bool exist;
|
||||
|
||||
EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
EXECU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
double lsq_height_, const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats, bool exist_ = true);
|
||||
void computeEnergy();
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
~EXECU();
|
||||
};
|
||||
|
||||
|
||||
class Core :public Component {
|
||||
class Core : public McPATComponent {
|
||||
public:
|
||||
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
InstFetchU* ifu;
|
||||
LoadStoreU* lsu;
|
||||
MemManU* mmu;
|
||||
|
@ -248,14 +317,21 @@ class Core :public Component {
|
|||
RENAMINGU* rnu;
|
||||
Pipeline* corepipe;
|
||||
UndiffCore* undiffCore;
|
||||
SharedCache * l2cache;
|
||||
CoreDynParam coredynp;
|
||||
//full_decoder inst_decoder;
|
||||
//clock_network clockNetwork;
|
||||
Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_);
|
||||
CacheUnit* l2cache;
|
||||
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
|
||||
// TODO: Migrate component ID handling into the XML data to remove this
|
||||
// ithCore variable
|
||||
Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_);
|
||||
void initialize_params();
|
||||
void initialize_stats();
|
||||
void set_core_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
void computeEnergy();
|
||||
~Core();
|
||||
};
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -33,51 +34,36 @@
|
|||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
#include "globalvar.h"
|
||||
#include "basic_components.h"
|
||||
#include "interconnect.h"
|
||||
#include "wire.h"
|
||||
|
||||
interconnect::interconnect(
|
||||
string name_,
|
||||
enum Device_ty device_ty_,
|
||||
double base_w, double base_h,
|
||||
int data_w, double len,const InputParameter *configure_interface,
|
||||
int start_wiring_level_,
|
||||
bool pipelinable_ ,
|
||||
double route_over_perc_ ,
|
||||
bool opt_local_,
|
||||
enum Core_type core_ty_,
|
||||
double Interconnect::width_scaling_threshold = 3.0;
|
||||
|
||||
Interconnect::Interconnect(XMLNode* _xml_data, string name_,
|
||||
enum Device_ty device_ty_, double base_w,
|
||||
double base_h, int data_w,
|
||||
double len,
|
||||
const InputParameter *configure_interface,
|
||||
int start_wiring_level_, double _clockRate,
|
||||
bool pipelinable_, double route_over_perc_,
|
||||
bool opt_local_, enum Core_type core_ty_,
|
||||
enum Wire_type wire_model,
|
||||
double width_s, double space_s,
|
||||
TechnologyParameter::DeviceType *dt
|
||||
)
|
||||
:name(name_),
|
||||
device_ty(device_ty_),
|
||||
in_rise_time(0),
|
||||
out_rise_time(0),
|
||||
base_width(base_w),
|
||||
base_height(base_h),
|
||||
data_width(data_w),
|
||||
wt(wire_model),
|
||||
width_scaling(width_s),
|
||||
space_scaling(space_s),
|
||||
start_wiring_level(start_wiring_level_),
|
||||
length(len),
|
||||
//interconnect_latency(1e-12),
|
||||
//interconnect_throughput(1e-12),
|
||||
opt_local(opt_local_),
|
||||
core_ty(core_ty_),
|
||||
pipelinable(pipelinable_),
|
||||
route_over_perc(route_over_perc_),
|
||||
deviceType(dt)
|
||||
{
|
||||
|
||||
wt = Global;
|
||||
TechnologyParameter::DeviceType *dt)
|
||||
: McPATComponent(_xml_data), device_ty(device_ty_), in_rise_time(0),
|
||||
out_rise_time(0), base_width(base_w), base_height(base_h),
|
||||
data_width(data_w), wt(wire_model), width_scaling(width_s),
|
||||
space_scaling(space_s), start_wiring_level(start_wiring_level_),
|
||||
length(len), opt_local(opt_local_), core_ty(core_ty_),
|
||||
pipelinable(pipelinable_), route_over_perc(route_over_perc_),
|
||||
deviceType(dt) {
|
||||
name = name_;
|
||||
clockRate = _clockRate;
|
||||
l_ip = *configure_interface;
|
||||
local_result = init_interface(&l_ip);
|
||||
local_result = init_interface(&l_ip, name);
|
||||
|
||||
|
||||
max_unpipelined_link_delay = 0; //TODO
|
||||
max_unpipelined_link_delay = 0;
|
||||
min_w_nmos = g_tp.min_w_nmos_;
|
||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
|
||||
|
||||
|
@ -88,52 +74,34 @@ interconnect::interconnect(
|
|||
latency_overflow = false;
|
||||
throughput_overflow = false;
|
||||
|
||||
/*
|
||||
* TODO: Add wiring option from semi-global to global automatically
|
||||
* And directly jump to global if semi-global cannot satisfy timing
|
||||
* Fat wires only available for global wires, thus
|
||||
* if signal wiring layer starts from semi-global,
|
||||
* the next layer up will be global, i.e., semi-global does
|
||||
* not have fat wires.
|
||||
*/
|
||||
if (pipelinable == false)
|
||||
if (pipelinable == false) {
|
||||
//Non-pipelinable wires, such as bypass logic, care about latency
|
||||
{
|
||||
compute();
|
||||
if (opt_for_clk && opt_local)
|
||||
{
|
||||
while (delay > latency && width_scaling<3.0)
|
||||
{
|
||||
calcWireData();
|
||||
if (opt_for_clk && opt_local) {
|
||||
while (delay > latency &&
|
||||
width_scaling < width_scaling_threshold) {
|
||||
width_scaling *= 2;
|
||||
space_scaling *= 2;
|
||||
Wire winit(width_scaling, space_scaling);
|
||||
compute();
|
||||
calcWireData();
|
||||
}
|
||||
if (delay > latency)
|
||||
{
|
||||
if (delay > latency) {
|
||||
latency_overflow = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else //Pipelinable wires, such as bus, does not care latency but throughput
|
||||
{
|
||||
/*
|
||||
* TODO: Add pipe regs power, area, and timing;
|
||||
* Pipelinable wires optimize latency first.
|
||||
*/
|
||||
compute();
|
||||
if (opt_for_clk && opt_local)
|
||||
{
|
||||
while (delay > throughput && width_scaling<3.0)
|
||||
{
|
||||
} else {
|
||||
//Pipelinable wires, such as buses, care about throughput rather than latency
|
||||
calcWireData();
|
||||
if (opt_for_clk && opt_local) {
|
||||
while (delay > throughput &&
|
||||
width_scaling < width_scaling_threshold) {
|
||||
width_scaling *= 2;
|
||||
space_scaling *= 2;
|
||||
Wire winit(width_scaling, space_scaling);
|
||||
compute();
|
||||
calcWireData();
|
||||
}
|
||||
if (delay > throughput)
|
||||
if (delay > throughput) {
|
||||
// insert pipeline stages
|
||||
{
|
||||
num_pipe_stages = (int)ceil(delay / throughput);
|
||||
assert(num_pipe_stages > 0);
|
||||
delay = delay / num_pipe_stages + num_pipe_stages * 0.05 * delay;
|
||||
|
@ -148,15 +116,17 @@ interconnect::interconnect(
|
|||
area.set_area(area.get_area()*data_width);
|
||||
no_device_under_wire_area.h *= data_width;
|
||||
|
||||
if (latency_overflow==true)
|
||||
cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint." << endl;
|
||||
|
||||
if (latency_overflow == true) {
|
||||
cout << "Warning: " << name
|
||||
<< " wire structure cannot satisfy latency constraint." << endl;
|
||||
}
|
||||
|
||||
assert(power.readOp.dynamic > 0);
|
||||
assert(power.readOp.leakage > 0);
|
||||
assert(power.readOp.gate_leakage > 0);
|
||||
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
||||
double long_channel_device_reduction =
|
||||
longer_channel_device_reduction(device_ty, core_ty);
|
||||
|
||||
double sckRation = g_tp.sckt_co_eff;
|
||||
power.readOp.dynamic *= sckRation;
|
||||
|
@ -166,8 +136,11 @@ interconnect::interconnect(
|
|||
power.readOp.longer_channel_leakage =
|
||||
power.readOp.leakage * long_channel_device_reduction;
|
||||
|
||||
if (pipelinable)//Only global wires has the option to choose whether routing over or not
|
||||
area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc));
|
||||
//Only global wires have the option to choose whether to route over devices or not
|
||||
if (pipelinable)
|
||||
area.set_area(area.get_area() * route_over_perc +
|
||||
no_device_under_wire_area.get_area() *
|
||||
(1 - route_over_perc));
|
||||
|
||||
Wire wreset();
|
||||
}
|
||||
|
@ -175,8 +148,7 @@ interconnect::interconnect(
|
|||
|
||||
|
||||
void
|
||||
interconnect::compute()
|
||||
{
|
||||
Interconnect::calcWireData() {
|
||||
|
||||
Wire *wtemp1 = 0;
|
||||
wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
|
||||
|
@ -194,12 +166,46 @@ interconnect::compute()
|
|||
|
||||
}
|
||||
|
||||
void interconnect::leakage_feedback(double temperature)
|
||||
{
|
||||
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
|
||||
uca_org_t init_result = init_interface(&l_ip); // init_result is dummy
|
||||
void
|
||||
Interconnect::computeEnergy() {
|
||||
double pppm_t[4] = {1, 1, 1, 1};
|
||||
|
||||
compute();
|
||||
// Compute TDP
|
||||
power_t.reset();
|
||||
set_pppm(pppm_t, int_params.active_ports * int_stats.duty_cycle,
|
||||
int_params.active_ports, int_params.active_ports,
|
||||
int_params.active_ports * int_stats.duty_cycle);
|
||||
power_t = power * pppm_t;
|
||||
|
||||
rt_power.reset();
|
||||
set_pppm(pppm_t, int_stats.accesses, int_params.active_ports,
|
||||
int_params.active_ports, int_stats.accesses);
|
||||
rt_power = power * pppm_t;
|
||||
|
||||
output_data.peak_dynamic_power = power_t.readOp.dynamic * clockRate;
|
||||
output_data.subthreshold_leakage_power = power_t.readOp.leakage;
|
||||
output_data.gate_leakage_power = power_t.readOp.gate_leakage;
|
||||
output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
|
||||
}
|
||||
|
||||
void
|
||||
Interconnect::computeArea() {
|
||||
output_data.area = area.get_area() / 1e6;
|
||||
}
|
||||
|
||||
void
|
||||
Interconnect::set_params_stats(double active_ports,
|
||||
double duty_cycle, double accesses) {
|
||||
int_params.active_ports = active_ports;
|
||||
int_stats.duty_cycle = duty_cycle;
|
||||
int_stats.accesses = accesses;
|
||||
}
|
||||
|
||||
void Interconnect::leakage_feedback(double temperature) {
|
||||
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
|
||||
uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
|
||||
|
||||
calcWireData();
|
||||
|
||||
power_bit = power;
|
||||
power.readOp.dynamic *= data_width;
|
||||
|
@ -210,13 +216,15 @@ void interconnect::leakage_feedback(double temperature)
|
|||
assert(power.readOp.leakage > 0);
|
||||
assert(power.readOp.gate_leakage > 0);
|
||||
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
||||
double long_channel_device_reduction =
|
||||
longer_channel_device_reduction(device_ty,core_ty);
|
||||
|
||||
double sckRation = g_tp.sckt_co_eff;
|
||||
power.readOp.dynamic *= sckRation;
|
||||
power.writeOp.dynamic *= sckRation;
|
||||
power.searchOp.dynamic *= sckRation;
|
||||
|
||||
power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
|
||||
power.readOp.longer_channel_leakage =
|
||||
power.readOp.leakage*long_channel_device_reduction;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -42,46 +43,31 @@
|
|||
#include "subarray.h"
|
||||
#include "wire.h"
|
||||
|
||||
// leakage power includes entire htree in a bank (when uca_tree == false)
|
||||
// leakage power includes only part to one bank when uca_tree == true
|
||||
|
||||
class interconnect : public Component
|
||||
{
|
||||
class InterconnectParameters {
|
||||
public:
|
||||
interconnect(
|
||||
string name_,
|
||||
enum Device_ty device_ty_,
|
||||
double base_w, double base_h, int data_w, double len,
|
||||
const InputParameter *configure_interface, int start_wiring_level_,
|
||||
bool pipelinable_ = false,
|
||||
double route_over_perc_ =0.5,
|
||||
bool opt_local_=true,
|
||||
enum Core_type core_ty_=Inorder,
|
||||
enum Wire_type wire_model=Global,
|
||||
double width_s=1.0, double space_s=1.0,
|
||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
|
||||
);
|
||||
double active_ports;
|
||||
};
|
||||
|
||||
~interconnect() {};
|
||||
class InterconnectStatistics {
|
||||
public:
|
||||
double duty_cycle;
|
||||
double accesses;
|
||||
};
|
||||
|
||||
class Interconnect : public McPATComponent {
|
||||
public:
|
||||
static double width_scaling_threshold;
|
||||
|
||||
void compute();
|
||||
string name;
|
||||
enum Device_ty device_ty;
|
||||
double in_rise_time, out_rise_time;
|
||||
InputParameter l_ip;
|
||||
uca_org_t local_result;
|
||||
Area no_device_under_wire_area;
|
||||
void set_in_rise_time(double rt)
|
||||
{
|
||||
in_rise_time = rt;
|
||||
}
|
||||
|
||||
void leakage_feedback(double temperature);
|
||||
double max_unpipelined_link_delay;
|
||||
powerDef power_bit;
|
||||
|
||||
double wire_bw;
|
||||
double init_wire_bw; // bus width at root
|
||||
double init_wire_bw;
|
||||
double base_width;
|
||||
double base_height;
|
||||
int data_width;
|
||||
|
@ -101,10 +87,30 @@ class interconnect : public Component
|
|||
bool pipelinable;
|
||||
double route_over_perc;
|
||||
int num_pipe_stages;
|
||||
|
||||
private:
|
||||
TechnologyParameter::DeviceType* deviceType;
|
||||
InterconnectParameters int_params;
|
||||
InterconnectStatistics int_stats;
|
||||
|
||||
Interconnect(XMLNode* _xml_data, string name_,
|
||||
enum Device_ty device_ty_, double base_w,
|
||||
double base_h, int data_w, double len,
|
||||
const InputParameter *configure_interface,
|
||||
int start_wiring_level_,
|
||||
double _clockRate = 0.0f,
|
||||
bool pipelinable_ = false, double route_over_perc_ = 0.5,
|
||||
bool opt_local_ = true, enum Core_type core_ty_ = Inorder,
|
||||
enum Wire_type wire_model = Global, double width_s = 1.0,
|
||||
double space_s = 1.0,
|
||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
|
||||
private:
|
||||
void calcWireData();
|
||||
public:
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
void set_params_stats(double active_ports,
|
||||
double duty_cycle, double accesses);
|
||||
void leakage_feedback(double temperature);
|
||||
~Interconnect() {};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
#include <algorithm>
|
||||
|
@ -34,14 +35,12 @@
|
|||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "basic_components.h"
|
||||
#include "common.h"
|
||||
#include "const.h"
|
||||
#include "io.h"
|
||||
#include "iocontrollers.h"
|
||||
#include "logic.h"
|
||||
#include "parameter.h"
|
||||
|
||||
/*
|
||||
SUN Niagara 2 I/O power analysis:
|
||||
|
@ -69,42 +68,80 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F
|
|||
*
|
||||
*/
|
||||
|
||||
NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_)
|
||||
:XML(XML_interface),
|
||||
interface_ip(*interface_ip_)
|
||||
{
|
||||
local_result = init_interface(&interface_ip);
|
||||
|
||||
double frontend_area, phy_area, mac_area, SerDer_area;
|
||||
double frontend_dyn, mac_dyn, SerDer_dyn;
|
||||
double frontend_gates, mac_gates, SerDer_gates;
|
||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||
double NMOS_sizing, PMOS_sizing;
|
||||
|
||||
NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_)
|
||||
: McPATComponent(_xml_data, interface_ip_) {
|
||||
name = "NIU";
|
||||
set_niu_param();
|
||||
}
|
||||
|
||||
if (niup.type == 0) //high performance NIU
|
||||
{
|
||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm.
|
||||
mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
|
||||
//and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS
|
||||
frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
|
||||
void NIUController::computeArea() {
|
||||
double mac_area;
|
||||
double frontend_area;
|
||||
double SerDer_area;
|
||||
|
||||
if (niup.type == 0) { //high performance NIU
|
||||
//Area estimation based on average of die photo from Niagara 2 and
|
||||
//Cadence ChipEstimate using 65nm.
|
||||
mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2, ISSCC
|
||||
//"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
|
||||
//and "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
|
||||
//With Robust VCO Tuning Technique" Frontend is PCS
|
||||
frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
|
||||
(interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2 and
|
||||
//Cadence ChipEstimate hard IP @65nm.
|
||||
//SerDer is very hard to scale
|
||||
SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
|
||||
phy_area = frontend_area + SerDer_area;
|
||||
SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
|
||||
0.065);//* (interface_ip.F_sz_um/0.065);
|
||||
} else {
|
||||
//Low power implementations are mostly from Cadence ChipEstimator;
|
||||
//Ignore the multiple IP effect
|
||||
// ---When there are multiple IP (same kind or not) selected, Cadence
|
||||
//ChipEstimator results are not a simple summation of all IPs.
|
||||
//Ignore this effect
|
||||
mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
|
||||
SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um/0.065);
|
||||
//Compare 130nm implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
|
||||
//Transceiver and XAUI Interface With Robust VCO Tuning Technique"
|
||||
//and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
|
||||
//scale perfectly with the technology
|
||||
}
|
||||
|
||||
//total area
|
||||
area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
|
||||
output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
|
||||
}
|
||||
|
||||
void NIUController::computeEnergy() {
|
||||
double mac_dyn;
|
||||
double frontend_dyn;
|
||||
double SerDer_dyn;
|
||||
double frontend_gates;
|
||||
double mac_gates;
|
||||
double SerDer_gates;
|
||||
double NMOS_sizing;
|
||||
double PMOS_sizing;
|
||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||
|
||||
if (niup.type == 0) { //high performance NIU
|
||||
//Power
|
||||
//Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
||||
mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
||||
//Cadence ChipEstimate using 65nm (mac, front_end are all energy.
|
||||
//E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
||||
//2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
||||
mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
|
||||
1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
|
||||
//Cadence ChipEstimate using 65nm soft IP;
|
||||
frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
|
||||
frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
|
||||
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||
//according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
|
||||
//SerDer_dyn is power not energy, scaling from 10mW/Gb/s @90nm
|
||||
SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
|
||||
SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
|
||||
SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
|
||||
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
|
||||
|
||||
//Cadence ChipEstimate using 65nm
|
||||
mac_gates = 111700;
|
||||
|
@ -112,157 +149,175 @@ NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_i
|
|||
SerDer_gates = 200000;
|
||||
NMOS_sizing = 5 * g_tp.min_w_nmos_;
|
||||
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect
|
||||
// ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not
|
||||
// a simple summation of all IPs. Ignore this effect
|
||||
mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer
|
||||
SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique"
|
||||
//and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology
|
||||
//total area
|
||||
area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
|
||||
} else {
|
||||
//Power
|
||||
//Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
||||
mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
||||
//Cadence ChipEstimate using 65nm (mac, front_end are all energy.
|
||||
//E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
||||
//2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
||||
mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
|
||||
/ 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
|
||||
//Cadence ChipEstimate using 65nm soft IP;
|
||||
frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
|
||||
frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
|
||||
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||
//SerDer_dyn is power not energy, scaling from 216mW/10Gb/s @130nm
|
||||
SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
|
||||
SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
|
||||
SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
|
||||
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
|
||||
|
||||
mac_gates = 111700;
|
||||
frontend_gates = 52000;
|
||||
SerDer_gates = 199260;
|
||||
|
||||
NMOS_sizing = g_tp.min_w_nmos_;
|
||||
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||
|
||||
}
|
||||
|
||||
power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
|
||||
power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
||||
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
|
||||
power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
||||
//convert to energy per clock cycle of whole NIU
|
||||
SerDer_dyn /= niup.clockRate;
|
||||
|
||||
power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
|
||||
power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) *
|
||||
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||
g_tp.peri_global.Vdd;//unit W
|
||||
double long_channel_device_reduction =
|
||||
longer_channel_device_reduction(Uncore_device);
|
||||
power.readOp.longer_channel_leakage =
|
||||
power.readOp.leakage * long_channel_device_reduction;
|
||||
power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) *
|
||||
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||
g_tp.peri_global.Vdd;//unit W
|
||||
|
||||
// Output power
|
||||
output_data.subthreshold_leakage_power =
|
||||
longer_channel_device ? power.readOp.longer_channel_leakage :
|
||||
power.readOp.leakage;
|
||||
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||
output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
|
||||
output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
|
||||
}
|
||||
|
||||
void NIUController::computeEnergy(bool is_tdp)
|
||||
{
|
||||
if (is_tdp)
|
||||
{
|
||||
void NIUController::set_niu_param() {
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
power = power_t;
|
||||
power.readOp.dynamic *= niup.duty_cycle;
|
||||
ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
|
||||
ASSIGN_INT_IF("num_units", niup.num_units);
|
||||
ASSIGN_INT_IF("type", niup.type);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
rt_power = power_t;
|
||||
rt_power.readOp.dynamic *= niup.perc_load;
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
||||
{
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent+2, ' ');
|
||||
bool long_channel = XML->sys.longer_channel_device;
|
||||
|
||||
if (is_tdp)
|
||||
{
|
||||
cout << "NIU:" << endl;
|
||||
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl;
|
||||
cout << indent_str<< "Subthreshold Leakage = "
|
||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl;
|
||||
cout<<endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void NIUController::set_niu_param()
|
||||
{
|
||||
niup.clockRate = XML->sys.niu.clockrate;
|
||||
// Change from MHz to Hz
|
||||
niup.clockRate *= 1e6;
|
||||
niup.num_units = XML->sys.niu.number_units;
|
||||
niup.duty_cycle = XML->sys.niu.duty_cycle;
|
||||
niup.perc_load = XML->sys.niu.total_load_perc;
|
||||
niup.type = XML->sys.niu.type;
|
||||
// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
||||
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
|
||||
ASSIGN_FP_IF("perc_load", nius.perc_load);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_)
|
||||
:XML(XML_interface),
|
||||
interface_ip(*interface_ip_)
|
||||
{
|
||||
local_result = init_interface(&interface_ip);
|
||||
double frontend_area, phy_area, ctrl_area, SerDer_area;
|
||||
double ctrl_dyn, frontend_dyn, SerDer_dyn;
|
||||
double ctrl_gates,frontend_gates, SerDer_gates;
|
||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||
double NMOS_sizing, PMOS_sizing;
|
||||
PCIeController::PCIeController(XMLNode* _xml_data,
|
||||
InputParameter* interface_ip_)
|
||||
: McPATComponent(_xml_data, interface_ip_) {
|
||||
name = "PCIe";
|
||||
set_pcie_param();
|
||||
}
|
||||
|
||||
void PCIeController::computeArea() {
|
||||
double ctrl_area;
|
||||
double SerDer_area;
|
||||
|
||||
/* Assuming PCIe is bit-slice based architecture
|
||||
* This is the reason for /8 in both area and power calculation
|
||||
* to get per lane numbers
|
||||
*/
|
||||
|
||||
set_pcie_param();
|
||||
if (pciep.type == 0) //high performance NIU
|
||||
{
|
||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm.
|
||||
ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
|
||||
frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
|
||||
if (pciep.type == 0) { //high performance PCIe
|
||||
//Area estimation based on average of die photo from Niagara 2 and
|
||||
//Cadence ChipEstimate @ 65nm.
|
||||
ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2 and
|
||||
//Cadence ChipEstimate hard IP @65nm.
|
||||
//SerDer is very hard to scale
|
||||
SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
|
||||
phy_area = frontend_area + SerDer_area;
|
||||
//total area
|
||||
SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um /
|
||||
0.065);//* (interface_ip.F_sz_um/0.065);
|
||||
} else {
|
||||
ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2, and
|
||||
//Cadence ChipEstimate @ 65nm.
|
||||
SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
}
|
||||
|
||||
// Total area
|
||||
output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 *
|
||||
pciep.num_channels) * 1e6;
|
||||
}
|
||||
|
||||
void PCIeController::computeEnergy() {
|
||||
double ctrl_dyn;
|
||||
double SerDer_dyn;
|
||||
double ctrl_gates;
|
||||
double SerDer_gates = 0;
|
||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||
double NMOS_sizing;
|
||||
double PMOS_sizing;
|
||||
|
||||
/* Assuming PCIe is bit-slice based architecture
|
||||
* This is the reason for /8 in both area and power calculation
|
||||
* to get per lane numbers
|
||||
*/
|
||||
|
||||
if (pciep.type == 0) { //high performance PCIe
|
||||
//Power
|
||||
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
|
||||
ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||
ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
|
||||
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||
// //Cadence ChipEstimate using 65nm soft IP;
|
||||
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||
//SerDer_dyn is power not energy, scaling from 10mW/Gb/s @90nm
|
||||
SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
|
||||
SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
|
||||
//PCIe 2.0 max per lane speed is 4Gb/s
|
||||
SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
|
||||
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
|
||||
|
||||
//power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels;
|
||||
//Cadence ChipEstimate using 65nm
|
||||
ctrl_gates = 900000 / 8 * pciep.num_channels;
|
||||
// frontend_gates = 120000/8;
|
||||
// SerDer_gates = 200000/8;
|
||||
NMOS_sizing = 5 * g_tp.min_w_nmos_;
|
||||
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||
}
|
||||
else
|
||||
{
|
||||
ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
|
||||
SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//total area
|
||||
} else {
|
||||
//Power
|
||||
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
|
||||
ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||
ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
|
||||
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||
// //Cadence ChipEstimate using 65nm soft IP;
|
||||
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||
//SerDer_dyn is power not energy, scaling from 10mW/Gb/s @90nm
|
||||
SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
|
||||
SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
|
||||
//PCIe 2.0 max per lane speed is 4Gb/s
|
||||
SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
|
||||
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
|
||||
|
||||
//Cadence ChipEstimate using 65nm
|
||||
ctrl_gates = 200000 / 8 * pciep.num_channels;
|
||||
|
@ -272,175 +327,214 @@ PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface
|
|||
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||
|
||||
}
|
||||
area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6);
|
||||
power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels;
|
||||
power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
||||
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
|
||||
power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
||||
|
||||
//convert to energy per clock cycle
|
||||
SerDer_dyn /= pciep.clockRate;
|
||||
|
||||
power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
|
||||
pciep.num_channels;
|
||||
power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
|
||||
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||
g_tp.peri_global.Vdd;//unit W
|
||||
double long_channel_device_reduction =
|
||||
longer_channel_device_reduction(Uncore_device);
|
||||
power.readOp.longer_channel_leakage =
|
||||
power.readOp.leakage * long_channel_device_reduction;
|
||||
power.readOp.gate_leakage = (ctrl_gates +
|
||||
(pciep.withPHY ? SerDer_gates : 0)) *
|
||||
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||
g_tp.peri_global.Vdd;//unit W
|
||||
|
||||
// Output power
|
||||
output_data.subthreshold_leakage_power =
|
||||
longer_channel_device ? power.readOp.longer_channel_leakage :
|
||||
power.readOp.leakage;
|
||||
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||
output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
|
||||
output_data.runtime_dynamic_energy =
|
||||
power.readOp.dynamic * pcies.perc_load;
|
||||
}
|
||||
|
||||
void PCIeController::computeEnergy(bool is_tdp)
|
||||
{
|
||||
if (is_tdp)
|
||||
{
|
||||
void PCIeController::set_pcie_param() {
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
power = power_t;
|
||||
power.readOp.dynamic *= pciep.duty_cycle;
|
||||
ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
|
||||
ASSIGN_INT_IF("num_units", pciep.num_units);
|
||||
ASSIGN_INT_IF("num_channels", pciep.num_channels);
|
||||
ASSIGN_INT_IF("type", pciep.type);
|
||||
ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
rt_power = power_t;
|
||||
rt_power.readOp.dynamic *= pciep.perc_load;
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
||||
{
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent+2, ' ');
|
||||
bool long_channel = XML->sys.longer_channel_device;
|
||||
|
||||
if (is_tdp)
|
||||
{
|
||||
cout << "PCIe:" << endl;
|
||||
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl;
|
||||
cout << indent_str<< "Subthreshold Leakage = "
|
||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl;
|
||||
cout<<endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void PCIeController::set_pcie_param()
|
||||
{
|
||||
pciep.clockRate = XML->sys.pcie.clockrate;
|
||||
// Change from MHz to Hz
|
||||
pciep.clockRate *= 1e6;
|
||||
pciep.num_units = XML->sys.pcie.number_units;
|
||||
pciep.num_channels = XML->sys.pcie.num_channels;
|
||||
pciep.duty_cycle = XML->sys.pcie.duty_cycle;
|
||||
pciep.perc_load = XML->sys.pcie.total_load_perc;
|
||||
pciep.type = XML->sys.pcie.type;
|
||||
pciep.withPHY = XML->sys.pcie.withPHY;
|
||||
// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
||||
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
|
||||
ASSIGN_FP_IF("perc_load", pcies.perc_load);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_)
|
||||
:XML(XML_interface),
|
||||
interface_ip(*interface_ip_)
|
||||
{
|
||||
local_result = init_interface(&interface_ip);
|
||||
double frontend_area, phy_area, ctrl_area, SerDer_area;
|
||||
double ctrl_dyn, frontend_dyn, SerDer_dyn;
|
||||
double ctrl_gates,frontend_gates, SerDer_gates;
|
||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||
double NMOS_sizing, PMOS_sizing;
|
||||
FlashController::FlashController(XMLNode* _xml_data,
|
||||
InputParameter* interface_ip_)
|
||||
: McPATComponent(_xml_data, interface_ip_) {
|
||||
name = "Flash Controller";
|
||||
set_fc_param();
|
||||
}
|
||||
|
||||
/* Assuming PCIe is bit-slice based architecture
|
||||
void FlashController::computeArea() {
|
||||
double ctrl_area;
|
||||
double SerDer_area;
|
||||
|
||||
/* Assuming Flash is bit-slice based architecture
|
||||
* This is the reason for /8 in both area and power calculation
|
||||
* to get per lane numbers
|
||||
*/
|
||||
|
||||
set_fc_param();
|
||||
if (fcp.type == 0) //high performance NIU
|
||||
{
|
||||
cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<<endl;
|
||||
if (fcp.type == 0) { //high performance flash controller
|
||||
cout << "Current McPAT does not support high performance flash "
|
||||
<< "controller since even low power designs are enough for "
|
||||
<< "maintain throughput" <<endl;
|
||||
exit(0);
|
||||
} else {
|
||||
ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
//Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
|
||||
//from CAST
|
||||
SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) *
|
||||
(interface_ip.F_sz_um / 0.065);
|
||||
}
|
||||
|
||||
double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
|
||||
output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
|
||||
1e6 * number_channel;
|
||||
}
|
||||
|
||||
void FlashController::computeEnergy() {
|
||||
double ctrl_dyn;
|
||||
double SerDer_dyn;
|
||||
double ctrl_gates;
|
||||
double SerDer_gates;
|
||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||
double NMOS_sizing;
|
||||
double PMOS_sizing;
|
||||
|
||||
/* Assuming Flash is bit-slice based architecture
|
||||
* This is the reason for /8 in both area and power calculation
|
||||
* to get per lane numbers
|
||||
*/
|
||||
|
||||
if (fcp.type == 0) { //high performance flash controller
|
||||
cout << "Current McPAT does not support high performance flash "
|
||||
<< "controller since even low power designs are enough for "
|
||||
<< "maintain throughput" <<endl;
|
||||
exit(0);
|
||||
NMOS_sizing = 5 * g_tp.min_w_nmos_;
|
||||
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||
}
|
||||
else
|
||||
{
|
||||
ctrl_area = 0.243 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from CAST
|
||||
SerDer_area = 0.36/8 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
||||
//based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support 8x lanes with each lane
|
||||
//speed up to 250MB/s (PCIe1.1x) This is already saturate the 200MB/s of the flash controller core above.
|
||||
} else {
|
||||
//Based on PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it
|
||||
//supports 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
|
||||
//This already saturates the 200MB/s of the flash controller core
|
||||
//above.
|
||||
ctrl_gates = 129267;
|
||||
SerDer_gates = 200000 / 8;
|
||||
NMOS_sizing = g_tp.min_w_nmos_;
|
||||
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||
|
||||
//Power
|
||||
//Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s This is power not energy!
|
||||
ctrl_dyn = 0.125*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||
//Cadence ChipEstimate using 65nm the controller 125mW for every
|
||||
//200MB/s This is power not energy!
|
||||
ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
|
||||
1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||
//SerDer_dyn is power not energy, scaling from 10mW/Gb/s @90nm
|
||||
SerDer_dyn = 0.01*1.6*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
|
||||
SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
|
||||
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
|
||||
//max Per controller speed is 1.6Gb/s (200MB/s)
|
||||
}
|
||||
|
||||
double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
|
||||
area.set_area((ctrl_area + (fcp.withPHY? SerDer_area:0))*1e6*number_channel);
|
||||
power_t.readOp.dynamic = (ctrl_dyn + (fcp.withPHY? SerDer_dyn:0))*number_channel;
|
||||
power_t.readOp.leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
||||
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
|
||||
power_t.readOp.gate_leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
||||
}
|
||||
|
||||
void FlashController::computeEnergy(bool is_tdp)
|
||||
{
|
||||
if (is_tdp)
|
||||
{
|
||||
|
||||
|
||||
power = power_t;
|
||||
power.readOp.dynamic *= fcp.duty_cycle;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
rt_power = power_t;
|
||||
rt_power.readOp.dynamic *= fcp.perc_load;
|
||||
}
|
||||
}
|
||||
|
||||
void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
||||
{
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent+2, ' ');
|
||||
bool long_channel = XML->sys.longer_channel_device;
|
||||
|
||||
if (is_tdp)
|
||||
{
|
||||
cout << "Flash Controller:" << endl;
|
||||
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already
|
||||
cout << indent_str<< "Subthreshold Leakage = "
|
||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
|
||||
cout<<endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
}
|
||||
power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
|
||||
number_channel;
|
||||
power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) *
|
||||
number_channel) *
|
||||
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||
g_tp.peri_global.Vdd;//unit W
|
||||
double long_channel_device_reduction =
|
||||
longer_channel_device_reduction(Uncore_device);
|
||||
power.readOp.longer_channel_leakage =
|
||||
power.readOp.leakage * long_channel_device_reduction;
|
||||
power.readOp.gate_leakage =
|
||||
((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) *
|
||||
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||
g_tp.peri_global.Vdd;//unit W
|
||||
|
||||
// Output power
|
||||
output_data.subthreshold_leakage_power =
|
||||
longer_channel_device ? power.readOp.longer_channel_leakage :
|
||||
power.readOp.leakage;
|
||||
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||
output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
|
||||
output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;
|
||||
}
|
||||
|
||||
void FlashController::set_fc_param()
|
||||
{
|
||||
// fcp.clockRate = XML->sys.flashc.mc_clock;
|
||||
// fcp.clockRate *= 1e6;
|
||||
fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
|
||||
fcp.num_channels = ceil(fcp.peakDataTransferRate/200);
|
||||
fcp.num_mcs = XML->sys.flashc.number_mcs;
|
||||
fcp.duty_cycle = XML->sys.flashc.duty_cycle;
|
||||
fcp.perc_load = XML->sys.flashc.total_load_perc;
|
||||
fcp.type = XML->sys.flashc.type;
|
||||
fcp.withPHY = XML->sys.flashc.withPHY;
|
||||
// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_INT_IF("num_channels", fcp.num_channels);
|
||||
ASSIGN_INT_IF("type", fcp.type);
|
||||
ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);
|
||||
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
|
||||
ASSIGN_FP_IF("perc_load", fcs.perc_load);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,63 +26,52 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
#ifndef IOCONTROLLERS_H_
|
||||
#define IOCONTROLLERS_H_
|
||||
|
||||
|
||||
#endif /* IOCONTROLLERS_H_ */
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "parameter.h"
|
||||
//#include "io.h"
|
||||
#include "array.h"
|
||||
//#include "Undifferentiated_Core_Area.h"
|
||||
#include <vector>
|
||||
|
||||
#include "array.h"
|
||||
#include "basic_components.h"
|
||||
#include "parameter.h"
|
||||
|
||||
class NIUController : public Component {
|
||||
class NIUController : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
InputParameter interface_ip;
|
||||
NIUParam niup;
|
||||
powerDef power_t;
|
||||
uca_org_t local_result;
|
||||
NIUController(ParseXML *XML_interface,InputParameter* interface_ip_);
|
||||
NIUParameters niup;
|
||||
NIUStatistics nius;
|
||||
|
||||
NIUController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||
void set_niu_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
~NIUController(){};
|
||||
};
|
||||
|
||||
class PCIeController : public Component {
|
||||
class PCIeController : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
InputParameter interface_ip;
|
||||
PCIeParam pciep;
|
||||
powerDef power_t;
|
||||
uca_org_t local_result;
|
||||
PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_);
|
||||
PCIeParameters pciep;
|
||||
PCIeStatistics pcies;
|
||||
|
||||
PCIeController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||
void set_pcie_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
~PCIeController(){};
|
||||
};
|
||||
|
||||
class FlashController : public Component {
|
||||
class FlashController : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
InputParameter interface_ip;
|
||||
MCParam fcp;
|
||||
powerDef power_t;
|
||||
uca_org_t local_result;
|
||||
FlashController(ParseXML *XML_interface,InputParameter* interface_ip_);
|
||||
MCParameters fcp;
|
||||
MCStatistics fcs;
|
||||
|
||||
FlashController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||
void set_fc_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
~FlashController(){};
|
||||
};
|
||||
|
||||
#endif /* IOCONTROLLERS_H_ */
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,18 +26,16 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
#ifndef LOGIC_H_
|
||||
#define LOGIC_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "arch_const.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "basic_components.h"
|
||||
|
@ -49,42 +48,48 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
class selection_logic : public Component{
|
||||
class selection_logic : public McPATComponent {
|
||||
public:
|
||||
selection_logic(bool _is_default, int win_entries_,
|
||||
int issue_width_, const InputParameter *configure_interface,
|
||||
enum Device_ty device_ty_=Core_device,
|
||||
enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface);
|
||||
bool is_default;
|
||||
InputParameter l_ip;
|
||||
uca_org_t local_result;
|
||||
const ParseXML *XML_interface;
|
||||
int win_entries;
|
||||
int issue_width;
|
||||
double accesses;
|
||||
int num_threads;
|
||||
enum Device_ty device_ty;
|
||||
enum Core_type core_ty;
|
||||
|
||||
void selection_power();
|
||||
selection_logic(XMLNode* _xml_data, bool _is_default, int _win_entries,
|
||||
int issue_width_, const InputParameter* configure_interface,
|
||||
string _name, double _accesses,
|
||||
double clockRate_ = 0.0f,
|
||||
enum Device_ty device_ty_ = Core_device,
|
||||
enum Core_type core_ty_ = Inorder);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
void leakage_feedback(double temperature); // TODO
|
||||
// TODO: Add a destructor
|
||||
};
|
||||
|
||||
class dep_resource_conflict_check : public Component{
|
||||
class dep_resource_conflict_check : public McPATComponent {
|
||||
public:
|
||||
dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true);
|
||||
InputParameter l_ip;
|
||||
uca_org_t local_result;
|
||||
double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
|
||||
CoreDynParam coredynp;
|
||||
CoreParameters coredynp;
|
||||
int compare_bits;
|
||||
bool is_default;
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
|
||||
dep_resource_conflict_check(XMLNode* _xml_data, const string _name,
|
||||
const InputParameter *configure_interface,
|
||||
const CoreParameters & dyn_p_, int compare_bits_,
|
||||
double clockRate_ = 0.0f,
|
||||
bool _is_default = true);
|
||||
void conflict_check_power();
|
||||
double compare_cap();
|
||||
void computeEnergy() {};
|
||||
~dep_resource_conflict_check() {
|
||||
local_result.cleanup();
|
||||
}
|
||||
|
@ -92,15 +97,11 @@ public:
|
|||
void leakage_feedback(double temperature);
|
||||
};
|
||||
|
||||
class inst_decoder: public Component{
|
||||
class InstructionDecoder: public McPATComponent {
|
||||
public:
|
||||
inst_decoder(bool _is_default, const InputParameter *configure_interface,
|
||||
int opcode_length_,
|
||||
int num_decoders_,
|
||||
bool x86_,
|
||||
enum Device_ty device_ty_=Core_device,
|
||||
enum Core_type core_ty_=Inorder);
|
||||
inst_decoder();
|
||||
Decoder* final_dec;
|
||||
Predec* pre_dec;
|
||||
|
||||
bool is_default;
|
||||
int opcode_length;
|
||||
int num_decoders;
|
||||
|
@ -111,23 +112,24 @@ public:
|
|||
uca_org_t local_result;
|
||||
enum Device_ty device_ty;
|
||||
enum Core_type core_ty;
|
||||
|
||||
Decoder * final_dec;
|
||||
Predec * pre_dec;
|
||||
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
|
||||
InstructionDecoder(XMLNode* _xml_data, const string _name, bool _is_default,
|
||||
const InputParameter *configure_interface,
|
||||
int opcode_length_, int num_decoders_, bool x86_,
|
||||
double clockRate_ = 0.0f,
|
||||
enum Device_ty device_ty_ = Core_device,
|
||||
enum Core_type core_ty_ = Inorder);
|
||||
InstructionDecoder();
|
||||
void computeEnergy() {};
|
||||
void inst_decoder_delay_power();
|
||||
~inst_decoder();
|
||||
~InstructionDecoder();
|
||||
void leakage_feedback(double temperature);
|
||||
};
|
||||
|
||||
// TODO: This should be defined elsewhere? This isn't a true McPATComponent
|
||||
class DFFCell : public Component {
|
||||
public:
|
||||
DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load,
|
||||
const InputParameter *configure_interface);
|
||||
InputParameter l_ip;
|
||||
bool is_dram;
|
||||
double cell_load;
|
||||
|
@ -144,90 +146,92 @@ public:
|
|||
powerDef e_keep_0;
|
||||
powerDef e_clock;
|
||||
|
||||
DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, double _cell_load,
|
||||
const InputParameter *configure_interface);
|
||||
double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
|
||||
void compute_DFF_cell(void);
|
||||
~DFFCell() {};
|
||||
};
|
||||
|
||||
class Pipeline : public Component{
|
||||
// TODO: This is a very ambiguous component. Try to refactor it.
|
||||
class Pipeline : public McPATComponent {
|
||||
public:
|
||||
Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true);
|
||||
InputParameter l_ip;
|
||||
uca_org_t local_result;
|
||||
CoreDynParam coredynp;
|
||||
CoreParameters coredynp;
|
||||
enum Device_ty device_ty;
|
||||
bool is_core_pipeline, is_default;
|
||||
double num_piperegs;
|
||||
// int pipeline_stages;
|
||||
// int tot_stage_vector, per_stage_vector;
|
||||
bool process_ind;
|
||||
double WNANDn;
|
||||
double WNANDp;
|
||||
double load_per_pipeline_stage;
|
||||
// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
|
||||
// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
|
||||
// bool thread_clock_gated;
|
||||
// bool in_order, multithreaded;
|
||||
|
||||
Pipeline(XMLNode* _xml_data, const InputParameter *configure_interface,
|
||||
const CoreParameters & dyn_p_,
|
||||
enum Device_ty device_ty_ = Core_device,
|
||||
bool _is_core_pipeline = true, bool _is_default = true);
|
||||
void compute_stage_vector();
|
||||
/**
|
||||
* TODO: compute() completes work that should be completed in computeArea()
|
||||
* and computeEnergy() recursively. Consider shifting these calculations
|
||||
* around to be consistent with rest of hierarchy
|
||||
*/
|
||||
void compute();
|
||||
void computeArea() {};
|
||||
// TODO: Move energy computation to this function to unify hierarchy
|
||||
void computeEnergy() {};
|
||||
~Pipeline() {
|
||||
local_result.cleanup();
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
//class core_pipeline :public pipeline{
|
||||
//public:
|
||||
// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
|
||||
// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
|
||||
// bool thread_clock_gated;
|
||||
// bool in_order, multithreaded;
|
||||
// core_pipeline(bool _is_default, const InputParameter *configure_interface);
|
||||
// virtual void compute_stage_vector();
|
||||
//
|
||||
//};
|
||||
|
||||
class FunctionalUnit :public Component{
|
||||
class FunctionalUnit : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
CoreParameters core_params;
|
||||
CoreStatistics core_stats;
|
||||
double FU_height;
|
||||
double clockRate,executionTime;
|
||||
double num_fu;
|
||||
double energy, base_energy,per_access_energy, leakage, gate_leakage;
|
||||
double energy;
|
||||
double base_energy;
|
||||
double per_access_energy;
|
||||
bool is_default;
|
||||
enum FU_type fu_type;
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
|
||||
FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
FunctionalUnit(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & _core_params,
|
||||
const CoreStatistics & _core_stats, enum FU_type fu_type);
|
||||
void computeEnergy();
|
||||
void leakage_feedback(double temperature);
|
||||
|
||||
~FunctionalUnit() {};
|
||||
};
|
||||
|
||||
class UndiffCore :public Component{
|
||||
// TODO: This is a very ambiguous component. Try to refactor it.
|
||||
class UndiffCore : public McPATComponent {
|
||||
public:
|
||||
UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false);
|
||||
ParseXML *XML;
|
||||
int ithCore;
|
||||
InputParameter interface_ip;
|
||||
CoreDynParam coredynp;
|
||||
double clockRate,executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
CoreParameters coredynp;
|
||||
double scktRatio;
|
||||
double chip_PR_overhead;
|
||||
double macro_PR_overhead;
|
||||
enum Core_type core_ty;
|
||||
bool opt_performance, embedded;
|
||||
double pipeline_stage,num_hthreads,issue_width;
|
||||
bool opt_performance;
|
||||
bool embedded;
|
||||
double pipeline_stage;
|
||||
double num_hthreads;
|
||||
double issue_width;
|
||||
bool is_default;
|
||||
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
~UndiffCore(){};
|
||||
bool exist;
|
||||
|
||||
|
||||
UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const CoreParameters & dyn_p_,
|
||||
bool exist_ = true);
|
||||
void computeArea() {};
|
||||
// TODO: Move energy computation to this function to unify hierarchy
|
||||
void computeEnergy() {};
|
||||
~UndiffCore() {};
|
||||
};
|
||||
#endif /* LOGIC_H_ */
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,15 +26,17 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "globalvar.h"
|
||||
#include "basic_components.h"
|
||||
#include "io.h"
|
||||
#include "processor.h"
|
||||
#include "system.h"
|
||||
#include "version.h"
|
||||
#include "xmlParser.h"
|
||||
|
||||
|
@ -41,61 +44,68 @@ using namespace std;
|
|||
|
||||
void print_usage(char * argv0);
|
||||
|
||||
int main(int argc,char *argv[])
|
||||
{
|
||||
char * fb ;
|
||||
bool infile_specified = false;
|
||||
int main(int argc, char *argv[]) {
|
||||
char* xml_file = NULL;
|
||||
int plevel = 2;
|
||||
opt_for_clk =true;
|
||||
//cout.precision(10);
|
||||
if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help"))
|
||||
{
|
||||
|
||||
for (int32_t i = 0; i < argc; i++) {
|
||||
if (argv[i] == string("-infile")) {
|
||||
xml_file = argv[++i];
|
||||
|
||||
} else if (argv[i] == string("-print_level")) {
|
||||
plevel = atoi(argv[++i]);
|
||||
|
||||
} else if (argv[i] == string("-opt_for_clk")) {
|
||||
McPATComponent::opt_for_clk = (bool)atoi(argv[++i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure that the XML file was specified
|
||||
if (xml_file == NULL) {
|
||||
cerr << "ERROR: Please specify infile\n\n";
|
||||
print_usage(argv[0]);
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < argc; i++)
|
||||
{
|
||||
if (argv[i] == string("-infile"))
|
||||
{
|
||||
infile_specified = true;
|
||||
i++;
|
||||
fb = argv[ i];
|
||||
}
|
||||
|
||||
if (argv[i] == string("-print_level"))
|
||||
{
|
||||
i++;
|
||||
plevel = atoi(argv[i]);
|
||||
}
|
||||
|
||||
if (argv[i] == string("-opt_for_clk"))
|
||||
{
|
||||
i++;
|
||||
opt_for_clk = (bool)atoi(argv[i]);
|
||||
}
|
||||
}
|
||||
if (infile_specified == false)
|
||||
{
|
||||
// Ensure that the XML file exists
|
||||
struct stat file_info;
|
||||
if (stat(xml_file, &file_info)) {
|
||||
cerr << "ERROR: File not found: " << xml_file << endl << endl;
|
||||
print_usage(argv[0]);
|
||||
}
|
||||
|
||||
|
||||
cout << "McPAT (version " << VER_MAJOR << "." << VER_MINOR
|
||||
<< " of " << VER_UPDATE << ") is computing the target processor...\n "<<endl;
|
||||
<< " of " << VER_UPDATE << ") is computing the target processor...\n "
|
||||
<< endl;
|
||||
|
||||
//parse XML-based interface
|
||||
ParseXML *p1= new ParseXML();
|
||||
p1->parse(fb);
|
||||
Processor proc(p1);
|
||||
proc.displayEnergy(2, plevel);
|
||||
delete p1;
|
||||
// Parse the XML input file
|
||||
XMLNode xml_data = XMLNode::openFileHelper(xml_file, "component");
|
||||
unsigned int num_children = xml_data.nChildNode("component");
|
||||
assert(num_children == 1);
|
||||
XMLNode system_xml = xml_data.getChildNode("component");
|
||||
assert(strcmp(system_xml.getAttribute("type"), "System") == 0);
|
||||
|
||||
// Recursively instantiate the system hierarchy
|
||||
System* system = new System(&system_xml);
|
||||
|
||||
// Recursively compute chip area
|
||||
system->computeArea();
|
||||
|
||||
// Recursively compute the power consumed
|
||||
system->computeEnergy();
|
||||
|
||||
// Recursively output the computed values
|
||||
system->displayData(2, plevel);
|
||||
|
||||
// Clean up
|
||||
delete system;
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
void print_usage(char * argv0)
|
||||
{
|
||||
void print_usage(char * argv0) {
|
||||
cerr << "How to use McPAT:" << endl;
|
||||
cerr << " mcpat -infile <input file name> -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl;
|
||||
//cerr << " Note:default print level is at processor level, please increase it to see the details" << endl;
|
||||
cerr << " mcpat -infile <input file name> -print_level < "
|
||||
<< "level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P "
|
||||
<< "only)/1 (optimzed for target clock rate)>" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
|
|
@ -29,13 +29,16 @@ VPATH = cacti
|
|||
|
||||
SRCS = \
|
||||
Ucache.cc \
|
||||
XML_Parse.cc \
|
||||
arbiter.cc \
|
||||
area.cc \
|
||||
array.cc \
|
||||
bank.cc \
|
||||
basic_circuit.cc \
|
||||
basic_components.cc \
|
||||
bus_interconnect.cc \
|
||||
cachearray.cc \
|
||||
cachecontroller.cc \
|
||||
cacheunit.cc \
|
||||
cacti_interface.cc \
|
||||
component.cc \
|
||||
core.cc \
|
||||
|
@ -52,10 +55,9 @@ SRCS = \
|
|||
noc.cc \
|
||||
nuca.cc \
|
||||
parameter.cc \
|
||||
processor.cc \
|
||||
router.cc \
|
||||
sharedcache.cc \
|
||||
subarray.cc \
|
||||
system.cc \
|
||||
technology.cc \
|
||||
uca.cc \
|
||||
wire.cc \
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
TARGET = mcpatXeonCore
|
||||
SHELL = /bin/sh
|
||||
.PHONY: all depend clean
|
||||
.SUFFIXES: .cc .o
|
||||
|
||||
ifndef NTHREADS
|
||||
NTHREADS = 4
|
||||
endif
|
||||
|
||||
|
||||
LIBS =
|
||||
INCS = -lm
|
||||
|
||||
ifeq ($(TAG),dbg)
|
||||
DBG = -Wall
|
||||
OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti
|
||||
else
|
||||
DBG =
|
||||
OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti
|
||||
#OPT = -O0 -DNTHREADS=$(NTHREADS)
|
||||
endif
|
||||
|
||||
#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
|
||||
CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
|
||||
CXX = g++ -m32
|
||||
CC = gcc -m32
|
||||
|
||||
VPATH = cacti
|
||||
|
||||
SRCS = \
|
||||
Ucache.cc \
|
||||
XML_Parse.cc \
|
||||
arbiter.cc \
|
||||
area.cc \
|
||||
array.cc \
|
||||
bank.cc \
|
||||
basic_circuit.cc \
|
||||
basic_components.cc \
|
||||
cacti_interface.cc \
|
||||
component.cc \
|
||||
core.cc \
|
||||
crossbar.cc \
|
||||
decoder.cc \
|
||||
htree2.cc \
|
||||
interconnect.cc \
|
||||
io.cc \
|
||||
iocontrollers.cc \
|
||||
logic.cc \
|
||||
main.cc \
|
||||
mat.cc \
|
||||
memoryctrl.cc \
|
||||
noc.cc \
|
||||
nuca.cc \
|
||||
parameter.cc \
|
||||
processor.cc \
|
||||
router.cc \
|
||||
sharedcache.cc \
|
||||
subarray.cc \
|
||||
technology_xeon_core.cc \
|
||||
uca.cc \
|
||||
wire.cc \
|
||||
xmlParser.cc
|
||||
|
||||
OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
|
||||
|
||||
all: obj_$(TAG)/$(TARGET)
|
||||
cp -f obj_$(TAG)/$(TARGET) $(TARGET)
|
||||
|
||||
obj_$(TAG)/$(TARGET) : $(OBJS)
|
||||
$(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread
|
||||
|
||||
#obj_$(TAG)/%.o : %.cc
|
||||
# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<
|
||||
|
||||
obj_$(TAG)/%.o : %.cc
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
clean:
|
||||
-rm -f *.o $(TARGET)
|
||||
|
||||
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,89 +26,75 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef MEMORYCTRL_H_
|
||||
#define MEMORYCTRL_H_
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "parameter.h"
|
||||
//#include "io.h"
|
||||
#include "array.h"
|
||||
//#include "Undifferentiated_Core_Area.h"
|
||||
#include <vector>
|
||||
|
||||
#include "basic_components.h"
|
||||
#include "cachearray.h"
|
||||
#include "parameter.h"
|
||||
|
||||
class MCBackend : public Component {
|
||||
class MCBackend : public McPATComponent {
|
||||
public:
|
||||
InputParameter l_ip;
|
||||
uca_org_t local_result;
|
||||
enum MemoryCtrl_type mc_type;
|
||||
MCParam mcp;
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
MCParameters mcp;
|
||||
MCStatistics mcs;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
|
||||
void compute();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
|
||||
MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const MCParameters & mcp_, const MCStatistics & mcs_);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
~MCBackend() {};
|
||||
};
|
||||
|
||||
class MCPHY : public Component {
|
||||
class MCPHY : public McPATComponent {
|
||||
public:
|
||||
InputParameter l_ip;
|
||||
uca_org_t local_result;
|
||||
enum MemoryCtrl_type mc_type;
|
||||
MCParam mcp;
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
MCParameters mcp;
|
||||
MCStatistics mcs;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
|
||||
void compute();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
|
||||
MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||
const MCParameters & mcp_, const MCStatistics & mcs_);
|
||||
void computeArea();
|
||||
void computeEnergy();
|
||||
~MCPHY() {};
|
||||
};
|
||||
|
||||
class MCFrontEnd : public Component {
|
||||
class MCFrontEnd : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
InputParameter interface_ip;
|
||||
enum MemoryCtrl_type mc_type;
|
||||
MCParam mcp;
|
||||
CacheArray* frontendBuffer;
|
||||
CacheArray* readBuffer;
|
||||
CacheArray* writeBuffer;
|
||||
selection_logic* MC_arb;
|
||||
ArrayST * frontendBuffer;
|
||||
ArrayST * readBuffer;
|
||||
ArrayST * writeBuffer;
|
||||
|
||||
MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
InputParameter interface_ip;
|
||||
MCParameters mcp;
|
||||
MCStatistics mcs;
|
||||
|
||||
MCFrontEnd(XMLNode* _xml_data,
|
||||
InputParameter* interface_ip_, const MCParameters & mcp_,
|
||||
const MCStatistics & mcs_);
|
||||
~MCFrontEnd();
|
||||
};
|
||||
|
||||
class MemoryController : public Component {
|
||||
class MemoryController : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
InputParameter interface_ip;
|
||||
enum MemoryCtrl_type mc_type;
|
||||
MCParam mcp;
|
||||
MCFrontEnd * frontend;
|
||||
MCBackend * transecEngine;
|
||||
MCPHY * PHY;
|
||||
Pipeline * pipeLogic;
|
||||
MCParameters mcp;
|
||||
MCStatistics mcs;
|
||||
|
||||
//clock_network clockNetwork;
|
||||
MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_);
|
||||
MemoryController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||
void initialize_params();
|
||||
void set_mc_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
~MemoryController();
|
||||
};
|
||||
|
||||
#endif /* MEMORYCTRL_H_ */
|
||||
|
|
442
ext/mcpat/noc.cc
442
ext/mcpat/noc.cc
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,7 +26,7 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
|
@ -35,73 +36,43 @@
|
|||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "common.h"
|
||||
#include "const.h"
|
||||
#include "io.h"
|
||||
#include "noc.h"
|
||||
#include "parameter.h"
|
||||
|
||||
NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_)
|
||||
:XML(XML_interface),
|
||||
ithNoC(ithNoC_),
|
||||
interface_ip(*interface_ip_),
|
||||
router(0),
|
||||
link_bus(0),
|
||||
link_bus_exist(false),
|
||||
router_exist(false),
|
||||
M_traffic_pattern(M_traffic_pattern_)
|
||||
{
|
||||
/*
|
||||
* initialize, compute and optimize individual components.
|
||||
*/
|
||||
|
||||
if (XML->sys.Embedded)
|
||||
{
|
||||
interface_ip.wt =Global_30;
|
||||
interface_ip.wire_is_mat_type = 0;
|
||||
interface_ip.wire_os_mat_type = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
interface_ip.wt =Global;
|
||||
interface_ip.wire_is_mat_type = 2;
|
||||
interface_ip.wire_os_mat_type = 2;
|
||||
}
|
||||
set_noc_param();
|
||||
local_result=init_interface(&interface_ip);
|
||||
OnChipNetwork::OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
|
||||
InputParameter* interface_ip_)
|
||||
: McPATComponent(_xml_data), router(NULL), link_bus(NULL), ithNoC(ithNoC_),
|
||||
interface_ip(*interface_ip_), link_bus_exist(false),
|
||||
router_exist(false) {
|
||||
name = "On-Chip Network";
|
||||
set_param_stats();
|
||||
local_result = init_interface(&interface_ip, name);
|
||||
scktRatio = g_tp.sckt_co_eff;
|
||||
|
||||
if (nocdynp.type)
|
||||
{/*
|
||||
* if NOC compute router, router links must be computed separately
|
||||
* and called from external
|
||||
* since total chip area must be known first
|
||||
*/
|
||||
// TODO: Routers and links should be children of the NOC component
|
||||
if (noc_params.type) {
|
||||
init_router();
|
||||
} else {
|
||||
init_link_bus();
|
||||
}
|
||||
else
|
||||
{
|
||||
init_link_bus(link_len_); //if bus compute bus
|
||||
}
|
||||
|
||||
// //clock power
|
||||
// clockNetwork.init_wire_external(is_default, &interface_ip);
|
||||
// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
|
||||
// clockNetwork.end_wiring_level =5;//toplevel metal
|
||||
// clockNetwork.start_wiring_level =5;//toplevel metal
|
||||
// clockNetwork.num_regs = corepipe.tot_stage_vector;
|
||||
// clockNetwork.optimize_wire();
|
||||
}
|
||||
|
||||
void NoC::init_router()
|
||||
{
|
||||
router = new Router(nocdynp.flit_size,
|
||||
nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc,
|
||||
nocdynp.virtual_channel_per_port, &(g_tp.peri_global),
|
||||
nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern);
|
||||
//router->print_router();
|
||||
area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes);
|
||||
void OnChipNetwork::init_router() {
|
||||
router = new Router(noc_params.flit_size,
|
||||
noc_params.virtual_channel_per_port *
|
||||
noc_params.input_buffer_entries_per_vc,
|
||||
noc_params.virtual_channel_per_port,
|
||||
&(g_tp.peri_global),
|
||||
noc_params.input_ports, noc_params.output_ports,
|
||||
noc_params.M_traffic_pattern);
|
||||
// TODO: Make a router class within McPAT that descends from McPATComponent
|
||||
// children.push_back(router);
|
||||
area.set_area(area.get_area() + router->area.get_area() *
|
||||
noc_params.total_nodes);
|
||||
|
||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
||||
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
|
||||
|
@ -111,245 +82,190 @@ void NoC::init_router()
|
|||
router_exist = true;
|
||||
}
|
||||
|
||||
void NoC ::init_link_bus(double link_len_)
|
||||
{
|
||||
|
||||
|
||||
// if (nocdynp.min_ports==1 )
|
||||
if (nocdynp.type)
|
||||
void OnChipNetwork::init_link_bus() {
|
||||
if (noc_params.type) {
|
||||
link_name = "Links";
|
||||
else
|
||||
} else {
|
||||
link_name = "Bus";
|
||||
}
|
||||
|
||||
link_len=link_len_;
|
||||
assert(link_len>0);
|
||||
interface_ip.throughput = noc_params.link_throughput /
|
||||
noc_params.clockRate;
|
||||
interface_ip.latency = noc_params.link_latency / noc_params.clockRate;
|
||||
|
||||
interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate;
|
||||
interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate;
|
||||
link_len /= (noc_params.horizontal_nodes + noc_params.vertical_nodes) / 2;
|
||||
|
||||
link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2;
|
||||
if (noc_params.total_nodes > 1) {
|
||||
//All links are shared by neighbors
|
||||
link_len /= 2;
|
||||
}
|
||||
link_bus = new Interconnect(xml_data, "Link", Uncore_device,
|
||||
noc_params.link_base_width,
|
||||
noc_params.link_base_height,
|
||||
noc_params.flit_size, link_len, &interface_ip,
|
||||
noc_params.link_start_wiring_level,
|
||||
noc_params.clockRate, true/*pipelinable*/,
|
||||
noc_params.route_over_perc);
|
||||
children.push_back(link_bus);
|
||||
|
||||
if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors
|
||||
link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size,
|
||||
link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc);
|
||||
|
||||
link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area()
|
||||
* nocdynp.global_linked_ports);
|
||||
|
||||
area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes);
|
||||
link_bus_exist = true;
|
||||
}
|
||||
void NoC::computeEnergy(bool is_tdp)
|
||||
{
|
||||
//power_point_product_masks
|
||||
|
||||
// TODO: This should use the McPATComponent::computeEnergy function to
|
||||
// recursively calculate energy of routers and links and then add
|
||||
void OnChipNetwork::computeEnergy() {
|
||||
double pppm_t[4] = {1, 1, 1, 1};
|
||||
double M=nocdynp.duty_cycle;
|
||||
if (is_tdp)
|
||||
{
|
||||
//init stats for TDP
|
||||
stats_t.readAc.access = M;
|
||||
tdp_stats = stats_t;
|
||||
if (router_exist)
|
||||
{
|
||||
set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern
|
||||
|
||||
// Initialize stats for TDP
|
||||
tdp_stats.reset();
|
||||
tdp_stats.readAc.access = noc_stats.duty_cycle;
|
||||
if (router_exist) {
|
||||
// TODO: Define a regression to exercise routers
|
||||
// TODO: Clean this up: it is too invasive and breaks abstraction
|
||||
set_pppm(pppm_t, 1 * tdp_stats.readAc.access, 1, 1, 1);
|
||||
router->power = router->power * pppm_t;
|
||||
set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes);
|
||||
power = power + router->power*pppm_t;
|
||||
set_pppm(pppm_t, noc_params.total_nodes,
|
||||
noc_params.total_nodes,
|
||||
noc_params.total_nodes,
|
||||
noc_params.total_nodes);
|
||||
}
|
||||
if (link_bus_exist)
|
||||
{
|
||||
if (nocdynp.type)
|
||||
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports,
|
||||
nocdynp.global_linked_ports, nocdynp.global_linked_ports);
|
||||
//reset traffic pattern; local port do not have router links
|
||||
else
|
||||
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports,
|
||||
nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern
|
||||
|
||||
link_bus_tot_per_Router.power = link_bus->power*pppm_t;
|
||||
|
||||
set_pppm(pppm_t, nocdynp.total_nodes,
|
||||
nocdynp.total_nodes,
|
||||
nocdynp.total_nodes,
|
||||
nocdynp.total_nodes);
|
||||
power = power + link_bus_tot_per_Router.power*pppm_t;
|
||||
|
||||
if (link_bus_exist) {
|
||||
if (noc_params.type) {
|
||||
link_bus->int_params.active_ports = noc_params.min_ports - 1;
|
||||
} else {
|
||||
link_bus->int_params.active_ports = noc_params.min_ports;
|
||||
}
|
||||
link_bus->int_stats.duty_cycle =
|
||||
noc_params.M_traffic_pattern * noc_stats.duty_cycle;
|
||||
|
||||
// TODO: Decide how to roll multiple routers into a single top-level
|
||||
// NOC module. I would prefer not to, but it might be a nice feature
|
||||
set_pppm(pppm_t, noc_params.total_nodes,
|
||||
noc_params.total_nodes,
|
||||
noc_params.total_nodes,
|
||||
noc_params.total_nodes);
|
||||
}
|
||||
else
|
||||
{
|
||||
//init stats for runtime power (RTP)
|
||||
stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses;
|
||||
rtp_stats = stats_t;
|
||||
|
||||
// Initialize stats for runtime energy and power
|
||||
rtp_stats.reset();
|
||||
rtp_stats.readAc.access = noc_stats.total_access;
|
||||
set_pppm(pppm_t, 1, 0 , 0, 0);
|
||||
if (router_exist)
|
||||
{
|
||||
router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ;
|
||||
router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ;
|
||||
router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ;
|
||||
if (router_exist) {
|
||||
// TODO: Move this to a McPATComponent parent class of Router
|
||||
router->buffer.rt_power.readOp.dynamic =
|
||||
(router->buffer.power.readOp.dynamic +
|
||||
router->buffer.power.writeOp.dynamic) * rtp_stats.readAc.access;
|
||||
router->crossbar.rt_power.readOp.dynamic =
|
||||
router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access;
|
||||
router->arbiter.rt_power.readOp.dynamic =
|
||||
router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access;
|
||||
|
||||
router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t +
|
||||
router->rt_power = router->rt_power +
|
||||
(router->buffer.rt_power + router->crossbar.rt_power +
|
||||
router->arbiter.rt_power) * pppm_t +
|
||||
router->power * pppm_lkg;//TDP power must be calculated first!
|
||||
rt_power = rt_power + router->rt_power;
|
||||
}
|
||||
if (link_bus_exist)
|
||||
{
|
||||
set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access);
|
||||
link_bus->rt_power = link_bus->power * pppm_t;
|
||||
rt_power = rt_power + link_bus->rt_power;
|
||||
if (link_bus_exist) {
|
||||
link_bus->int_stats.accesses = noc_stats.total_access;
|
||||
}
|
||||
|
||||
// Recursively compute energy
|
||||
McPATComponent::computeEnergy();
|
||||
}
|
||||
|
||||
void OnChipNetwork::set_param_stats() {
|
||||
// TODO: Remove this or move initialization elsewhere
|
||||
memset(&noc_params, 0, sizeof(OnChipNetworkParameters));
|
||||
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
int mat_type;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_INT_IF("type", noc_params.type);
|
||||
ASSIGN_FP_IF("clockrate", noc_params.clockRate);
|
||||
ASSIGN_INT_IF("flit_bits", noc_params.flit_size);
|
||||
ASSIGN_FP_IF("link_len", link_len);
|
||||
ASSIGN_FP_IF("link_throughput", noc_params.link_throughput);
|
||||
ASSIGN_FP_IF("link_latency", noc_params.link_latency);
|
||||
ASSIGN_INT_IF("input_ports", noc_params.input_ports);
|
||||
ASSIGN_INT_IF("output_ports", noc_params.output_ports);
|
||||
ASSIGN_INT_IF("global_linked_ports", noc_params.global_linked_ports);
|
||||
ASSIGN_INT_IF("horizontal_nodes", noc_params.horizontal_nodes);
|
||||
ASSIGN_INT_IF("vertical_nodes", noc_params.vertical_nodes);
|
||||
ASSIGN_FP_IF("chip_coverage", noc_params.chip_coverage);
|
||||
ASSIGN_FP_IF("link_routing_over_percentage",
|
||||
noc_params.route_over_perc);
|
||||
ASSIGN_INT_IF("has_global_link", noc_params.has_global_link);
|
||||
ASSIGN_INT_IF("virtual_channel_per_port",
|
||||
noc_params.virtual_channel_per_port);
|
||||
ASSIGN_INT_IF("input_buffer_entries_per_vc",
|
||||
noc_params.input_buffer_entries_per_vc);
|
||||
ASSIGN_FP_IF("M_traffic_pattern", noc_params.M_traffic_pattern);
|
||||
ASSIGN_FP_IF("link_base_width", noc_params.link_base_width);
|
||||
ASSIGN_FP_IF("link_base_height", noc_params.link_base_height);
|
||||
ASSIGN_INT_IF("link_start_wiring_level",
|
||||
noc_params.link_start_wiring_level);
|
||||
ASSIGN_INT_IF("wire_mat_type", mat_type);
|
||||
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
|
||||
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
// Change from MHz to Hz
|
||||
noc_params.clockRate *= 1e6;
|
||||
|
||||
void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
||||
{
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent+2, ' ');
|
||||
bool long_channel = XML->sys.longer_channel_device;
|
||||
interface_ip.wire_is_mat_type = mat_type;
|
||||
interface_ip.wire_os_mat_type = mat_type;
|
||||
|
||||
double M =M_traffic_pattern*nocdynp.duty_cycle;
|
||||
/*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc;
|
||||
* When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to
|
||||
* be applied together with McPAT's extra traffic pattern.
|
||||
* */
|
||||
if (is_tdp)
|
||||
{
|
||||
cout << name << endl;
|
||||
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
|
||||
cout << indent_str << "Subthreshold Leakage = "
|
||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
||||
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
||||
cout<<endl;
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (router_exist)
|
||||
{
|
||||
cout << indent_str << "Router: " << endl;
|
||||
cout << indent_str_next << "Area = " << router->area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next<< "Peak Dynamic = " << router->power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? router->power.readOp.longer_channel_leakage:router->power.readOp.leakage) <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next<< "Runtime Dynamic = " << router->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
||||
cout<<endl;
|
||||
if (plevel >2){
|
||||
cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl;
|
||||
cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl;
|
||||
cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)
|
||||
*nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
||||
cout <<endl;
|
||||
cout << indent_str<< indent_str<< "Crossbar:" << endl;
|
||||
cout << indent_str<< indent_str_next << "Area = " << router->crossbar.area.get_area()*1e-6 << " mm^2" << endl;
|
||||
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->crossbar.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? router->crossbar.power.readOp.longer_channel_leakage:router->crossbar.power.readOp.leakage) << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->crossbar.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
||||
cout <<endl;
|
||||
cout << indent_str<< indent_str<< "Arbiter:" << endl;
|
||||
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->arbiter.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? router->arbiter.power.readOp.longer_channel_leakage:router->arbiter.power.readOp.leakage) << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->arbiter.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
}
|
||||
if (link_bus_exist)
|
||||
{
|
||||
cout << indent_str << (nocdynp.type? "Per Router ":"") << link_name<<": " << endl;
|
||||
cout << indent_str_next << "Area = " << link_bus_tot_per_Router.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next<< "Peak Dynamic = " << link_bus_tot_per_Router.power.readOp.dynamic*
|
||||
nocdynp.clockRate << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? link_bus_tot_per_Router.power.readOp.longer_channel_leakage:link_bus_tot_per_Router.power.readOp.leakage)
|
||||
<<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage
|
||||
<< " W" << endl;
|
||||
cout << indent_str_next<< "Runtime Dynamic = " << link_bus->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
||||
cout<<endl;
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
||||
// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
|
||||
// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
|
||||
// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
||||
// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
|
||||
// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
|
||||
// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
||||
// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
|
||||
// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
|
||||
// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
||||
// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
|
||||
// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
|
||||
ASSIGN_FP_IF("duty_cycle", noc_stats.duty_cycle);
|
||||
ASSIGN_FP_IF("total_accesses", noc_stats.total_access);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
void NoC::set_noc_param()
|
||||
{
|
||||
|
||||
nocdynp.type = XML->sys.NoC[ithNoC].type;
|
||||
nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate;
|
||||
nocdynp.clockRate *= 1e6;
|
||||
nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
||||
|
||||
nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits;
|
||||
if (nocdynp.type)
|
||||
{
|
||||
nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports;
|
||||
nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1
|
||||
nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
|
||||
nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1);
|
||||
/*
|
||||
* Except local i/o ports, all ports needs links( global_linked_ports);
|
||||
* However only min_ports can be fully active simultaneously
|
||||
* since the fewer number of ports (input or output ) is the bottleneck.
|
||||
*/
|
||||
clockRate = noc_params.clockRate;
|
||||
noc_params.min_ports =
|
||||
min(noc_params.input_ports, noc_params.output_ports);
|
||||
if (noc_params.type) {
|
||||
noc_params.global_linked_ports = (noc_params.input_ports - 1) +
|
||||
(noc_params.output_ports - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
nocdynp.input_ports = 1;
|
||||
nocdynp.output_ports = 1;
|
||||
nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
|
||||
nocdynp.global_linked_ports = 1;
|
||||
noc_params.total_nodes =
|
||||
noc_params.horizontal_nodes * noc_params.vertical_nodes;
|
||||
|
||||
assert(noc_params.chip_coverage <= 1);
|
||||
assert(noc_params.route_over_perc <= 1);
|
||||
assert(link_len > 0);
|
||||
}
|
||||
|
||||
nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port;
|
||||
nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc;
|
||||
|
||||
nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes;
|
||||
nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes;
|
||||
nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes;
|
||||
nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle;
|
||||
nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link;
|
||||
nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput;
|
||||
nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency;
|
||||
nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage;
|
||||
nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc;
|
||||
|
||||
assert (nocdynp.chip_coverage <=1);
|
||||
assert (nocdynp.route_over_perc <=1);
|
||||
|
||||
if (nocdynp.type)
|
||||
name = "NOC";
|
||||
else
|
||||
name = "BUSES";
|
||||
OnChipNetwork ::~OnChipNetwork() {
|
||||
|
||||
if (router) {
|
||||
delete router;
|
||||
router = 0;
|
||||
}
|
||||
if (link_bus) {
|
||||
delete link_bus;
|
||||
link_bus = 0;
|
||||
}
|
||||
|
||||
|
||||
NoC ::~NoC(){
|
||||
|
||||
if(router) {delete router; router = 0;}
|
||||
if(link_bus) {delete link_bus; link_bus = 0;}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,13 +26,13 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef NOC_H_
|
||||
#define NOC_H_
|
||||
#include "XML_Parse.h"
|
||||
|
||||
#include "array.h"
|
||||
#include "basic_components.h"
|
||||
#include "interconnect.h"
|
||||
|
@ -39,37 +40,62 @@
|
|||
#include "parameter.h"
|
||||
#include "router.h"
|
||||
|
||||
class NoC :public Component {
|
||||
// Static configuration of one on-chip network (router-based NoC or bus).
// OnChipNetwork::set_param_stats() zero-fills this object with memset() and
// then fills the fields from the component's <param> XML children, so keep it
// a plain data aggregate. A few fields are derived there rather than parsed.
class OnChipNetworkParameters {
|
||||
public:
|
||||
double clockRate;  // "clockrate" param; set_param_stats() multiplies it by 1e6 (MHz -> Hz)
|
||||
int flit_size;  // "flit_bits" param; passed to the Router and Interconnect constructors
|
||||
int input_ports;  // "input_ports" param; passed to the Router constructor
|
||||
int output_ports;  // "output_ports" param; passed to the Router constructor
|
||||
int min_ports;  // derived: min(input_ports, output_ports)
|
||||
int global_linked_ports;  // "global_linked_ports" param; when type is set it is overwritten with (input_ports - 1) + (output_ports - 1)
|
||||
int virtual_channel_per_port;  // "virtual_channel_per_port" param; passed to the Router constructor
|
||||
int input_buffer_entries_per_vc;  // "input_buffer_entries_per_vc" param; multiplied by virtual_channel_per_port for the Router
|
||||
int horizontal_nodes;  // "horizontal_nodes" param; used with vertical_nodes to scale link_len in init_link_bus()
|
||||
int vertical_nodes;  // "vertical_nodes" param
|
||||
int total_nodes;  // derived: horizontal_nodes * vertical_nodes
|
||||
double link_throughput;  // "link_throughput" param; divided by clockRate to set interface_ip.throughput
|
||||
double link_latency;  // "link_latency" param; divided by clockRate to set interface_ip.latency
|
||||
double chip_coverage;  // "chip_coverage" param; set_param_stats() asserts it is <= 1
|
||||
double route_over_perc;  // "link_routing_over_percentage" param; asserted <= 1 and passed to Interconnect
|
||||
bool has_global_link;  // "has_global_link" param
|
||||
bool type;  // "type" param; true takes the init_router() path (links named "Links"), false the init_link_bus() path (named "Bus")
|
||||
double M_traffic_pattern;  // "M_traffic_pattern" param; passed to the Router and multiplied by the duty cycle for the link
|
||||
double link_base_width;  // "link_base_width" param; forwarded to the Interconnect constructor
|
||||
double link_base_height;  // "link_base_height" param; forwarded to the Interconnect constructor
|
||||
int link_start_wiring_level;  // "link_start_wiring_level" param; forwarded to the Interconnect constructor
|
||||
};
|
||||
|
||||
// Activity statistics of one on-chip network, read by
// OnChipNetwork::set_param_stats() from the component's <stat> XML children.
// NOTE(review): no visible code initializes these before parsing, so a missing
// <stat> entry may leave a field unset -- confirm against the constructor.
class OnChipNetworkStatistics {
|
||||
public:
|
||||
double duty_cycle;  // "duty_cycle" stat; used as the TDP access count and, scaled by M_traffic_pattern, as the link duty cycle
|
||||
double total_access;  // "total_accesses" stat; used as the runtime access count for the network and its link/bus
|
||||
};
|
||||
|
||||
class OnChipNetwork : public McPATComponent {
|
||||
public:
|
||||
Router* router;
|
||||
Interconnect* link_bus;
|
||||
Component link_bus_tot_per_Router;
|
||||
|
||||
ParseXML *XML;
|
||||
int ithNoC;
|
||||
InputParameter interface_ip;
|
||||
double link_len;
|
||||
double executionTime;
|
||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||
Router * router;
|
||||
interconnect * link_bus;
|
||||
NoCParam nocdynp;
|
||||
OnChipNetworkParameters noc_params;
|
||||
OnChipNetworkStatistics noc_stats;
|
||||
uca_org_t local_result;
|
||||
statsDef tdp_stats;
|
||||
statsDef rtp_stats;
|
||||
statsDef stats_t;
|
||||
powerDef power_t;
|
||||
Component link_bus_tot_per_Router;
|
||||
bool link_bus_exist;
|
||||
bool router_exist;
|
||||
string name, link_name;
|
||||
double M_traffic_pattern;
|
||||
NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0);
|
||||
void set_noc_param();
|
||||
void computeEnergy(bool is_tdp=true);
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
void init_link_bus(double link_len_);
|
||||
string link_name;
|
||||
|
||||
OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
|
||||
InputParameter* interface_ip_);
|
||||
void set_param_stats();
|
||||
void computeEnergy();
|
||||
void init_link_bus();
|
||||
void init_router();
|
||||
void computeEnergy_link_bus(bool is_tdp=true);
|
||||
void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
~NoC();
|
||||
~OnChipNetwork();
|
||||
};
|
||||
|
||||
#endif /* NOC_H_ */
|
||||
|
|
|
@ -1,839 +0,0 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
***************************************************************************/
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "array.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "const.h"
|
||||
#include "parameter.h"
|
||||
#include "processor.h"
|
||||
#include "version.h"
|
||||
|
||||
Processor::Processor(ParseXML *XML_interface)
|
||||
:XML(XML_interface),//TODO: using one global copy may have problems.
|
||||
mc(0),
|
||||
niu(0),
|
||||
pcie(0),
|
||||
flashcontroller(0)
|
||||
{
|
||||
/*
|
||||
* placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
* There is no point to have heterogeneous memory controller on chip,
|
||||
* thus McPAT only support homogeneous memory controllers.
|
||||
*/
|
||||
int i;
|
||||
double pppm_t[4] = {1,1,1,1};
|
||||
set_proc_param();
|
||||
if (procdynp.homoCore)
|
||||
numCore = procdynp.numCore==0? 0:1;
|
||||
else
|
||||
numCore = procdynp.numCore;
|
||||
|
||||
if (procdynp.homoL2)
|
||||
numL2 = procdynp.numL2==0? 0:1;
|
||||
else
|
||||
numL2 = procdynp.numL2;
|
||||
|
||||
if (XML->sys.Private_L2 && numCore != numL2)
|
||||
{
|
||||
cout<<"Number of private L2 does not match number of cores"<<endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (procdynp.homoL3)
|
||||
numL3 = procdynp.numL3==0? 0:1;
|
||||
else
|
||||
numL3 = procdynp.numL3;
|
||||
|
||||
if (procdynp.homoNOC)
|
||||
numNOC = procdynp.numNOC==0? 0:1;
|
||||
else
|
||||
numNOC = procdynp.numNOC;
|
||||
|
||||
// if (!procdynp.homoNOC)
|
||||
// {
|
||||
// cout<<"Current McPAT does not support heterogeneous NOC"<<endl;
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
if (procdynp.homoL1Dir)
|
||||
numL1Dir = procdynp.numL1Dir==0? 0:1;
|
||||
else
|
||||
numL1Dir = procdynp.numL1Dir;
|
||||
|
||||
if (procdynp.homoL2Dir)
|
||||
numL2Dir = procdynp.numL2Dir==0? 0:1;
|
||||
else
|
||||
numL2Dir = procdynp.numL2Dir;
|
||||
|
||||
for (i = 0;i < numCore; i++)
|
||||
{
|
||||
cores.push_back(new Core(XML,i, &interface_ip));
|
||||
cores[i]->computeEnergy();
|
||||
cores[i]->computeEnergy(false);
|
||||
if (procdynp.homoCore){
|
||||
core.area.set_area(core.area.get_area() + cores[i]->area.get_area()*procdynp.numCore);
|
||||
set_pppm(pppm_t,cores[i]->clockRate*procdynp.numCore, procdynp.numCore,procdynp.numCore,procdynp.numCore);
|
||||
core.power = core.power + cores[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/cores[i]->executionTime, procdynp.numCore,procdynp.numCore,procdynp.numCore);
|
||||
core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t;
|
||||
area.set_area(area.get_area() + core.area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
power = power + core.power;
|
||||
rt_power = rt_power + core.rt_power;
|
||||
}
|
||||
else{
|
||||
core.area.set_area(core.area.get_area() + cores[i]->area.get_area());
|
||||
area.set_area(area.get_area() + cores[i]->area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
|
||||
set_pppm(pppm_t,cores[i]->clockRate, 1, 1, 1);
|
||||
core.power = core.power + cores[i]->power*pppm_t;
|
||||
power = power + cores[i]->power*pppm_t;
|
||||
|
||||
set_pppm(pppm_t,1/cores[i]->executionTime, 1, 1, 1);
|
||||
core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t;
|
||||
rt_power = rt_power + cores[i]->rt_power*pppm_t;
|
||||
}
|
||||
}
|
||||
|
||||
if (!XML->sys.Private_L2)
|
||||
{
|
||||
if (numL2 >0)
|
||||
for (i = 0;i < numL2; i++)
|
||||
{
|
||||
l2array.push_back(new SharedCache(XML,i, &interface_ip));
|
||||
l2array[i]->computeEnergy();
|
||||
l2array[i]->computeEnergy(false);
|
||||
if (procdynp.homoL2){
|
||||
l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()*procdynp.numL2);
|
||||
set_pppm(pppm_t,l2array[i]->cachep.clockRate*procdynp.numL2, procdynp.numL2,procdynp.numL2,procdynp.numL2);
|
||||
l2.power = l2.power + l2array[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, procdynp.numL2,procdynp.numL2,procdynp.numL2);
|
||||
l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t;
|
||||
area.set_area(area.get_area() + l2.area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
power = power + l2.power;
|
||||
rt_power = rt_power + l2.rt_power;
|
||||
}
|
||||
else{
|
||||
l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area());
|
||||
area.set_area(area.get_area() + l2array[i]->area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
|
||||
set_pppm(pppm_t,l2array[i]->cachep.clockRate, 1, 1, 1);
|
||||
l2.power = l2.power + l2array[i]->power*pppm_t;
|
||||
power = power + l2array[i]->power*pppm_t;;
|
||||
set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, 1, 1, 1);
|
||||
l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t;
|
||||
rt_power = rt_power + l2array[i]->rt_power*pppm_t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numL3 >0)
|
||||
for (i = 0;i < numL3; i++)
|
||||
{
|
||||
l3array.push_back(new SharedCache(XML,i, &interface_ip, L3));
|
||||
l3array[i]->computeEnergy();
|
||||
l3array[i]->computeEnergy(false);
|
||||
if (procdynp.homoL3){
|
||||
l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()*procdynp.numL3);
|
||||
set_pppm(pppm_t,l3array[i]->cachep.clockRate*procdynp.numL3, procdynp.numL3,procdynp.numL3,procdynp.numL3);
|
||||
l3.power = l3.power + l3array[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, procdynp.numL3,procdynp.numL3,procdynp.numL3);
|
||||
l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t;
|
||||
area.set_area(area.get_area() + l3.area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
power = power + l3.power;
|
||||
rt_power = rt_power + l3.rt_power;
|
||||
|
||||
}
|
||||
else{
|
||||
l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area());
|
||||
area.set_area(area.get_area() + l3array[i]->area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
set_pppm(pppm_t,l3array[i]->cachep.clockRate, 1, 1, 1);
|
||||
l3.power = l3.power + l3array[i]->power*pppm_t;
|
||||
power = power + l3array[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, 1, 1, 1);
|
||||
l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t;
|
||||
rt_power = rt_power + l3array[i]->rt_power*pppm_t;
|
||||
|
||||
}
|
||||
}
|
||||
if (numL1Dir >0)
|
||||
for (i = 0;i < numL1Dir; i++)
|
||||
{
|
||||
l1dirarray.push_back(new SharedCache(XML,i, &interface_ip, L1Directory));
|
||||
l1dirarray[i]->computeEnergy();
|
||||
l1dirarray[i]->computeEnergy(false);
|
||||
if (procdynp.homoL1Dir){
|
||||
l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()*procdynp.numL1Dir);
|
||||
set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate*procdynp.numL1Dir, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir);
|
||||
l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir);
|
||||
l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t;
|
||||
area.set_area(area.get_area() + l1dir.area.get_area());//placement and routing overhead is 10%, l1dir scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
power = power + l1dir.power;
|
||||
rt_power = rt_power + l1dir.rt_power;
|
||||
|
||||
}
|
||||
else{
|
||||
l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area());
|
||||
area.set_area(area.get_area() + l1dirarray[i]->area.get_area());
|
||||
set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate, 1, 1, 1);
|
||||
l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t;
|
||||
power = power + l1dirarray[i]->power;
|
||||
set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, 1, 1, 1);
|
||||
l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t;
|
||||
rt_power = rt_power + l1dirarray[i]->rt_power;
|
||||
}
|
||||
}
|
||||
|
||||
if (numL2Dir >0)
|
||||
for (i = 0;i < numL2Dir; i++)
|
||||
{
|
||||
l2dirarray.push_back(new SharedCache(XML,i, &interface_ip, L2Directory));
|
||||
l2dirarray[i]->computeEnergy();
|
||||
l2dirarray[i]->computeEnergy(false);
|
||||
if (procdynp.homoL2Dir){
|
||||
l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()*procdynp.numL2Dir);
|
||||
set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate*procdynp.numL2Dir, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir);
|
||||
l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir);
|
||||
l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t;
|
||||
area.set_area(area.get_area() + l2dir.area.get_area());//placement and routing overhead is 10%, l2dir scales worse than cache 40% is accumulated from 90 to 22nm
|
||||
power = power + l2dir.power;
|
||||
rt_power = rt_power + l2dir.rt_power;
|
||||
|
||||
}
|
||||
else{
|
||||
l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area());
|
||||
area.set_area(area.get_area() + l2dirarray[i]->area.get_area());
|
||||
set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate, 1, 1, 1);
|
||||
l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t;
|
||||
power = power + l2dirarray[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, 1, 1, 1);
|
||||
l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t;
|
||||
rt_power = rt_power + l2dirarray[i]->rt_power*pppm_t;
|
||||
}
|
||||
}
|
||||
|
||||
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
|
||||
{
|
||||
mc = new MemoryController(XML, &interface_ip, MC);
|
||||
mc->computeEnergy();
|
||||
mc->computeEnergy(false);
|
||||
mcs.area.set_area(mcs.area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs);
|
||||
area.set_area(area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs);
|
||||
set_pppm(pppm_t,XML->sys.mc.number_mcs*mc->mcp.clockRate, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs);
|
||||
mcs.power = mc->power*pppm_t;
|
||||
power = power + mcs.power;
|
||||
set_pppm(pppm_t,1/mc->mcp.executionTime, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs);
|
||||
mcs.rt_power = mc->rt_power*pppm_t;
|
||||
rt_power = rt_power + mcs.rt_power;
|
||||
|
||||
}
|
||||
|
||||
if (XML->sys.flashc.number_mcs >0 )//flash controller
|
||||
{
|
||||
flashcontroller = new FlashController(XML, &interface_ip);
|
||||
flashcontroller->computeEnergy();
|
||||
flashcontroller->computeEnergy(false);
|
||||
double number_fcs = flashcontroller->fcp.num_mcs;
|
||||
flashcontrollers.area.set_area(flashcontrollers.area.get_area()+flashcontroller->area.get_area()*number_fcs);
|
||||
area.set_area(area.get_area()+flashcontrollers.area.get_area());
|
||||
set_pppm(pppm_t,number_fcs, number_fcs ,number_fcs, number_fcs );
|
||||
flashcontrollers.power = flashcontroller->power*pppm_t;
|
||||
power = power + flashcontrollers.power;
|
||||
set_pppm(pppm_t,number_fcs , number_fcs ,number_fcs ,number_fcs );
|
||||
flashcontrollers.rt_power = flashcontroller->rt_power*pppm_t;
|
||||
rt_power = rt_power + flashcontrollers.rt_power;
|
||||
|
||||
}
|
||||
|
||||
if (XML->sys.niu.number_units >0)
|
||||
{
|
||||
niu = new NIUController(XML, &interface_ip);
|
||||
niu->computeEnergy();
|
||||
niu->computeEnergy(false);
|
||||
nius.area.set_area(nius.area.get_area()+niu->area.get_area()*XML->sys.niu.number_units);
|
||||
area.set_area(area.get_area()+niu->area.get_area()*XML->sys.niu.number_units);
|
||||
set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units);
|
||||
nius.power = niu->power*pppm_t;
|
||||
power = power + nius.power;
|
||||
set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units);
|
||||
nius.rt_power = niu->rt_power*pppm_t;
|
||||
rt_power = rt_power + nius.rt_power;
|
||||
|
||||
}
|
||||
|
||||
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels >0)
|
||||
{
|
||||
pcie = new PCIeController(XML, &interface_ip);
|
||||
pcie->computeEnergy();
|
||||
pcie->computeEnergy(false);
|
||||
pcies.area.set_area(pcies.area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units);
|
||||
area.set_area(area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units);
|
||||
set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units);
|
||||
pcies.power = pcie->power*pppm_t;
|
||||
power = power + pcies.power;
|
||||
set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units);
|
||||
pcies.rt_power = pcie->rt_power*pppm_t;
|
||||
rt_power = rt_power + pcies.rt_power;
|
||||
|
||||
}
|
||||
|
||||
if (numNOC >0)
|
||||
{
|
||||
for (i = 0;i < numNOC; i++)
|
||||
{
|
||||
if (XML->sys.NoC[i].type)
|
||||
{//First add up area of routers if NoC is used
|
||||
nocs.push_back(new NoC(XML,i, &interface_ip, 1));
|
||||
if (procdynp.homoNOC)
|
||||
{
|
||||
noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC);
|
||||
area.set_area(area.get_area() + noc.area.get_area());
|
||||
}
|
||||
else
|
||||
{
|
||||
noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area());
|
||||
area.set_area(area.get_area() + nocs[i]->area.get_area());
|
||||
}
|
||||
}
|
||||
else
|
||||
{//Bus based interconnect
|
||||
nocs.push_back(new NoC(XML,i, &interface_ip, 1, sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage)));
|
||||
if (procdynp.homoNOC){
|
||||
noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC);
|
||||
area.set_area(area.get_area() + noc.area.get_area());
|
||||
}
|
||||
else
|
||||
{
|
||||
noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area());
|
||||
area.set_area(area.get_area() + nocs[i]->area.get_area());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute global links associated with each NOC, if any. This must be done at the end (even after the NOC router part) since the total chip
|
||||
* area must be obtain to decide the link routing
|
||||
*/
|
||||
for (i = 0;i < numNOC; i++)
|
||||
{
|
||||
if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type)
|
||||
{
|
||||
nocs[i]->init_link_bus(sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage));//compute global links
|
||||
if (procdynp.homoNOC)
|
||||
{
|
||||
noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
|
||||
* nocs[i]->nocdynp.total_nodes
|
||||
* procdynp.numNOC);
|
||||
area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
|
||||
* nocs[i]->nocdynp.total_nodes
|
||||
* procdynp.numNOC);
|
||||
}
|
||||
else
|
||||
{
|
||||
noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
|
||||
* nocs[i]->nocdynp.total_nodes);
|
||||
area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
|
||||
* nocs[i]->nocdynp.total_nodes);
|
||||
}
|
||||
}
|
||||
}
|
||||
//Compute energy of NoC (w or w/o links) or buses
|
||||
for (i = 0;i < numNOC; i++)
|
||||
{
|
||||
nocs[i]->computeEnergy();
|
||||
nocs[i]->computeEnergy(false);
|
||||
if (procdynp.homoNOC){
|
||||
set_pppm(pppm_t,procdynp.numNOC*nocs[i]->nocdynp.clockRate, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC);
|
||||
noc.power = noc.power + nocs[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC);
|
||||
noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t;
|
||||
power = power + noc.power;
|
||||
rt_power = rt_power + noc.rt_power;
|
||||
}
|
||||
else
|
||||
{
|
||||
set_pppm(pppm_t,nocs[i]->nocdynp.clockRate, 1, 1, 1);
|
||||
noc.power = noc.power + nocs[i]->power*pppm_t;
|
||||
power = power + nocs[i]->power*pppm_t;
|
||||
set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, 1, 1, 1);
|
||||
noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t;
|
||||
rt_power = rt_power + nocs[i]->rt_power*pppm_t;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// //clock power
|
||||
// globalClock.init_wire_external(is_default, &interface_ip);
|
||||
// globalClock.clk_area =area*1e6; //change it from mm^2 to um^2
|
||||
// globalClock.end_wiring_level =5;//toplevel metal
|
||||
// globalClock.start_wiring_level =5;//toplevel metal
|
||||
// globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes
|
||||
// globalClock.optimize_wire();
|
||||
|
||||
}
|
||||
|
||||
void Processor::displayDeviceType(int device_type_, uint32_t indent)
|
||||
{
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
switch ( device_type_ ) {
|
||||
|
||||
case 0 :
|
||||
cout <<indent_str<<"Device Type= "<<"ITRS high performance device type"<<endl;
|
||||
break;
|
||||
case 1 :
|
||||
cout <<indent_str<<"Device Type= "<<"ITRS low standby power device type"<<endl;
|
||||
break;
|
||||
case 2 :
|
||||
cout <<indent_str<<"Device Type= "<<"ITRS low operating power device type"<<endl;
|
||||
break;
|
||||
case 3 :
|
||||
cout <<indent_str<<"Device Type= "<<"LP-DRAM device type"<<endl;
|
||||
break;
|
||||
case 4 :
|
||||
cout <<indent_str<<"Device Type= "<<"COMM-DRAM device type"<<endl;
|
||||
break;
|
||||
default :
|
||||
{
|
||||
cout <<indent_str<<"Unknown Device Type"<<endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Processor::displayInterconnectType(int interconnect_type_, uint32_t indent)
|
||||
{
|
||||
string indent_str(indent, ' ');
|
||||
|
||||
switch ( interconnect_type_ ) {
|
||||
|
||||
case 0 :
|
||||
cout <<indent_str<<"Interconnect metal projection= "<<"aggressive interconnect technology projection"<<endl;
|
||||
break;
|
||||
case 1 :
|
||||
cout <<indent_str<<"Interconnect metal projection= "<<"conservative interconnect technology projection"<<endl;
|
||||
break;
|
||||
default :
|
||||
{
|
||||
cout <<indent_str<<"Unknown Interconnect Projection Type"<<endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp)
|
||||
{
|
||||
int i;
|
||||
bool long_channel = XML->sys.longer_channel_device;
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent+2, ' ');
|
||||
if (is_tdp)
|
||||
{
|
||||
|
||||
if (plevel<5)
|
||||
{
|
||||
cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
|
||||
<< " of " << VER_UPDATE << ") results (current print level is "<< plevel
|
||||
<<", please increase print level to see the details in components): "<<endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
|
||||
<< " of " << VER_UPDATE << ") results (current print level is 5)"<< endl;
|
||||
}
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
cout <<indent_str<<"Technology "<<XML->sys.core_tech_node<<" nm"<<endl;
|
||||
//cout <<indent_str<<"Device Type= "<<XML->sys.device_type<<endl;
|
||||
if (long_channel)
|
||||
cout <<indent_str<<"Using Long Channel Devices When Appropriate"<<endl;
|
||||
//cout <<indent_str<<"Interconnect metal projection= "<<XML->sys.interconnect_projection_type<<endl;
|
||||
displayInterconnectType(XML->sys.interconnect_projection_type, indent);
|
||||
cout <<indent_str<<"Core clock Rate(MHz) "<<XML->sys.core[0].clock_rate<<endl;
|
||||
cout <<endl;
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
cout <<"Processor: "<<endl;
|
||||
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str << "Peak Power = " << power.readOp.dynamic +
|
||||
(long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
|
||||
cout << indent_str << "Total Leakage = " <<
|
||||
(long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
|
||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str << "Subthreshold Leakage = " << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
if (numCore >0){
|
||||
cout <<indent_str<<"Total Cores: "<<XML->sys.number_of_cores << " cores "<<endl;
|
||||
displayDeviceType(XML->sys.device_type,indent);
|
||||
cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str_next << "Subthreshold Leakage = " << core.power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (!XML->sys.Private_L2)
|
||||
{
|
||||
if (numL2 >0){
|
||||
cout <<indent_str<<"Total L2s: "<<endl;
|
||||
displayDeviceType(XML->sys.L2[0].device_type,indent);
|
||||
cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str_next << "Subthreshold Leakage = " << l2.power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
}
|
||||
if (numL3 >0){
|
||||
cout <<indent_str<<"Total L3s: "<<endl;
|
||||
displayDeviceType(XML->sys.L3[0].device_type, indent);
|
||||
cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str_next << "Subthreshold Leakage = " << l3.power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (numL1Dir >0){
|
||||
cout <<indent_str<<"Total First Level Directory: "<<endl;
|
||||
displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
|
||||
cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str_next << "Subthreshold Leakage = " << l1dir.power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (numL2Dir >0){
|
||||
cout <<indent_str<<"Total First Level Directory: "<<endl;
|
||||
displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
|
||||
cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str_next << "Subthreshold Leakage = " << l2dir.power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (numNOC >0){
|
||||
cout <<indent_str<<"Total NoCs (Network/Bus): "<<endl;
|
||||
displayDeviceType(XML->sys.device_type, indent);
|
||||
cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl;
|
||||
//cout << indent_str_next << "Subthreshold Leakage = " << noc.power.readOp.longer_channel_leakage <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
|
||||
{
|
||||
cout <<indent_str<<"Total MCs: "<<XML->sys.mc.number_mcs << " Memory Controllers "<<endl;
|
||||
displayDeviceType(XML->sys.device_type, indent);
|
||||
cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (XML->sys.flashc.number_mcs >0)
|
||||
{
|
||||
cout <<indent_str<<"Total Flash/SSD Controllers: "<<flashcontroller->fcp.num_mcs << " Flash/SSD Controllers "<<endl;
|
||||
displayDeviceType(XML->sys.device_type, indent);
|
||||
cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (XML->sys.niu.number_units >0 )
|
||||
{
|
||||
cout <<indent_str<<"Total NIUs: "<<niu->niup.num_units << " Network Interface Units "<<endl;
|
||||
displayDeviceType(XML->sys.device_type, indent);
|
||||
cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
|
||||
{
|
||||
cout <<indent_str<<"Total PCIes: "<<pcie->pciep.num_units << " PCIe Controllers "<<endl;
|
||||
displayDeviceType(XML->sys.device_type, indent);
|
||||
cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl;
|
||||
cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl;
|
||||
cout << indent_str_next << "Subthreshold Leakage = "
|
||||
<< (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl;
|
||||
cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl;
|
||||
cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl;
|
||||
cout <<endl;
|
||||
}
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
if (plevel >1)
|
||||
{
|
||||
for (i = 0;i < numCore; i++)
|
||||
{
|
||||
cores[i]->displayEnergy(indent+4,plevel,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
if (!XML->sys.Private_L2)
|
||||
{
|
||||
for (i = 0;i < numL2; i++)
|
||||
{
|
||||
l2array[i]->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
}
|
||||
for (i = 0;i < numL3; i++)
|
||||
{
|
||||
l3array[i]->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
for (i = 0;i < numL1Dir; i++)
|
||||
{
|
||||
l1dirarray[i]->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
for (i = 0;i < numL2Dir; i++)
|
||||
{
|
||||
l2dirarray[i]->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
|
||||
{
|
||||
mc->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
if (XML->sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0)
|
||||
{
|
||||
flashcontroller->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
if (XML->sys.niu.number_units >0 )
|
||||
{
|
||||
niu->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
|
||||
{
|
||||
pcie->displayEnergy(indent+4,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
|
||||
for (i = 0;i < numNOC; i++)
|
||||
{
|
||||
nocs[i]->displayEnergy(indent+4,plevel,is_tdp);
|
||||
cout <<"*****************************************************************************************"<<endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Processor::set_proc_param()
|
||||
{
|
||||
bool debug = false;
|
||||
|
||||
procdynp.homoCore = bool(debug?1:XML->sys.homogeneous_cores);
|
||||
procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s);
|
||||
procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s);
|
||||
procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs);
|
||||
procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories);
|
||||
procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories);
|
||||
|
||||
procdynp.numCore = XML->sys.number_of_cores;
|
||||
procdynp.numL2 = XML->sys.number_of_L2s;
|
||||
procdynp.numL3 = XML->sys.number_of_L3s;
|
||||
procdynp.numNOC = XML->sys.number_of_NoCs;
|
||||
procdynp.numL1Dir = XML->sys.number_of_L1Directories;
|
||||
procdynp.numL2Dir = XML->sys.number_of_L2Directories;
|
||||
procdynp.numMC = XML->sys.mc.number_mcs;
|
||||
procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc;
|
||||
|
||||
// if (procdynp.numCore<1)
|
||||
// {
|
||||
// cout<<" The target processor should at least have one core on chip." <<endl;
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
// if (numNOCs<0 || numNOCs>2)
|
||||
// {
|
||||
// cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<<endl;
|
||||
// exit(0);
|
||||
// }
|
||||
|
||||
/* Basic parameters*/
|
||||
interface_ip.data_arr_ram_cell_tech_type = debug?0:XML->sys.device_type;
|
||||
interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type;
|
||||
interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type;
|
||||
interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type;
|
||||
|
||||
interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type;
|
||||
interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
|
||||
interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components.
|
||||
interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components.
|
||||
interface_ip.leakage_power_wt = 0;
|
||||
interface_ip.cycle_time_wt = 0;
|
||||
|
||||
interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied.
|
||||
interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components.
|
||||
interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components.
|
||||
interface_ip.leakage_power_dev = 10000;
|
||||
interface_ip.cycle_time_dev = 10000;
|
||||
|
||||
interface_ip.ed = 2;
|
||||
interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately
|
||||
interface_ip.int_prefetch_w = 1;
|
||||
interface_ip.page_sz_bits = 0;
|
||||
interface_ip.temp = debug?360: XML->sys.temperature;
|
||||
interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node;
|
||||
interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
|
||||
|
||||
//***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors.
|
||||
//They will be overridden during each components initialization
|
||||
interface_ip.cache_sz =64;
|
||||
interface_ip.line_sz = 1;
|
||||
interface_ip.assoc = 1;
|
||||
interface_ip.nbanks = 1;
|
||||
interface_ip.out_w = interface_ip.line_sz*8;
|
||||
interface_ip.specific_tag = 1;
|
||||
interface_ip.tag_w = 64;
|
||||
interface_ip.access_mode = 2;
|
||||
|
||||
interface_ip.obj_func_dyn_energy = 0;
|
||||
interface_ip.obj_func_dyn_power = 0;
|
||||
interface_ip.obj_func_leak_power = 0;
|
||||
interface_ip.obj_func_cycle_t = 1;
|
||||
|
||||
interface_ip.is_main_mem = false;
|
||||
interface_ip.rpters_in_htree = true ;
|
||||
interface_ip.ver_htree_wires_over_array = 0;
|
||||
interface_ip.broadcast_addr_din_over_ver_htrees = 0;
|
||||
|
||||
interface_ip.num_rw_ports = 1;
|
||||
interface_ip.num_rd_ports = 0;
|
||||
interface_ip.num_wr_ports = 0;
|
||||
interface_ip.num_se_rd_ports = 0;
|
||||
interface_ip.num_search_ports = 1;
|
||||
interface_ip.nuca = 0;
|
||||
interface_ip.nuca_bank_count = 0;
|
||||
interface_ip.is_cache =true;
|
||||
interface_ip.pure_ram =false;
|
||||
interface_ip.pure_cam =false;
|
||||
interface_ip.force_cache_config =false;
|
||||
if (XML->sys.Embedded)
|
||||
{
|
||||
interface_ip.wt =Global_30;
|
||||
interface_ip.wire_is_mat_type = 0;
|
||||
interface_ip.wire_os_mat_type = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
interface_ip.wt =Global;
|
||||
interface_ip.wire_is_mat_type = 2;
|
||||
interface_ip.wire_os_mat_type = 2;
|
||||
}
|
||||
interface_ip.force_wiretype = false;
|
||||
interface_ip.print_detail = 1;
|
||||
interface_ip.add_ecc_b_ =true;
|
||||
}
|
||||
|
||||
/*
 * Release every sub-component that the Processor allocated with new.
 *
 * The pointer containers (cores, shared caches, directories, NoCs) are
 * drained from the back, deleting each element. The single controller
 * objects are deleted unconditionally: deleting a null pointer is a no-op,
 * so no guard is needed.
 */
Processor::~Processor()
{
    while (!cores.empty()) {
        delete cores.back();
        cores.pop_back();
    }
    while (!l2array.empty()) {
        delete l2array.back();
        l2array.pop_back();
    }
    while (!l3array.empty()) {
        delete l3array.back();
        l3array.pop_back();
    }
    // The directory caches are allocated with new in the constructor just
    // like the L2/L3 arrays, so they must be freed here as well.
    while (!l1dirarray.empty()) {
        delete l1dirarray.back();
        l1dirarray.pop_back();
    }
    while (!l2dirarray.empty()) {
        delete l2dirarray.back();
        l2dirarray.pop_back();
    }
    while (!nocs.empty()) {
        delete nocs.back();
        nocs.pop_back();
    }

    // The previous guards were inverted (`if (!ptr) delete ptr;`), so these
    // objects were never actually freed.
    // NOTE(review): assumes each pointer is initialized to null when the
    // corresponding unit is not configured -- the old null checks imply
    // that convention; confirm in the constructor's initializer list.
    delete mc;
    delete niu;
    delete pcie;
    delete flashcontroller;
}
|
File diff suppressed because it is too large
Load diff
350
ext/mcpat/system.cc
Normal file
350
ext/mcpat/system.cc
Normal file
|
@ -0,0 +1,350 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Joel Hestness
|
||||
* Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "array.h"
|
||||
#include "basic_circuit.h"
|
||||
#include "common.h"
|
||||
#include "const.h"
|
||||
#include "parameter.h"
|
||||
#include "system.h"
|
||||
#include "version.h"
|
||||
|
||||
// TODO: Fix this constructor to default initialize all pointers to NULL
|
||||
System::System(XMLNode* _xml_data)
|
||||
: McPATComponent(_xml_data) {
|
||||
int i;
|
||||
int currCore = 0;
|
||||
int currNOC = 0;
|
||||
name = "System";
|
||||
set_proc_param();
|
||||
|
||||
// TODO: This loop can (and should) be called by every component in
|
||||
// the hierarchy. Consider moving it to McPATComponent
|
||||
int numChildren = xml_data->nChildNode("component");
|
||||
for (i = 0; i < numChildren; i++ ) {
|
||||
// For each child node of the system,
|
||||
XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
|
||||
XMLCSTR type = childXML->getAttribute("type");
|
||||
|
||||
if (!type) {
|
||||
warnMissingComponentType(childXML->getAttribute("id"));
|
||||
|
||||
} STRCMP(type, "Core") {
|
||||
// TODO: If homogeneous cores, and currCore > 0, just copy core 0
|
||||
children.push_back(new Core(childXML, currCore, &interface_ip));
|
||||
currCore++;
|
||||
} STRCMP(type, "CacheUnit") {
|
||||
children.push_back(new CacheUnit(childXML, &interface_ip));
|
||||
} STRCMP(type, "CacheController") {
|
||||
// TODO: Remove reliance on interface_ip - there should be a better
|
||||
// way to share global variables than passing, copying
|
||||
children.push_back(new CacheController(childXML, &interface_ip));
|
||||
} STRCMP(type, "MemoryController") {
|
||||
children.push_back(new MemoryController(childXML, &interface_ip));
|
||||
} STRCMP(type, "FlashController") {
|
||||
children.push_back(new FlashController(childXML, &interface_ip));
|
||||
} STRCMP(type, "NIUController") {
|
||||
children.push_back(new NIUController(childXML, &interface_ip));
|
||||
} STRCMP(type, "PCIeController") {
|
||||
children.push_back(new PCIeController(childXML, &interface_ip));
|
||||
} STRCMP(type, "Memory") {
|
||||
// TODO:
|
||||
warnIncompleteComponentType(type);
|
||||
} STRCMP(type, "OnChipNetwork") {
|
||||
// TODO: Many of the parameters to this constructor should be
|
||||
// handled in another way
|
||||
children.push_back(new OnChipNetwork(childXML, currNOC,
|
||||
&interface_ip));
|
||||
currNOC++;
|
||||
warnIncompleteComponentType(type);
|
||||
} STRCMP(type, "BusInterconnect") {
|
||||
// TODO: Many of the parameters to this constructor should be
|
||||
// handled in another way
|
||||
children.push_back(new BusInterconnect(childXML, &interface_ip));
|
||||
warnIncompleteComponentType(type);
|
||||
|
||||
// TODO: Add a directory data type that can handle the directories
|
||||
// as defined by certain McScript output
|
||||
} else {
|
||||
warnUnrecognizedComponent(type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void System::displayDeviceType(int device_type_, uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
cout << indent_str << "Device Type = ";
|
||||
|
||||
switch ( device_type_ ) {
|
||||
case 0:
|
||||
cout << "ITRS high performance device type" << endl;
|
||||
break;
|
||||
case 1:
|
||||
cout << "ITRS low standby power device type" << endl;
|
||||
break;
|
||||
case 2:
|
||||
cout << "ITRS low operating power device type" << endl;
|
||||
break;
|
||||
case 3:
|
||||
cout << "LP-DRAM device type" << endl;
|
||||
break;
|
||||
case 4:
|
||||
cout << "COMM-DRAM device type" << endl;
|
||||
break;
|
||||
default:
|
||||
cout << indent_str << "Unknown!" << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
void System::displayInterconnectType(int interconnect_type_, uint32_t indent) {
|
||||
string indent_str(indent, ' ');
|
||||
cout << indent_str << "Interconnect metal projection = ";
|
||||
|
||||
switch ( interconnect_type_ ) {
|
||||
case 0:
|
||||
cout << "aggressive interconnect technology projection" << endl;
|
||||
break;
|
||||
case 1:
|
||||
cout << "conservative interconnect technology projection" << endl;
|
||||
break;
|
||||
default:
|
||||
cout << indent_str << "Unknown!" << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Migrate this down to the McPATComponent::displayData function
|
||||
void System::displayData(uint32_t indent, int plevel) {
|
||||
string indent_str(indent, ' ');
|
||||
string indent_str_next(indent + 2, ' ');
|
||||
if (plevel < 5) {
|
||||
cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
|
||||
<< " of " << VER_UPDATE << ") results (current print level is "
|
||||
<< plevel
|
||||
<< ", please increase print level to see the details in "
|
||||
<< "components) " << endl;
|
||||
} else {
|
||||
cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
|
||||
<< " of " << VER_UPDATE << ") results (current print level is 5)"
|
||||
<< endl;
|
||||
}
|
||||
|
||||
cout << "*****************************************************************"
|
||||
<< "************************" << endl;
|
||||
cout << indent_str << "Technology " << core_tech_node << " nm" << endl;
|
||||
if (longer_channel_device)
|
||||
cout << indent_str << "Using Long Channel Devices When Appropriate" << endl;
|
||||
displayInterconnectType(interconnect_projection_type, indent);
|
||||
cout << indent_str << "Target Clock Rate (MHz) " << target_core_clockrate / 1e6 << endl;
|
||||
cout << endl;
|
||||
|
||||
cout << "*****************************************************************"
|
||||
<< "************************" << endl;
|
||||
|
||||
McPATComponent::displayData(indent, plevel);
|
||||
}
|
||||
|
||||
void System::set_proc_param() {
|
||||
// TODO: Consider creating a SystemParams class that tracks system-wide
|
||||
// parameters like these
|
||||
longer_channel_device = false;
|
||||
core_tech_node = -1;
|
||||
temperature = -1;
|
||||
interconnect_projection_type = -1;
|
||||
device_type = -1;
|
||||
physical_address_width = -1;
|
||||
|
||||
int num_children = xml_data->nChildNode("param");
|
||||
int i;
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||
XMLCSTR value = paramNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingParamName(paramNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("core_tech_node", core_tech_node);
|
||||
ASSIGN_INT_IF("target_core_clockrate", target_core_clockrate);
|
||||
ASSIGN_INT_IF("temperature", temperature);
|
||||
ASSIGN_INT_IF("device_type", device_type);
|
||||
ASSIGN_INT_IF("longer_channel_device", longer_channel_device);
|
||||
ASSIGN_INT_IF("interconnect_projection_type",
|
||||
interconnect_projection_type);
|
||||
ASSIGN_INT_IF("machine_bits", data_path_width);
|
||||
ASSIGN_INT_IF("virtual_address_width", virtual_address_width);
|
||||
ASSIGN_INT_IF("physical_address_width", physical_address_width);
|
||||
ASSIGN_INT_IF("virtual_memory_page_size", virtual_memory_page_size);
|
||||
ASSIGN_INT_IF("wire_is_mat_type", interface_ip.wire_is_mat_type);
|
||||
ASSIGN_INT_IF("wire_os_mat_type", interface_ip.wire_os_mat_type);
|
||||
ASSIGN_INT_IF("delay_wt", interface_ip.delay_wt);
|
||||
ASSIGN_INT_IF("area_wt", interface_ip.area_wt);
|
||||
ASSIGN_INT_IF("dynamic_power_wt", interface_ip.dynamic_power_wt);
|
||||
ASSIGN_INT_IF("leakage_power_wt", interface_ip.leakage_power_wt);
|
||||
ASSIGN_INT_IF("cycle_time_wt", interface_ip.cycle_time_wt);
|
||||
ASSIGN_INT_IF("delay_dev", interface_ip.delay_dev);
|
||||
ASSIGN_INT_IF("area_dev", interface_ip.area_dev);
|
||||
ASSIGN_INT_IF("dynamic_power_dev", interface_ip.dynamic_power_dev);
|
||||
ASSIGN_INT_IF("leakage_power_dev", interface_ip.leakage_power_dev);
|
||||
ASSIGN_INT_IF("cycle_time_dev", interface_ip.cycle_time_dev);
|
||||
ASSIGN_INT_IF("ed", interface_ip.ed);
|
||||
ASSIGN_INT_IF("burst_len", interface_ip.burst_len);
|
||||
ASSIGN_INT_IF("int_prefetch_w", interface_ip.int_prefetch_w);
|
||||
ASSIGN_INT_IF("page_sz_bits", interface_ip.page_sz_bits);
|
||||
ASSIGN_ENUM_IF("rpters_in_htree", interface_ip.rpters_in_htree, bool);
|
||||
ASSIGN_INT_IF("ver_htree_wires_over_array",
|
||||
interface_ip.ver_htree_wires_over_array);
|
||||
ASSIGN_INT_IF("broadcast_addr_din_over_ver_htrees",
|
||||
interface_ip.broadcast_addr_din_over_ver_htrees);
|
||||
ASSIGN_INT_IF("nuca", interface_ip.nuca);
|
||||
ASSIGN_INT_IF("nuca_bank_count", interface_ip.nuca_bank_count);
|
||||
ASSIGN_ENUM_IF("force_cache_config",
|
||||
interface_ip.force_cache_config, bool);
|
||||
ASSIGN_ENUM_IF("wt", interface_ip.wt, Wire_type);
|
||||
ASSIGN_INT_IF("force_wiretype", interface_ip.force_wiretype);
|
||||
ASSIGN_INT_IF("print_detail", interface_ip.print_detail);
|
||||
ASSIGN_ENUM_IF("add_ecc_b_", interface_ip.add_ecc_b_, bool);
|
||||
|
||||
else {
|
||||
warnUnrecognizedParam(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
// Change from MHz to Hz
|
||||
target_core_clockrate *= 1e6;
|
||||
interconnect_projection_type =
|
||||
(interconnect_projection_type == 0) ? 0 : 1;
|
||||
|
||||
num_children = xml_data->nChildNode("stat");
|
||||
for (i = 0; i < num_children; i++) {
|
||||
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||
XMLCSTR node_name = statNode->getAttribute("name");
|
||||
XMLCSTR value = statNode->getAttribute("value");
|
||||
|
||||
if (!node_name)
|
||||
warnMissingStatName(statNode->getAttribute("id"));
|
||||
|
||||
ASSIGN_FP_IF("total_cycles", total_cycles);
|
||||
|
||||
else {
|
||||
warnUnrecognizedStat(node_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (temperature < 0) {
|
||||
errorUnspecifiedParam("temperature");
|
||||
}
|
||||
|
||||
if (core_tech_node < 0) {
|
||||
errorUnspecifiedParam("core_tech_node");
|
||||
}
|
||||
|
||||
if (interconnect_projection_type < 0) {
|
||||
errorUnspecifiedParam("interconnect_projection_type");
|
||||
}
|
||||
|
||||
if (device_type < 0) {
|
||||
errorUnspecifiedParam("device_type");
|
||||
}
|
||||
|
||||
if (physical_address_width <= 0) {
|
||||
errorNonPositiveParam("physical_address_width");
|
||||
}
|
||||
|
||||
if (data_path_width <= 0) {
|
||||
errorNonPositiveParam("machine_bits");
|
||||
}
|
||||
|
||||
if (total_cycles <= 0) {
|
||||
fprintf(stderr, "WARNING: total_cycles <= 0 in system component, ",
|
||||
"power numbers will be funky...\n");
|
||||
}
|
||||
|
||||
clockRate = target_core_clockrate;
|
||||
execution_time = total_cycles / (target_core_clockrate);
|
||||
|
||||
/* Basic parameters*/
|
||||
interface_ip.data_arr_ram_cell_tech_type = device_type;
|
||||
interface_ip.data_arr_peri_global_tech_type = device_type;
|
||||
interface_ip.tag_arr_ram_cell_tech_type = device_type;
|
||||
interface_ip.tag_arr_peri_global_tech_type = device_type;
|
||||
|
||||
interface_ip.ic_proj_type = interconnect_projection_type;
|
||||
interface_ip.temp = temperature;
|
||||
interface_ip.F_sz_nm = core_tech_node;
|
||||
interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
|
||||
interface_ip.is_main_mem = false;
|
||||
|
||||
// These are there just to make CACTI's error_checking() happy.
|
||||
// They are either not actually used or overwritten by each component.
|
||||
interface_ip.cache_sz = MIN_BUFFER_SIZE;
|
||||
interface_ip.nbanks = 1;
|
||||
interface_ip.out_w = 0;
|
||||
interface_ip.line_sz = 1;
|
||||
interface_ip.assoc = 1;
|
||||
interface_ip.num_rw_ports = 1;
|
||||
interface_ip.num_search_ports = 1;
|
||||
interface_ip.is_cache = true;
|
||||
interface_ip.pure_ram = false;
|
||||
interface_ip.pure_cam = false;
|
||||
|
||||
|
||||
//This section of code does not have real meaning; it is just to ensure
|
||||
//all data will have initial value to prevent errors.
|
||||
//They will be overridden during each components initialization
|
||||
interface_ip.specific_tag = 1;
|
||||
interface_ip.tag_w = 64;
|
||||
interface_ip.access_mode = 2;
|
||||
|
||||
interface_ip.obj_func_dyn_energy = 0;
|
||||
interface_ip.obj_func_dyn_power = 0;
|
||||
interface_ip.obj_func_leak_power = 0;
|
||||
interface_ip.obj_func_cycle_t = 1;
|
||||
interface_ip.num_rw_ports = 1;
|
||||
interface_ip.num_rd_ports = 0;
|
||||
interface_ip.num_wr_ports = 0;
|
||||
interface_ip.num_se_rd_ports = 0;
|
||||
}
|
||||
|
||||
/**
 * Destructor; currently performs no cleanup of its own.
 */
System::~System()
{
    // TODO: Delete children... do this in McPATComponent
}
|
|
@ -1,7 +1,7 @@
|
|||
/*****************************************************************************
|
||||
* McPAT
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -25,19 +25,23 @@
|
|||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Joel Hestness
|
||||
* Yasuko Eckert
|
||||
*
|
||||
***************************************************************************/
|
||||
#ifndef PROCESSOR_H_
|
||||
#define PROCESSOR_H_
|
||||
|
||||
#include <vector>
|
||||
#ifndef SYSTEM_H_
|
||||
#define SYSTEM_H_
|
||||
|
||||
#include "XML_Parse.h"
|
||||
#include "arbiter.h"
|
||||
#include "area.h"
|
||||
#include "array.h"
|
||||
#include "basic_components.h"
|
||||
#include "bus_interconnect.h"
|
||||
#include "cachecontroller.h"
|
||||
#include "cacheunit.h"
|
||||
#include "core.h"
|
||||
#include "decoder.h"
|
||||
#include "iocontrollers.h"
|
||||
|
@ -45,35 +49,23 @@
|
|||
#include "noc.h"
|
||||
#include "parameter.h"
|
||||
#include "router.h"
|
||||
#include "sharedcache.h"
|
||||
|
||||
class Processor : public Component
|
||||
{
|
||||
class System : public McPATComponent {
|
||||
public:
|
||||
ParseXML *XML;
|
||||
vector<Core *> cores;
|
||||
vector<SharedCache *> l2array;
|
||||
vector<SharedCache *> l3array;
|
||||
vector<SharedCache *> l1dirarray;
|
||||
vector<SharedCache *> l2dirarray;
|
||||
vector<NoC *> nocs;
|
||||
MemoryController * mc;
|
||||
NIUController * niu;
|
||||
PCIeController * pcie;
|
||||
FlashController * flashcontroller;
|
||||
InputParameter interface_ip;
|
||||
ProcParam procdynp;
|
||||
//wire globalInterconnect;
|
||||
//clock_network globalClock;
|
||||
Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers;
|
||||
int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir;
|
||||
Processor(ParseXML *XML_interface);
|
||||
void compute();
|
||||
|
||||
int device_type;
|
||||
double core_tech_node;
|
||||
int interconnect_projection_type;
|
||||
int temperature;
|
||||
|
||||
System(XMLNode* _xml_data);
|
||||
void set_proc_param();
|
||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
||||
// TODO: make this recursively compute energy on subcomponents
|
||||
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||
void displayDeviceType(int device_type_, uint32_t indent = 0);
|
||||
void displayInterconnectType(int interconnect_type_, uint32_t indent = 0);
|
||||
~Processor();
|
||||
~System();
|
||||
};
|
||||
|
||||
#endif /* PROCESSOR_H_ */
|
||||
#endif /* SYSTEM_H_ */
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -42,6 +42,7 @@
|
|||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Copyright (c) 2002, Business-Insight
|
||||
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||
* <a href="http://www.Business-Insight.com">Business-Insight</a>
|
||||
* All rights reserved.
|
||||
*
|
||||
|
@ -185,8 +186,7 @@
|
|||
|
||||
|
||||
/// Enumeration for XML parse errors.
|
||||
typedef enum XMLError
|
||||
{
|
||||
typedef enum XMLError {
|
||||
eXMLErrorNone = 0,
|
||||
eXMLErrorMissingEndTag,
|
||||
eXMLErrorNoXMLTagFound,
|
||||
|
@ -213,8 +213,7 @@ typedef enum XMLError
|
|||
|
||||
|
||||
/// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents
|
||||
typedef enum XMLElementType
|
||||
{
|
||||
typedef enum XMLElementType {
|
||||
eNodeChild = 0,
|
||||
eNodeAttribute = 1,
|
||||
eNodeText = 2,
|
||||
|
@ -223,20 +222,23 @@ typedef enum XMLElementType
|
|||
} XMLElementType;
|
||||
|
||||
/// Structure used to obtain error details if the parse fails.
|
||||
typedef struct XMLResults
|
||||
{
|
||||
typedef struct XMLResults {
|
||||
enum XMLError error;
|
||||
int nLine,nColumn;
|
||||
int nLine;
|
||||
int nColumn;
|
||||
} XMLResults;
|
||||
|
||||
/// Structure for XML clear (unformatted) node (usually comments)
|
||||
typedef struct XMLClear {
|
||||
XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag;
|
||||
XMLCSTR lpszValue;
|
||||
XMLCSTR lpszOpenTag;
|
||||
XMLCSTR lpszCloseTag;
|
||||
} XMLClear;
|
||||
|
||||
/// Structure for XML attribute.
|
||||
typedef struct XMLAttribute {
|
||||
XMLCSTR lpszName; XMLCSTR lpszValue;
|
||||
XMLCSTR lpszName;
|
||||
XMLCSTR lpszValue;
|
||||
} XMLAttribute;
|
||||
|
||||
/// XMLElementPosition are not interchangeable with simple indexes
|
||||
|
@ -256,8 +258,7 @@ struct XMLNodeContents;
|
|||
* <li> XMLNode::openFileHelper </li>
|
||||
* <li> XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)</li>
|
||||
* </ul> */
|
||||
typedef struct XMLDLLENTRY XMLNode
|
||||
{
|
||||
typedef struct XMLDLLENTRY XMLNode {
|
||||
private:
|
||||
|
||||
struct XMLNodeDataTag;
|
||||
|
@ -275,7 +276,8 @@ typedef struct XMLDLLENTRY XMLNode
|
|||
* @{ */
|
||||
|
||||
/// Parse an XML string and return the root of a XMLNode tree representing the string.
|
||||
static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
|
||||
static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL,
|
||||
XMLResults *pResults = NULL);
|
||||
/**< The "parseString" function parse an XML string and return the root of a XMLNode tree. The "opposite" of this function is
|
||||
* the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the
|
||||
* "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error.
|
||||
|
@ -288,7 +290,8 @@ typedef struct XMLDLLENTRY XMLNode
|
|||
*/
|
||||
|
||||
/// Parse an XML file and return the root of a XMLNode tree representing the file.
|
||||
static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
|
||||
static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL,
|
||||
XMLResults *pResults = NULL);
|
||||
/**< The "parseFile" function parse an XML file and return the root of a XMLNode tree. The "opposite" of this function is
|
||||
* the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the
|
||||
* "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error.
|
||||
|
@ -349,6 +352,7 @@ typedef struct XMLDLLENTRY XMLNode
|
|||
XMLNode getChildNode(int i=0) const; ///< return ith child node
|
||||
XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name.
|
||||
XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing)
|
||||
XMLNode* getChildNodePtr(XMLCSTR name, int *j) const;
|
||||
XMLNode getChildNodeWithAttribute(XMLCSTR tagName,
|
||||
XMLCSTR attributeName,
|
||||
XMLCSTR attributeValue=NULL,
|
||||
|
@ -508,8 +512,7 @@ typedef struct XMLDLLENTRY XMLNode
|
|||
/** @} */
|
||||
|
||||
/// Enumeration for XML character encoding.
|
||||
typedef enum XMLCharEncoding
|
||||
{
|
||||
typedef enum XMLCharEncoding {
|
||||
char_encoding_error = 0,
|
||||
char_encoding_UTF8 = 1,
|
||||
char_encoding_legacy = 2,
|
||||
|
@ -592,8 +595,7 @@ typedef struct XMLDLLENTRY XMLNode
|
|||
private:
|
||||
// these are functions and structures used internally by the XMLNode class (don't bother about them):
|
||||
|
||||
typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
|
||||
{
|
||||
typedef struct XMLNodeDataTag { // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
|
||||
XMLCSTR lpszName; // Element name (=NULL if root)
|
||||
int nChild, // Number of child nodes
|
||||
nText, // Number of text fields
|
||||
|
@ -629,8 +631,7 @@ typedef struct XMLDLLENTRY XMLNode
|
|||
} XMLNode;
|
||||
|
||||
/// This structure is given by the function XMLNode::enumContents.
|
||||
typedef struct XMLNodeContents
|
||||
{
|
||||
typedef struct XMLNodeContents {
|
||||
/// This dictates what's the content of the XMLNodeContent
|
||||
enum XMLElementType etype;
|
||||
/**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */
|
||||
|
@ -685,8 +686,7 @@ XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
|
|||
* \note If you are creating from scratch an XML file using the provided XMLNode class
|
||||
* you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the
|
||||
* processing job for you during rendering).*/
|
||||
typedef struct XMLDLLENTRY ToXMLStringTool
|
||||
{
|
||||
typedef struct XMLDLLENTRY ToXMLStringTool {
|
||||
public:
|
||||
ToXMLStringTool(): buf(NULL), buflen(0){}
|
||||
~ToXMLStringTool();
|
||||
|
@ -718,8 +718,7 @@ private:
|
|||
* b64-encoded text included inside the XML file, use "decode". Alternatively, these
|
||||
* functions can also be used to "encrypt/decrypt" some critical data contained inside
|
||||
* the XML (it's not a strong encryption at all, but sometimes it can be useful). */
|
||||
typedef struct XMLDLLENTRY XMLParserBase64Tool
|
||||
{
|
||||
typedef struct XMLDLLENTRY XMLParserBase64Tool {
|
||||
public:
|
||||
XMLParserBase64Tool(): buf(NULL), buflen(0){}
|
||||
~XMLParserBase64Tool();
|
||||
|
|
Loading…
Reference in a new issue