ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concerns we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint.
This commit is contained in:
parent
1104199115
commit
0deef376d9
71 changed files with 23147 additions and 28461 deletions
File diff suppressed because it is too large
Load diff
|
@ -1,591 +0,0 @@
|
||||||
/*****************************************************************************
|
|
||||||
* McPAT
|
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
|
||||||
* All Rights Reserved
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are
|
|
||||||
* met: redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer;
|
|
||||||
* redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution;
|
|
||||||
* neither the name of the copyright holders nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
|
||||||
*
|
|
||||||
***************************************************************************/
|
|
||||||
|
|
||||||
#ifndef XML_PARSE_H_
|
|
||||||
#define XML_PARSE_H_
|
|
||||||
|
|
||||||
|
|
||||||
//#ifdef WIN32
|
|
||||||
//#define _CRT_SECURE_NO_DEPRECATE
|
|
||||||
//#endif
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include "xmlParser.h"
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
/*
|
|
||||||
void myfree(char *t); // {free(t);}
|
|
||||||
ToXMLStringTool tx,tx2;
|
|
||||||
*/
|
|
||||||
//all subnodes at the level of system.core(0-n)
|
|
||||||
//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
|
|
||||||
typedef struct{
|
|
||||||
int prediction_width;
|
|
||||||
char prediction_scheme[20];
|
|
||||||
int predictor_size;
|
|
||||||
int predictor_entries;
|
|
||||||
int local_predictor_size[20];
|
|
||||||
int local_predictor_entries;
|
|
||||||
int global_predictor_entries;
|
|
||||||
int global_predictor_bits;
|
|
||||||
int chooser_predictor_entries;
|
|
||||||
int chooser_predictor_bits;
|
|
||||||
double predictor_accesses;
|
|
||||||
} predictor_systemcore;
|
|
||||||
typedef struct{
|
|
||||||
int number_entries;
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
double total_hits;
|
|
||||||
double total_accesses;
|
|
||||||
double total_misses;
|
|
||||||
double conflicts;
|
|
||||||
} itlb_systemcore;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
double icache_config[20];
|
|
||||||
int buffer_sizes[20];
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double read_misses;
|
|
||||||
double replacements;
|
|
||||||
double read_hits;
|
|
||||||
double total_hits;
|
|
||||||
double total_misses;
|
|
||||||
double miss_buffer_access;
|
|
||||||
double fill_buffer_accesses;
|
|
||||||
double prefetch_buffer_accesses;
|
|
||||||
double prefetch_buffer_writes;
|
|
||||||
double prefetch_buffer_reads;
|
|
||||||
double prefetch_buffer_hits;
|
|
||||||
double conflicts;
|
|
||||||
} icache_systemcore;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int number_entries;
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double write_hits;
|
|
||||||
double read_hits;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double total_hits;
|
|
||||||
double total_misses;
|
|
||||||
double conflicts;
|
|
||||||
} dtlb_systemcore;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
double dcache_config[20];
|
|
||||||
int buffer_sizes[20];
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double total_hits;
|
|
||||||
double total_misses;
|
|
||||||
double read_hits;
|
|
||||||
double write_hits;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double replacements;
|
|
||||||
double write_backs;
|
|
||||||
double miss_buffer_access;
|
|
||||||
double fill_buffer_accesses;
|
|
||||||
double prefetch_buffer_accesses;
|
|
||||||
double prefetch_buffer_writes;
|
|
||||||
double prefetch_buffer_reads;
|
|
||||||
double prefetch_buffer_hits;
|
|
||||||
double wbb_writes;
|
|
||||||
double wbb_reads;
|
|
||||||
double conflicts;
|
|
||||||
} dcache_systemcore;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int BTB_config[20];
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double total_hits;
|
|
||||||
double total_misses;
|
|
||||||
double read_hits;
|
|
||||||
double write_hits;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double replacements;
|
|
||||||
} BTB_systemcore;
|
|
||||||
typedef struct{
|
|
||||||
//all params at the level of system.core(0-n)
|
|
||||||
int clock_rate;
|
|
||||||
bool opt_local;
|
|
||||||
bool x86;
|
|
||||||
int machine_bits;
|
|
||||||
int virtual_address_width;
|
|
||||||
int physical_address_width;
|
|
||||||
int opcode_width;
|
|
||||||
int micro_opcode_width;
|
|
||||||
int instruction_length;
|
|
||||||
int machine_type;
|
|
||||||
int internal_datapath_width;
|
|
||||||
int number_hardware_threads;
|
|
||||||
int fetch_width;
|
|
||||||
int number_instruction_fetch_ports;
|
|
||||||
int decode_width;
|
|
||||||
int issue_width;
|
|
||||||
int peak_issue_width;
|
|
||||||
int commit_width;
|
|
||||||
int pipelines_per_core[20];
|
|
||||||
int pipeline_depth[20];
|
|
||||||
char FPU[20];
|
|
||||||
char divider_multiplier[20];
|
|
||||||
int ALU_per_core;
|
|
||||||
double FPU_per_core;
|
|
||||||
int MUL_per_core;
|
|
||||||
int instruction_buffer_size;
|
|
||||||
int decoded_stream_buffer_size;
|
|
||||||
int instruction_window_scheme;
|
|
||||||
int instruction_window_size;
|
|
||||||
int fp_instruction_window_size;
|
|
||||||
int ROB_size;
|
|
||||||
int archi_Regs_IRF_size;
|
|
||||||
int archi_Regs_FRF_size;
|
|
||||||
int phy_Regs_IRF_size;
|
|
||||||
int phy_Regs_FRF_size;
|
|
||||||
int rename_scheme;
|
|
||||||
int register_windows_size;
|
|
||||||
char LSU_order[20];
|
|
||||||
int store_buffer_size;
|
|
||||||
int load_buffer_size;
|
|
||||||
int memory_ports;
|
|
||||||
char Dcache_dual_pump[20];
|
|
||||||
int RAS_size;
|
|
||||||
int fp_issue_width;
|
|
||||||
int prediction_width;
|
|
||||||
int number_of_BTB;
|
|
||||||
int number_of_BPT;
|
|
||||||
|
|
||||||
//all stats at the level of system.core(0-n)
|
|
||||||
double total_instructions;
|
|
||||||
double int_instructions;
|
|
||||||
double fp_instructions;
|
|
||||||
double branch_instructions;
|
|
||||||
double branch_mispredictions;
|
|
||||||
double committed_instructions;
|
|
||||||
double committed_int_instructions;
|
|
||||||
double committed_fp_instructions;
|
|
||||||
double load_instructions;
|
|
||||||
double store_instructions;
|
|
||||||
double total_cycles;
|
|
||||||
double idle_cycles;
|
|
||||||
double busy_cycles;
|
|
||||||
double instruction_buffer_reads;
|
|
||||||
double instruction_buffer_write;
|
|
||||||
double ROB_reads;
|
|
||||||
double ROB_writes;
|
|
||||||
double rename_accesses;
|
|
||||||
double fp_rename_accesses;
|
|
||||||
double rename_reads;
|
|
||||||
double rename_writes;
|
|
||||||
double fp_rename_reads;
|
|
||||||
double fp_rename_writes;
|
|
||||||
double inst_window_reads;
|
|
||||||
double inst_window_writes;
|
|
||||||
double inst_window_wakeup_accesses;
|
|
||||||
double inst_window_selections;
|
|
||||||
double fp_inst_window_reads;
|
|
||||||
double fp_inst_window_writes;
|
|
||||||
double fp_inst_window_wakeup_accesses;
|
|
||||||
double fp_inst_window_selections;
|
|
||||||
double archi_int_regfile_reads;
|
|
||||||
double archi_float_regfile_reads;
|
|
||||||
double phy_int_regfile_reads;
|
|
||||||
double phy_float_regfile_reads;
|
|
||||||
double phy_int_regfile_writes;
|
|
||||||
double phy_float_regfile_writes;
|
|
||||||
double archi_int_regfile_writes;
|
|
||||||
double archi_float_regfile_writes;
|
|
||||||
double int_regfile_reads;
|
|
||||||
double float_regfile_reads;
|
|
||||||
double int_regfile_writes;
|
|
||||||
double float_regfile_writes;
|
|
||||||
double windowed_reg_accesses;
|
|
||||||
double windowed_reg_transports;
|
|
||||||
double function_calls;
|
|
||||||
double context_switches;
|
|
||||||
double ialu_accesses;
|
|
||||||
double fpu_accesses;
|
|
||||||
double mul_accesses;
|
|
||||||
double cdb_alu_accesses;
|
|
||||||
double cdb_mul_accesses;
|
|
||||||
double cdb_fpu_accesses;
|
|
||||||
double load_buffer_reads;
|
|
||||||
double load_buffer_writes;
|
|
||||||
double load_buffer_cams;
|
|
||||||
double store_buffer_reads;
|
|
||||||
double store_buffer_writes;
|
|
||||||
double store_buffer_cams;
|
|
||||||
double store_buffer_forwards;
|
|
||||||
double main_memory_access;
|
|
||||||
double main_memory_read;
|
|
||||||
double main_memory_write;
|
|
||||||
double pipeline_duty_cycle;
|
|
||||||
|
|
||||||
double IFU_duty_cycle ;
|
|
||||||
double BR_duty_cycle ;
|
|
||||||
double LSU_duty_cycle ;
|
|
||||||
double MemManU_I_duty_cycle;
|
|
||||||
double MemManU_D_duty_cycle ;
|
|
||||||
double ALU_duty_cycle ;
|
|
||||||
double MUL_duty_cycle ;
|
|
||||||
double FPU_duty_cycle ;
|
|
||||||
double ALU_cdb_duty_cycle ;
|
|
||||||
double MUL_cdb_duty_cycle ;
|
|
||||||
double FPU_cdb_duty_cycle ;
|
|
||||||
|
|
||||||
//all subnodes at the level of system.core(0-n)
|
|
||||||
predictor_systemcore predictor;
|
|
||||||
itlb_systemcore itlb;
|
|
||||||
icache_systemcore icache;
|
|
||||||
dtlb_systemcore dtlb;
|
|
||||||
dcache_systemcore dcache;
|
|
||||||
BTB_systemcore BTB;
|
|
||||||
|
|
||||||
} system_core;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int Directory_type;
|
|
||||||
double Dir_config[20];
|
|
||||||
int buffer_sizes[20];
|
|
||||||
int clockrate;
|
|
||||||
int ports[20];
|
|
||||||
int device_type;
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
char threeD_stack[20];
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double conflicts;
|
|
||||||
double duty_cycle;
|
|
||||||
} system_L1Directory;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int Directory_type;
|
|
||||||
double Dir_config[20];
|
|
||||||
int buffer_sizes[20];
|
|
||||||
int clockrate;
|
|
||||||
int ports[20];
|
|
||||||
int device_type;
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
char threeD_stack[20];
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double conflicts;
|
|
||||||
double duty_cycle;
|
|
||||||
} system_L2Directory;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
double L2_config[20];
|
|
||||||
int clockrate;
|
|
||||||
int ports[20];
|
|
||||||
int device_type;
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
char threeD_stack[20];
|
|
||||||
int buffer_sizes[20];
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double total_hits;
|
|
||||||
double total_misses;
|
|
||||||
double read_hits;
|
|
||||||
double write_hits;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double replacements;
|
|
||||||
double write_backs;
|
|
||||||
double miss_buffer_accesses;
|
|
||||||
double fill_buffer_accesses;
|
|
||||||
double prefetch_buffer_accesses;
|
|
||||||
double prefetch_buffer_writes;
|
|
||||||
double prefetch_buffer_reads;
|
|
||||||
double prefetch_buffer_hits;
|
|
||||||
double wbb_writes;
|
|
||||||
double wbb_reads;
|
|
||||||
double conflicts;
|
|
||||||
double duty_cycle;
|
|
||||||
|
|
||||||
bool merged_dir;
|
|
||||||
double homenode_read_accesses;
|
|
||||||
double homenode_write_accesses;
|
|
||||||
double homenode_read_hits;
|
|
||||||
double homenode_write_hits;
|
|
||||||
double homenode_read_misses;
|
|
||||||
double homenode_write_misses;
|
|
||||||
double dir_duty_cycle;
|
|
||||||
} system_L2;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
double L3_config[20];
|
|
||||||
int clockrate;
|
|
||||||
int ports[20];
|
|
||||||
int device_type;
|
|
||||||
int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate
|
|
||||||
char threeD_stack[20];
|
|
||||||
int buffer_sizes[20];
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double read_accesses;
|
|
||||||
double write_accesses;
|
|
||||||
double total_hits;
|
|
||||||
double total_misses;
|
|
||||||
double read_hits;
|
|
||||||
double write_hits;
|
|
||||||
double read_misses;
|
|
||||||
double write_misses;
|
|
||||||
double replacements;
|
|
||||||
double write_backs;
|
|
||||||
double miss_buffer_accesses;
|
|
||||||
double fill_buffer_accesses;
|
|
||||||
double prefetch_buffer_accesses;
|
|
||||||
double prefetch_buffer_writes;
|
|
||||||
double prefetch_buffer_reads;
|
|
||||||
double prefetch_buffer_hits;
|
|
||||||
double wbb_writes;
|
|
||||||
double wbb_reads;
|
|
||||||
double conflicts;
|
|
||||||
double duty_cycle;
|
|
||||||
|
|
||||||
bool merged_dir;
|
|
||||||
double homenode_read_accesses;
|
|
||||||
double homenode_write_accesses;
|
|
||||||
double homenode_read_hits;
|
|
||||||
double homenode_write_hits;
|
|
||||||
double homenode_read_misses;
|
|
||||||
double homenode_write_misses;
|
|
||||||
double dir_duty_cycle;
|
|
||||||
} system_L3;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int number_of_inputs_of_crossbars;
|
|
||||||
int number_of_outputs_of_crossbars;
|
|
||||||
int flit_bits;
|
|
||||||
int input_buffer_entries_per_port;
|
|
||||||
int ports_of_input_buffer[20];
|
|
||||||
//stats
|
|
||||||
double crossbar_accesses;
|
|
||||||
} xbar0_systemNoC;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int clockrate;
|
|
||||||
bool type;
|
|
||||||
bool has_global_link;
|
|
||||||
char topology[20];
|
|
||||||
int horizontal_nodes;
|
|
||||||
int vertical_nodes;
|
|
||||||
int link_throughput;
|
|
||||||
int link_latency;
|
|
||||||
int input_ports;
|
|
||||||
int output_ports;
|
|
||||||
int virtual_channel_per_port;
|
|
||||||
int flit_bits;
|
|
||||||
int input_buffer_entries_per_vc;
|
|
||||||
int ports_of_input_buffer[20];
|
|
||||||
int dual_pump;
|
|
||||||
int number_of_crossbars;
|
|
||||||
char crossbar_type[20];
|
|
||||||
char crosspoint_type[20];
|
|
||||||
xbar0_systemNoC xbar0;
|
|
||||||
int arbiter_type;
|
|
||||||
double chip_coverage;
|
|
||||||
//stats
|
|
||||||
double total_accesses;
|
|
||||||
double duty_cycle;
|
|
||||||
double route_over_perc;
|
|
||||||
} system_NoC;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int mem_tech_node;
|
|
||||||
int device_clock;
|
|
||||||
int peak_transfer_rate;
|
|
||||||
int internal_prefetch_of_DRAM_chip;
|
|
||||||
int capacity_per_channel;
|
|
||||||
int number_ranks;
|
|
||||||
int num_banks_of_DRAM_chip;
|
|
||||||
int Block_width_of_DRAM_chip;
|
|
||||||
int output_width_of_DRAM_chip;
|
|
||||||
int page_size_of_DRAM_chip;
|
|
||||||
int burstlength_of_DRAM_chip;
|
|
||||||
//stats
|
|
||||||
double memory_accesses;
|
|
||||||
double memory_reads;
|
|
||||||
double memory_writes;
|
|
||||||
} system_mem;
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
//Common Param for mc and fc
|
|
||||||
double peak_transfer_rate;
|
|
||||||
int number_mcs;
|
|
||||||
bool withPHY;
|
|
||||||
int type;
|
|
||||||
|
|
||||||
//FCParam
|
|
||||||
//stats
|
|
||||||
double duty_cycle;
|
|
||||||
double total_load_perc;
|
|
||||||
|
|
||||||
//McParam
|
|
||||||
int mc_clock;
|
|
||||||
int llc_line_length;
|
|
||||||
int memory_channels_per_mc;
|
|
||||||
int number_ranks;
|
|
||||||
int req_window_size_per_channel;
|
|
||||||
int IO_buffer_size_per_channel;
|
|
||||||
int databus_width;
|
|
||||||
int addressbus_width;
|
|
||||||
bool LVDS;
|
|
||||||
|
|
||||||
//stats
|
|
||||||
double memory_accesses;
|
|
||||||
double memory_reads;
|
|
||||||
double memory_writes;
|
|
||||||
} system_mc;
|
|
||||||
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int clockrate;
|
|
||||||
int number_units;
|
|
||||||
int type;
|
|
||||||
//stats
|
|
||||||
double duty_cycle;
|
|
||||||
double total_load_perc;
|
|
||||||
} system_niu;
|
|
||||||
|
|
||||||
typedef struct{
|
|
||||||
//params
|
|
||||||
int clockrate;
|
|
||||||
int number_units;
|
|
||||||
int num_channels;
|
|
||||||
int type;
|
|
||||||
bool withPHY;
|
|
||||||
//stats
|
|
||||||
double duty_cycle;
|
|
||||||
double total_load_perc;
|
|
||||||
} system_pcie;
|
|
||||||
|
|
||||||
typedef struct{
|
|
||||||
//All number_of_* at the level of 'system' Ying 03/21/2009
|
|
||||||
int number_of_cores;
|
|
||||||
int number_of_L1Directories;
|
|
||||||
int number_of_L2Directories;
|
|
||||||
int number_of_L2s;
|
|
||||||
bool Private_L2;
|
|
||||||
int number_of_L3s;
|
|
||||||
int number_of_NoCs;
|
|
||||||
int number_of_dir_levels;
|
|
||||||
int domain_size;
|
|
||||||
int first_level_dir;
|
|
||||||
// All params at the level of 'system'
|
|
||||||
int homogeneous_cores;
|
|
||||||
int homogeneous_L1Directories;
|
|
||||||
int homogeneous_L2Directories;
|
|
||||||
double core_tech_node;
|
|
||||||
int target_core_clockrate;
|
|
||||||
int target_chip_area;
|
|
||||||
int temperature;
|
|
||||||
int number_cache_levels;
|
|
||||||
int L1_property;
|
|
||||||
int L2_property;
|
|
||||||
int homogeneous_L2s;
|
|
||||||
int L3_property;
|
|
||||||
int homogeneous_L3s;
|
|
||||||
int homogeneous_NoCs;
|
|
||||||
int homogeneous_ccs;
|
|
||||||
int Max_area_deviation;
|
|
||||||
int Max_power_deviation;
|
|
||||||
int device_type;
|
|
||||||
bool longer_channel_device;
|
|
||||||
bool Embedded;
|
|
||||||
bool opt_dynamic_power;
|
|
||||||
bool opt_lakage_power;
|
|
||||||
bool opt_clockrate;
|
|
||||||
bool opt_area;
|
|
||||||
int interconnect_projection_type;
|
|
||||||
int machine_bits;
|
|
||||||
int virtual_address_width;
|
|
||||||
int physical_address_width;
|
|
||||||
int virtual_memory_page_size;
|
|
||||||
double total_cycles;
|
|
||||||
//system.core(0-n):3rd level
|
|
||||||
system_core core[64];
|
|
||||||
system_L1Directory L1Directory[64];
|
|
||||||
system_L2Directory L2Directory[64];
|
|
||||||
system_L2 L2[64];
|
|
||||||
system_L3 L3[64];
|
|
||||||
system_NoC NoC[64];
|
|
||||||
system_mem mem;
|
|
||||||
system_mc mc;
|
|
||||||
system_mc flashc;
|
|
||||||
system_niu niu;
|
|
||||||
system_pcie pcie;
|
|
||||||
} root_system;
|
|
||||||
|
|
||||||
class ParseXML
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
void parse(char* filepath);
|
|
||||||
void initialize();
|
|
||||||
public:
|
|
||||||
root_system sys;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* XML_PARSE_H_ */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,232 +26,242 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
#define GLOBALVAR
|
|
||||||
#include <cassert>
|
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
#include "area.h"
|
#include "area.h"
|
||||||
#include "array.h"
|
#include "array.h"
|
||||||
|
#include "common.h"
|
||||||
#include "decoder.h"
|
#include "decoder.h"
|
||||||
#include "globalvar.h"
|
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
ArrayST::ArrayST(const InputParameter *configure_interface,
|
double ArrayST::area_efficiency_threshold = 20.0;
|
||||||
string _name,
|
int ArrayST::ed = 0;
|
||||||
enum Device_ty device_ty_,
|
//Fixed number, make sure timing can be satisfied.
|
||||||
bool opt_local_,
|
int ArrayST::delay_wt = 100;
|
||||||
enum Core_type core_ty_,
|
int ArrayST::cycle_time_wt = 1000;
|
||||||
bool _is_default)
|
//Fixed number, This is used to exhaustive search for individual components.
|
||||||
:l_ip(*configure_interface),
|
int ArrayST::area_wt = 10;
|
||||||
name(_name),
|
//Fixed number, This is used to exhaustive search for individual components.
|
||||||
device_ty(device_ty_),
|
int ArrayST::dynamic_power_wt = 10;
|
||||||
opt_local(opt_local_),
|
int ArrayST::leakage_power_wt = 10;
|
||||||
core_ty(core_ty_),
|
//Fixed number, make sure timing can be satisfied.
|
||||||
is_default(_is_default)
|
int ArrayST::delay_dev = 1000000;
|
||||||
{
|
int ArrayST::cycle_time_dev = 100;
|
||||||
|
//Fixed number, This is used to exhaustive search for individual components.
|
||||||
if (l_ip.cache_sz<64) l_ip.cache_sz=64;
|
int ArrayST::area_dev = 1000000;
|
||||||
l_ip.error_checking();//not only do the error checking but also fill some missing parameters
|
//Fixed number, This is used to exhaustive search for individual components.
|
||||||
optimize_array();
|
int ArrayST::dynamic_power_dev = 1000000;
|
||||||
|
int ArrayST::leakage_power_dev = 1000000;
|
||||||
}
|
int ArrayST::cycle_time_dev_threshold = 10;
|
||||||
|
|
||||||
|
|
||||||
void ArrayST::compute_base_power()
|
ArrayST::ArrayST(XMLNode* _xml_data,
|
||||||
{
|
const InputParameter *configure_interface, string _name,
|
||||||
//l_ip.out_w =l_ip.line_sz*8;
|
enum Device_ty device_ty_, double _clockRate,
|
||||||
local_result=cacti_interface(&l_ip);
|
bool opt_local_, enum Core_type core_ty_, bool _is_default)
|
||||||
|
: McPATComponent(_xml_data), l_ip(*configure_interface),
|
||||||
|
device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
|
||||||
|
is_default(_is_default) {
|
||||||
|
name = _name;
|
||||||
|
clockRate = _clockRate;
|
||||||
|
if (l_ip.cache_sz < MIN_BUFFER_SIZE)
|
||||||
|
l_ip.cache_sz = MIN_BUFFER_SIZE;
|
||||||
|
|
||||||
|
if (!l_ip.error_checking(name)) {
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ArrayST::optimize_array()
|
output_data.reset();
|
||||||
{
|
|
||||||
list<uca_org_t > candidate_solutions(0);
|
|
||||||
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
|
|
||||||
|
|
||||||
uca_org_t * temp_res = 0;
|
computeEnergy();
|
||||||
local_result.valid=false;
|
computeArea();
|
||||||
|
}
|
||||||
|
|
||||||
double throughput=l_ip.throughput, latency=l_ip.latency;
|
void ArrayST::compute_base_power() {
|
||||||
double area_efficiency_threshold = 20.0;
|
local_result = cacti_interface(&l_ip);
|
||||||
bool throughput_overflow=true, latency_overflow=true;
|
}
|
||||||
compute_base_power();
|
|
||||||
|
|
||||||
if ((local_result.cycle_time - throughput) <= 1e-10 )
|
void ArrayST::computeArea() {
|
||||||
throughput_overflow=false;
|
area.set_area(local_result.area);
|
||||||
if ((local_result.access_time - latency)<= 1e-10)
|
output_data.area = local_result.area / 1e6;
|
||||||
latency_overflow=false;
|
}
|
||||||
|
|
||||||
if (opt_for_clk && opt_local)
|
void ArrayST::computeEnergy() {
|
||||||
{
|
list<uca_org_t > candidate_solutions(0);
|
||||||
if (throughput_overflow || latency_overflow)
|
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
|
||||||
{
|
|
||||||
l_ip.ed=0;
|
|
||||||
|
|
||||||
l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
|
uca_org_t* temp_res = NULL;
|
||||||
l_ip.cycle_time_wt = 1000;
|
local_result.valid = false;
|
||||||
|
|
||||||
l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
|
double throughput = l_ip.throughput;
|
||||||
l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components.
|
double latency = l_ip.latency;
|
||||||
l_ip.leakage_power_wt = 10;
|
bool throughput_overflow = true;
|
||||||
|
bool latency_overflow = true;
|
||||||
|
compute_base_power();
|
||||||
|
|
||||||
l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied.
|
if ((local_result.cycle_time - throughput) <= 1e-10 )
|
||||||
l_ip.cycle_time_dev = 100;
|
throughput_overflow = false;
|
||||||
|
if ((local_result.access_time - latency) <= 1e-10)
|
||||||
|
latency_overflow = false;
|
||||||
|
|
||||||
l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
|
if (opt_for_clk && opt_local) {
|
||||||
l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components.
|
if (throughput_overflow || latency_overflow) {
|
||||||
l_ip.leakage_power_dev = 1000000;
|
l_ip.ed = ed;
|
||||||
|
|
||||||
throughput_overflow=true; //Reset overflow flag before start optimization iterations
|
l_ip.delay_wt = delay_wt;
|
||||||
latency_overflow=true;
|
l_ip.cycle_time_wt = cycle_time_wt;
|
||||||
|
|
||||||
temp_res = &local_result; //Clean up the result for optimized for ED^2P
|
l_ip.area_wt = area_wt;
|
||||||
temp_res->cleanup();
|
l_ip.dynamic_power_wt = dynamic_power_wt;
|
||||||
}
|
l_ip.leakage_power_wt = leakage_power_wt;
|
||||||
|
|
||||||
|
l_ip.delay_dev = delay_dev;
|
||||||
|
l_ip.cycle_time_dev = cycle_time_dev;
|
||||||
|
|
||||||
while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10
|
l_ip.area_dev = area_dev;
|
||||||
{
|
l_ip.dynamic_power_dev = dynamic_power_dev;
|
||||||
compute_base_power();
|
l_ip.leakage_power_dev = leakage_power_dev;
|
||||||
|
|
||||||
l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration
|
//Reset overflow flag before start optimization iterations
|
||||||
|
throughput_overflow = true;
|
||||||
|
latency_overflow = true;
|
||||||
|
|
||||||
// from best area to worst area -->worst timing to best timing
|
//Clean up the result for optimized for ED^2P
|
||||||
if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)||
|
temp_res = &local_result;
|
||||||
(local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0))
|
temp_res->cleanup();
|
||||||
{ //if no satisfiable solution is found,the most aggressive one is left
|
|
||||||
candidate_solutions.push_back(local_result);
|
|
||||||
//output_data_csv(candidate_solutions.back());
|
|
||||||
if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10))
|
|
||||||
//ensure stop opt not because of cam
|
|
||||||
{
|
|
||||||
throughput_overflow=false;
|
|
||||||
latency_overflow=false;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//TODO: whether checking the partial satisfied results too, or just change the mark???
|
|
||||||
if ((local_result.cycle_time - throughput) <= 1e-10)
|
|
||||||
throughput_overflow=false;
|
|
||||||
if ((local_result.access_time - latency)<= 1e-10)
|
|
||||||
latency_overflow=false;
|
|
||||||
|
|
||||||
if (l_ip.cycle_time_dev > 10)
|
|
||||||
{ //if not >10 local_result is the last result, it cannot be cleaned up
|
|
||||||
temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up
|
|
||||||
temp_res->cleanup();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// l_ip.cycle_time_dev-=10;
|
|
||||||
// l_ip.delay_dev-=10;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (l_ip.assoc > 0)
|
|
||||||
{
|
|
||||||
//For array structures except CAM and FA, Give warning but still provide a result with best timing found
|
|
||||||
if (throughput_overflow==true)
|
|
||||||
cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." << endl;
|
|
||||||
if (latency_overflow==true)
|
|
||||||
cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// else
|
|
||||||
// {
|
|
||||||
// /*According to "Content-Addressable Memory (CAM) Circuits and
|
|
||||||
// Architectures": A Tutorial and Survey
|
|
||||||
// by Kostas Pagiamtzis et al.
|
|
||||||
// CAM structures can be heavily pipelined and use look-ahead techniques,
|
|
||||||
// therefore timing can be relaxed. But McPAT does not model the advanced
|
|
||||||
// techniques. If continue optimizing, the area efficiency will be too low
|
|
||||||
// */
|
|
||||||
// //For CAM and FA, stop opt if area efficiency is too low
|
|
||||||
// if (throughput_overflow==true)
|
|
||||||
// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
|
|
||||||
// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
|
|
||||||
// if (latency_overflow==true)
|
|
||||||
// cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name
|
|
||||||
// <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
|
|
||||||
// }
|
|
||||||
|
|
||||||
//double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time;
|
while ((throughput_overflow || latency_overflow) &&
|
||||||
double min_dynamic_energy=BIGNUM;
|
l_ip.cycle_time_dev > cycle_time_dev_threshold) {
|
||||||
if (candidate_solutions.empty()==false)
|
compute_base_power();
|
||||||
{
|
|
||||||
local_result.valid=true;
|
|
||||||
for (candidate_iter = candidate_solutions.begin(); candidate_iter != candidate_solutions.end(); ++candidate_iter)
|
|
||||||
|
|
||||||
{
|
|
||||||
if (min_dynamic_energy > (candidate_iter)->power.readOp.dynamic)
|
|
||||||
{
|
|
||||||
min_dynamic_energy = (candidate_iter)->power.readOp.dynamic;
|
|
||||||
min_dynamic_energy_iter = candidate_iter;
|
|
||||||
local_result = *(min_dynamic_energy_iter);
|
|
||||||
//TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match.
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
candidate_iter->cleanup() ;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
//This is the time_dev to be used for next iteration
|
||||||
|
l_ip.cycle_time_dev -= cycle_time_dev_threshold;
|
||||||
|
|
||||||
|
// from best area to worst area -->worst timing to best timing
|
||||||
|
if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
|
||||||
|
(local_result.access_time - latency) <= 1e-10) ||
|
||||||
|
(local_result.data_array2->area_efficiency <
|
||||||
|
area_efficiency_threshold && l_ip.assoc == 0)) {
|
||||||
|
//if no satisfiable solution is found,the most aggressive one
|
||||||
|
//is left
|
||||||
|
candidate_solutions.push_back(local_result);
|
||||||
|
if (((local_result.cycle_time - throughput) <= 1e-10) &&
|
||||||
|
((local_result.access_time - latency) <= 1e-10)) {
|
||||||
|
//ensure stop opt not because of cam
|
||||||
|
throughput_overflow = false;
|
||||||
|
latency_overflow = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if ((local_result.cycle_time - throughput) <= 1e-10)
|
||||||
|
throughput_overflow = false;
|
||||||
|
if ((local_result.access_time - latency) <= 1e-10)
|
||||||
|
latency_overflow = false;
|
||||||
|
|
||||||
|
//if not >10 local_result is the last result, it cannot be
|
||||||
|
//cleaned up
|
||||||
|
if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
|
||||||
|
//Only solutions not saved in the list need to be
|
||||||
|
//cleaned up
|
||||||
|
temp_res = &local_result;
|
||||||
|
temp_res->cleanup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (l_ip.assoc > 0) {
|
||||||
|
//For array structures except CAM and FA, Give warning but still
|
||||||
|
//provide a result with best timing found
|
||||||
|
if (throughput_overflow == true)
|
||||||
|
cout << "Warning: " << name
|
||||||
|
<< " array structure cannot satisfy throughput constraint."
|
||||||
|
<< endl;
|
||||||
|
if (latency_overflow == true)
|
||||||
|
cout << "Warning: " << name
|
||||||
|
<< " array structure cannot satisfy latency constraint."
|
||||||
|
<< endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
double min_dynamic_energy = BIGNUM;
|
||||||
|
if (candidate_solutions.empty() == false) {
|
||||||
|
local_result.valid = true;
|
||||||
|
for (candidate_iter = candidate_solutions.begin();
|
||||||
|
candidate_iter != candidate_solutions.end();
|
||||||
|
++candidate_iter) {
|
||||||
|
if (min_dynamic_energy >
|
||||||
|
(candidate_iter)->power.readOp.dynamic) {
|
||||||
|
min_dynamic_energy =
|
||||||
|
(candidate_iter)->power.readOp.dynamic;
|
||||||
|
min_dynamic_energy_iter = candidate_iter;
|
||||||
|
local_result = *(min_dynamic_energy_iter);
|
||||||
|
} else {
|
||||||
|
candidate_iter->cleanup() ;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
candidate_solutions.clear();
|
candidate_solutions.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
double long_channel_device_reduction =
|
||||||
|
longer_channel_device_reduction(device_ty, core_ty);
|
||||||
|
|
||||||
double macro_layout_overhead = g_tp.macro_layout_overhead;
|
double macro_layout_overhead = g_tp.macro_layout_overhead;
|
||||||
double chip_PR_overhead = g_tp.chip_layout_overhead;
|
double chip_PR_overhead = g_tp.chip_layout_overhead;
|
||||||
double total_overhead = macro_layout_overhead*chip_PR_overhead;
|
double total_overhead = macro_layout_overhead * chip_PR_overhead;
|
||||||
local_result.area *= total_overhead;
|
local_result.area *= total_overhead;
|
||||||
|
|
||||||
//maintain constant power density
|
//maintain constant power density
|
||||||
double pppm_t[4] = {total_overhead,1,1,total_overhead};
|
double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
|
||||||
|
|
||||||
double sckRation = g_tp.sckt_co_eff;
|
double sckRation = g_tp.sckt_co_eff;
|
||||||
local_result.power.readOp.dynamic *= sckRation;
|
local_result.power.readOp.dynamic *= sckRation;
|
||||||
local_result.power.writeOp.dynamic *= sckRation;
|
local_result.power.writeOp.dynamic *= sckRation;
|
||||||
local_result.power.searchOp.dynamic *= sckRation;
|
local_result.power.searchOp.dynamic *= sckRation;
|
||||||
local_result.power.readOp.leakage *= l_ip.nbanks;
|
local_result.power.readOp.leakage *= l_ip.nbanks;
|
||||||
local_result.power.readOp.longer_channel_leakage =
|
local_result.power.readOp.longer_channel_leakage =
|
||||||
local_result.power.readOp.leakage*long_channel_device_reduction;
|
local_result.power.readOp.leakage * long_channel_device_reduction;
|
||||||
local_result.power = local_result.power* pppm_t;
|
local_result.power = local_result.power * pppm_t;
|
||||||
|
|
||||||
local_result.data_array2->power.readOp.dynamic *= sckRation;
|
local_result.data_array2->power.readOp.dynamic *= sckRation;
|
||||||
local_result.data_array2->power.writeOp.dynamic *= sckRation;
|
local_result.data_array2->power.writeOp.dynamic *= sckRation;
|
||||||
local_result.data_array2->power.searchOp.dynamic *= sckRation;
|
local_result.data_array2->power.searchOp.dynamic *= sckRation;
|
||||||
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
|
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
|
||||||
local_result.data_array2->power.readOp.longer_channel_leakage =
|
local_result.data_array2->power.readOp.longer_channel_leakage =
|
||||||
local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
|
local_result.data_array2->power.readOp.leakage *
|
||||||
local_result.data_array2->power = local_result.data_array2->power* pppm_t;
|
long_channel_device_reduction;
|
||||||
|
local_result.data_array2->power = local_result.data_array2->power * pppm_t;
|
||||||
|
|
||||||
|
|
||||||
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
|
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
|
||||||
{
|
local_result.tag_array2->power.readOp.dynamic *= sckRation;
|
||||||
local_result.tag_array2->power.readOp.dynamic *= sckRation;
|
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
|
||||||
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
|
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
|
||||||
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
|
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
|
||||||
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
|
local_result.tag_array2->power.readOp.longer_channel_leakage =
|
||||||
local_result.tag_array2->power.readOp.longer_channel_leakage =
|
local_result.tag_array2->power.readOp.leakage *
|
||||||
local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
|
long_channel_device_reduction;
|
||||||
local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
|
local_result.tag_array2->power =
|
||||||
}
|
local_result.tag_array2->power * pppm_t;
|
||||||
|
}
|
||||||
|
|
||||||
|
power = local_result.power;
|
||||||
|
|
||||||
|
output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
|
||||||
|
output_data.subthreshold_leakage_power = power.readOp.leakage;
|
||||||
|
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ArrayST::leakage_feedback(double temperature)
|
void ArrayST::leakage_feedback(double temperature)
|
||||||
|
@ -296,7 +307,6 @@ void ArrayST::leakage_feedback(double temperature)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ArrayST:: ~ArrayST()
|
ArrayST::~ArrayST() {
|
||||||
{
|
local_result.cleanup();
|
||||||
local_result.cleanup();
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -43,59 +44,42 @@
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class ArrayST :public Component{
|
class ArrayST : public McPATComponent {
|
||||||
public:
|
|
||||||
ArrayST(){};
|
|
||||||
ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true);
|
|
||||||
|
|
||||||
InputParameter l_ip;
|
|
||||||
string name;
|
|
||||||
enum Device_ty device_ty;
|
|
||||||
bool opt_local;
|
|
||||||
enum Core_type core_ty;
|
|
||||||
bool is_default;
|
|
||||||
uca_org_t local_result;
|
|
||||||
|
|
||||||
statsDef tdp_stats;
|
|
||||||
statsDef rtp_stats;
|
|
||||||
statsDef stats_t;
|
|
||||||
powerDef power_t;
|
|
||||||
|
|
||||||
virtual void optimize_array();
|
|
||||||
virtual void compute_base_power();
|
|
||||||
virtual ~ArrayST();
|
|
||||||
|
|
||||||
void leakage_feedback(double temperature);
|
|
||||||
};
|
|
||||||
|
|
||||||
class InstCache :public Component{
|
|
||||||
public:
|
public:
|
||||||
ArrayST* caches;
|
static double area_efficiency_threshold;
|
||||||
ArrayST* missb;
|
|
||||||
ArrayST* ifb;
|
// These are used for the CACTI interface.
|
||||||
ArrayST* prefetchb;
|
static int ed;
|
||||||
powerDef power_t;//temp value holder for both (max) power and runtime power
|
static int delay_wt;
|
||||||
InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;};
|
static int cycle_time_wt;
|
||||||
~InstCache(){
|
static int area_wt;
|
||||||
if (caches) {//caches->local_result.cleanup();
|
static int dynamic_power_wt;
|
||||||
delete caches; caches=0;}
|
static int leakage_power_wt;
|
||||||
if (missb) {//missb->local_result.cleanup();
|
static int delay_dev;
|
||||||
delete missb; missb=0;}
|
static int cycle_time_dev;
|
||||||
if (ifb) {//ifb->local_result.cleanup();
|
static int area_dev;
|
||||||
delete ifb; ifb=0;}
|
static int dynamic_power_dev;
|
||||||
if (prefetchb) {//prefetchb->local_result.cleanup();
|
static int leakage_power_dev;
|
||||||
delete prefetchb; prefetchb=0;}
|
static int cycle_time_dev_threshold;
|
||||||
};
|
|
||||||
|
InputParameter l_ip;
|
||||||
|
enum Device_ty device_ty;
|
||||||
|
bool opt_local;
|
||||||
|
enum Core_type core_ty;
|
||||||
|
bool is_default;
|
||||||
|
uca_org_t local_result;
|
||||||
|
statsDef stats_t;
|
||||||
|
|
||||||
|
ArrayST(XMLNode* _xml_data, const InputParameter *configure_interface,
|
||||||
|
string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
|
||||||
|
bool opt_local_ = true,
|
||||||
|
enum Core_type core_ty_ = Inorder, bool _is_default = true);
|
||||||
|
void computeArea();
|
||||||
|
void computeEnergy();
|
||||||
|
void compute_base_power();
|
||||||
|
~ArrayST();
|
||||||
|
|
||||||
|
void leakage_feedback(double temperature);
|
||||||
};
|
};
|
||||||
|
|
||||||
class DataCache :public InstCache{
|
#endif /* ARRAY_H_ */
|
||||||
public:
|
|
||||||
ArrayST* wbb;
|
|
||||||
DataCache(){wbb=0;};
|
|
||||||
~DataCache(){
|
|
||||||
if (wbb) {//wbb->local_result.cleanup();
|
|
||||||
delete wbb; wbb=0;}
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif /* TLB_H_ */
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -34,94 +35,327 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
#include "cacheunit.h"
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
// Turn this to true to get debugging messages
|
||||||
|
bool McPATComponent::debug = false;
|
||||||
|
|
||||||
|
bool McPATComponent::opt_for_clk = true;
|
||||||
|
int McPATComponent::longer_channel_device = 0;
|
||||||
|
// Number of cycles per second, 2GHz = 2e9
|
||||||
|
double McPATComponent::target_core_clockrate = 2e9;
|
||||||
|
double McPATComponent::total_cycles = 0.0f;
|
||||||
|
double McPATComponent::execution_time = 0.0f;
|
||||||
|
int McPATComponent::physical_address_width = 0;
|
||||||
|
int McPATComponent::virtual_address_width = 0;
|
||||||
|
int McPATComponent::virtual_memory_page_size = 0;
|
||||||
|
int McPATComponent::data_path_width = 0;
|
||||||
|
|
||||||
|
void McPATOutput::reset() {
|
||||||
|
storage = 0.0;
|
||||||
|
area = 0.0;
|
||||||
|
peak_dynamic_power = 0.0;
|
||||||
|
subthreshold_leakage_power = 0.0;
|
||||||
|
gate_leakage_power = 0.0;
|
||||||
|
runtime_dynamic_energy = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs) {
|
||||||
|
McPATOutput to_return;
|
||||||
|
to_return.storage = lhs.storage + rhs.storage;
|
||||||
|
to_return.area = lhs.area + rhs.area;
|
||||||
|
to_return.peak_dynamic_power = lhs.peak_dynamic_power +
|
||||||
|
rhs.peak_dynamic_power;
|
||||||
|
to_return.subthreshold_leakage_power = lhs.subthreshold_leakage_power +
|
||||||
|
rhs.subthreshold_leakage_power;
|
||||||
|
to_return.gate_leakage_power = lhs.gate_leakage_power +
|
||||||
|
rhs.gate_leakage_power;
|
||||||
|
to_return.runtime_dynamic_energy = lhs.runtime_dynamic_energy +
|
||||||
|
rhs.runtime_dynamic_energy;
|
||||||
|
return to_return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATOutput::operator+=(const McPATOutput &rhs) {
|
||||||
|
storage += rhs.storage;
|
||||||
|
area += rhs.area;
|
||||||
|
peak_dynamic_power += rhs.peak_dynamic_power;
|
||||||
|
subthreshold_leakage_power += rhs.subthreshold_leakage_power;
|
||||||
|
gate_leakage_power += rhs.gate_leakage_power;
|
||||||
|
runtime_dynamic_energy += rhs.runtime_dynamic_energy;
|
||||||
|
}
|
||||||
|
|
||||||
|
McPATComponent::McPATComponent()
|
||||||
|
: xml_data(NULL), name("") {
|
||||||
|
}
|
||||||
|
|
||||||
|
McPATComponent::McPATComponent(XMLNode* _xml_data)
|
||||||
|
: xml_data(_xml_data), name("") {
|
||||||
|
}
|
||||||
|
|
||||||
|
McPATComponent::McPATComponent(XMLNode* _xml_data,
|
||||||
|
InputParameter* _interface_ip)
|
||||||
|
: xml_data(_xml_data), interface_ip(*_interface_ip), name("") {
|
||||||
|
}
|
||||||
|
|
||||||
|
McPATComponent::~McPATComponent() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::recursiveInstantiate() {
|
||||||
|
if (debug) {
|
||||||
|
fprintf(stderr, "WARNING: Called recursiveInstantiate from %s, with ",
|
||||||
|
"'type' %s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||||
|
}
|
||||||
|
int i;
|
||||||
|
int numChildren = xml_data->nChildNode("component");
|
||||||
|
for (i = 0; i < numChildren; i++ ) {
|
||||||
|
// For each child node of the system,
|
||||||
|
XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
|
||||||
|
XMLCSTR type = childXML->getAttribute("type");
|
||||||
|
|
||||||
|
if (!type)
|
||||||
|
warnMissingComponentType(childXML->getAttribute("id"));
|
||||||
|
|
||||||
|
STRCMP(type, "Core")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
STRCMP(type, "CacheUnit")
|
||||||
|
children.push_back(new CacheUnit(childXML, &interface_ip));
|
||||||
|
STRCMP(type, "CacheController")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
STRCMP(type, "MemoryController")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
STRCMP(type, "Memory")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
STRCMP(type, "OnChipNetwork")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
STRCMP(type, "BusInterconnect")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
STRCMP(type, "Directory")
|
||||||
|
warnIncompleteComponentType(type);
|
||||||
|
|
||||||
|
else
|
||||||
|
warnUnrecognizedComponent(type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::computeArea() {
|
||||||
|
if (debug) {
|
||||||
|
fprintf(stderr, "WARNING: Called computeArea from %s, with 'type' ",
|
||||||
|
"%s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: This calculation is incorrect and is overwritten by computeEnergy
|
||||||
|
// Fix it up so that the values are available at the correct times
|
||||||
|
int i;
|
||||||
|
int numChildren = children.size();
|
||||||
|
area.set_area(0.0);
|
||||||
|
output_data.area = 0.0;
|
||||||
|
for (i = 0; i < numChildren; i++) {
|
||||||
|
children[i]->computeArea();
|
||||||
|
output_data.area += area.get_area();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::computeEnergy() {
|
||||||
|
if (debug) {
|
||||||
|
fprintf(stderr, "WARNING: Called computeEnergy from %s, with 'type' ",
|
||||||
|
"%s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||||
|
}
|
||||||
|
|
||||||
|
power.reset();
|
||||||
|
rt_power.reset();
|
||||||
|
memset(&output_data, 0, sizeof(McPATOutput));
|
||||||
|
int i;
|
||||||
|
int numChildren = children.size();
|
||||||
|
for (i = 0; i < numChildren; i++) {
|
||||||
|
children[i]->computeEnergy();
|
||||||
|
output_data += children[i]->output_data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::displayData(uint32_t indent, int plevel) {
|
||||||
|
if (debug) {
|
||||||
|
fprintf(stderr, "WARNING: Called displayData from %s, with 'type' ",
|
||||||
|
"%s\n", name.c_str(), xml_data->getAttribute("type"));
|
||||||
|
}
|
||||||
|
|
||||||
|
string indent_str(indent, ' ');
|
||||||
|
string indent_str_next(indent + 2, ' ');
|
||||||
|
|
||||||
|
double leakage_power = output_data.subthreshold_leakage_power +
|
||||||
|
output_data.gate_leakage_power;
|
||||||
|
double total_runtime_energy = output_data.runtime_dynamic_energy +
|
||||||
|
leakage_power * execution_time;
|
||||||
|
cout << indent_str << name << ":" << endl;
|
||||||
|
cout << indent_str_next << "Area = " << output_data.area << " mm^2"
|
||||||
|
<< endl;
|
||||||
|
cout << indent_str_next << "Peak Dynamic Power = "
|
||||||
|
<< output_data.peak_dynamic_power << " W" << endl;
|
||||||
|
cout << indent_str_next << "Subthreshold Leakage Power = "
|
||||||
|
<< output_data.subthreshold_leakage_power << " W" << endl;
|
||||||
|
cout << indent_str_next << "Gate Leakage Power = "
|
||||||
|
<< output_data.gate_leakage_power << " W" << endl;
|
||||||
|
cout << indent_str_next << "Runtime Dynamic Power = "
|
||||||
|
<< (output_data.runtime_dynamic_energy / execution_time) << " W"
|
||||||
|
<< endl;
|
||||||
|
cout << indent_str_next << "Runtime Dynamic Energy = "
|
||||||
|
<< output_data.runtime_dynamic_energy << " J" << endl;
|
||||||
|
cout << indent_str_next << "Total Runtime Energy = "
|
||||||
|
<< total_runtime_energy << " J" << endl;
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
// Recursively print children
|
||||||
|
int i;
|
||||||
|
int numChildren = children.size();
|
||||||
|
for (i = 0; i < numChildren; i++) {
|
||||||
|
children[i]->displayData(indent + 4, plevel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::errorUnspecifiedParam(string param) {
|
||||||
|
fprintf(stderr, "ERROR: Parameter must be specified in %s: %s\n",
|
||||||
|
name.c_str(), param.c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::errorNonPositiveParam(string param) {
|
||||||
|
fprintf(stderr, "ERROR: Parameter must be positive in %s: %s\n",
|
||||||
|
name.c_str(), param.c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnUnrecognizedComponent(XMLCSTR component) {
|
||||||
|
fprintf(stderr, "WARNING: Component type not recognized in %s: %s\n",
|
||||||
|
name.c_str(), component);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnUnrecognizedParam(XMLCSTR param) {
|
||||||
|
fprintf(stderr, "WARNING: Parameter not recognized in %s: %s\n",
|
||||||
|
name.c_str(), param);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnUnrecognizedStat(XMLCSTR stat) {
|
||||||
|
fprintf(stderr, "WARNING: Statistic not recognized in %s: %s\n",
|
||||||
|
name.c_str(), stat);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnIncompleteComponentType(XMLCSTR type) {
|
||||||
|
fprintf(stderr, " WARNING: %s handling not yet complete\n", type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnMissingComponentType(XMLCSTR id) {
|
||||||
|
if (id) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Ignoring a component due to the missing type: %s\n",
|
||||||
|
id);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Ignoring a component in %s due to the missing type\n",
|
||||||
|
name.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnMissingParamName(XMLCSTR id) {
|
||||||
|
if (id) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Ignoring a parameter due to the missing name: %s\n",
|
||||||
|
id);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Ignoring a parameter in %s due to the missing name\n",
|
||||||
|
name.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void McPATComponent::warnMissingStatName(XMLCSTR id) {
|
||||||
|
if (id) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Ignoring a statistic due to the missing name: %s\n",
|
||||||
|
id);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,
|
||||||
|
"WARNING: Ignoring a statistic in %s due to the missing name\n",
|
||||||
|
name.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
double longer_channel_device_reduction(
|
double longer_channel_device_reduction(
|
||||||
enum Device_ty device_ty,
|
enum Device_ty device_ty,
|
||||||
enum Core_type core_ty)
|
enum Core_type core_ty) {
|
||||||
{
|
|
||||||
|
|
||||||
double longer_channel_device_percentage_core;
|
double longer_channel_device_percentage_core;
|
||||||
double longer_channel_device_percentage_uncore;
|
double longer_channel_device_percentage_uncore;
|
||||||
double longer_channel_device_percentage_llc;
|
double longer_channel_device_percentage_llc;
|
||||||
|
|
||||||
double long_channel_device_reduction;
|
double long_channel_device_reduction;
|
||||||
|
|
||||||
longer_channel_device_percentage_llc = 1.0;
|
longer_channel_device_percentage_llc = 1.0;
|
||||||
longer_channel_device_percentage_uncore = 0.82;
|
longer_channel_device_percentage_uncore = 0.82;
|
||||||
if (core_ty==OOO)
|
if (core_ty == OOO) {
|
||||||
{
|
//0.54 Xeon Tulsa //0.58 Nehelam
|
||||||
longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam
|
longer_channel_device_percentage_core = 0.56;
|
||||||
//longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam
|
} else {
|
||||||
|
//0.8;//Niagara
|
||||||
|
longer_channel_device_percentage_core = 0.8;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
if (device_ty == Core_device) {
|
||||||
else
|
long_channel_device_reduction =
|
||||||
{
|
(1 - longer_channel_device_percentage_core) +
|
||||||
longer_channel_device_percentage_core = 0.8;//0.8;//Niagara
|
longer_channel_device_percentage_core *
|
||||||
//longer_channel_device_percentage_uncore = 0.9;//Niagara
|
g_tp.peri_global.long_channel_leakage_reduction;
|
||||||
}
|
} else if (device_ty == Uncore_device) {
|
||||||
|
long_channel_device_reduction =
|
||||||
|
(1 - longer_channel_device_percentage_uncore) +
|
||||||
|
longer_channel_device_percentage_uncore *
|
||||||
|
g_tp.peri_global.long_channel_leakage_reduction;
|
||||||
|
} else if (device_ty == LLC_device) {
|
||||||
|
long_channel_device_reduction =
|
||||||
|
(1 - longer_channel_device_percentage_llc) +
|
||||||
|
longer_channel_device_percentage_llc *
|
||||||
|
g_tp.peri_global.long_channel_leakage_reduction;
|
||||||
|
} else {
|
||||||
|
cout << "ERROR: Unknown device category: " << device_ty << endl;
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
if (device_ty==Core_device)
|
return long_channel_device_reduction;
|
||||||
{
|
|
||||||
long_channel_device_reduction = (1- longer_channel_device_percentage_core)
|
|
||||||
+ longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction;
|
|
||||||
}
|
|
||||||
else if (device_ty==Uncore_device)
|
|
||||||
{
|
|
||||||
long_channel_device_reduction = (1- longer_channel_device_percentage_uncore)
|
|
||||||
+ longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction;
|
|
||||||
}
|
|
||||||
else if (device_ty==LLC_device)
|
|
||||||
{
|
|
||||||
long_channel_device_reduction = (1- longer_channel_device_percentage_llc)
|
|
||||||
+ longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cout<<"unknown device category"<<endl;
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
return long_channel_device_reduction;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
statsComponents operator+(const statsComponents & x, const statsComponents & y)
|
statsComponents operator+(const statsComponents & x, const statsComponents & y) {
|
||||||
{
|
statsComponents z;
|
||||||
statsComponents z;
|
|
||||||
|
|
||||||
z.access = x.access + y.access;
|
z.access = x.access + y.access;
|
||||||
z.hit = x.hit + y.hit;
|
z.hit = x.hit + y.hit;
|
||||||
z.miss = x.miss + y.miss;
|
z.miss = x.miss + y.miss;
|
||||||
|
|
||||||
return z;
|
return z;
|
||||||
}
|
}
|
||||||
|
|
||||||
statsComponents operator*(const statsComponents & x, double const * const y)
|
statsComponents operator*(const statsComponents & x, double const * const y) {
|
||||||
{
|
statsComponents z;
|
||||||
statsComponents z;
|
|
||||||
|
|
||||||
z.access = x.access*y[0];
|
z.access = x.access * y[0];
|
||||||
z.hit = x.hit*y[1];
|
z.hit = x.hit * y[1];
|
||||||
z.miss = x.miss*y[2];
|
z.miss = x.miss * y[2];
|
||||||
|
|
||||||
return z;
|
return z;
|
||||||
}
|
}
|
||||||
|
|
||||||
statsDef operator+(const statsDef & x, const statsDef & y)
|
statsDef operator+(const statsDef & x, const statsDef & y) {
|
||||||
{
|
statsDef z;
|
||||||
statsDef z;
|
|
||||||
|
|
||||||
z.readAc = x.readAc + y.readAc;
|
z.readAc = x.readAc + y.readAc;
|
||||||
z.writeAc = x.writeAc + y.writeAc;
|
z.writeAc = x.writeAc + y.writeAc;
|
||||||
z.searchAc = x.searchAc + y.searchAc;
|
z.searchAc = x.searchAc + y.searchAc;
|
||||||
return z;
|
return z;
|
||||||
}
|
}
|
||||||
|
|
||||||
statsDef operator*(const statsDef & x, double const * const y)
|
statsDef operator*(const statsDef & x, double const * const y) {
|
||||||
{
|
statsDef z;
|
||||||
statsDef z;
|
|
||||||
|
|
||||||
z.readAc = x.readAc*y;
|
z.readAc = x.readAc * y;
|
||||||
z.writeAc = x.writeAc*y;
|
z.writeAc = x.writeAc * y;
|
||||||
z.searchAc = x.searchAc*y;
|
z.searchAc = x.searchAc * y;
|
||||||
return z;
|
return z;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -34,9 +35,15 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
#include "component.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
|
#include "xmlParser.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TODO: Since revisions to McPAT aim to make the component hierarchy more
|
||||||
|
* modular, many of the parameter and statistics classes/structs included in
|
||||||
|
* this file should be moved to the files for their respective components.
|
||||||
|
*/
|
||||||
const double cdb_overhead = 1.1;
|
const double cdb_overhead = 1.1;
|
||||||
|
|
||||||
enum FU_type {
|
enum FU_type {
|
||||||
|
@ -46,21 +53,28 @@ enum FU_type {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Core pipeline organization.
// NOTE(review): presumably OOO = out-of-order and Inorder = in-order issue;
// confirm against the core model that consumes this value.
enum Core_type {
    OOO,
    Inorder
};

// Register renaming structure selection (RAM-based vs. CAM-based, per the
// enumerator names).
enum Renaming_type {
    RAMbased,
    CAMbased
};

// Instruction scheduler organization selection.
enum Scheduler_type {
    PhysicalRegFile,
    ReservationStation
};
|
||||||
|
|
||||||
enum cache_level {
|
enum Cache_type {
|
||||||
|
DATA_CACHE,
|
||||||
|
INSTRUCTION_CACHE,
|
||||||
|
MIXED
|
||||||
|
};
|
||||||
|
|
||||||
|
enum CacheLevel {
|
||||||
|
L1,
|
||||||
L2,
|
L2,
|
||||||
L3,
|
L3,
|
||||||
L1Directory,
|
L1Directory,
|
||||||
|
@ -68,198 +82,408 @@ enum cache_level {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Kind of off-chip controller being modeled.
enum MemoryCtrl_type {
    MC, //memory controller
    FLASHC //flash controller
};

// Directory organization; NonDir marks a structure that is not a directory.
enum Dir_type {
    ST,//shadowed tag
    DC,//directory cache
    SBT,//static bank tag
    NonDir

};

// Cache write policy.
enum Cache_policy {
    Write_through,
    Write_back
};

// Device class used when selecting device parameters; it is the first
// argument of longer_channel_device_reduction() declared in this header.
enum Device_ty {
    Core_device,
    Uncore_device,
    LLC_device
};
|
||||||
|
|
||||||
// Array access mode selector.
enum Access_mode {
    Normal,
    Sequential,
    Fast
};

/**
 * A triple of event counters (accesses, hits, misses) for one kind of
 * operation on a structure. All counters start at zero.
 */
class statsComponents {
  public:
    double access;
    double hit;
    double miss;

    // Start with every counter cleared.
    statsComponents() : access(0), hit(0), miss(0) {}

    // Copy all three counters from obj.
    statsComponents(const statsComponents & obj)
        : access(obj.access), hit(obj.hit), miss(obj.miss) {}

    // Copy all three counters from rhs and return *this for chaining.
    statsComponents & operator=(const statsComponents & rhs) {
        access = rhs.access;
        hit = rhs.hit;
        miss = rhs.miss;
        return *this;
    }

    // Clear every counter back to zero.
    void reset() {
        access = hit = miss = 0;
    }

    // Counter-wise sum.
    friend statsComponents operator+(const statsComponents & x,
                                     const statsComponents & y);
    // Counter-wise scaling; y supplies one factor per counter.
    friend statsComponents operator*(const statsComponents & x,
                                     double const * const y);
};
|
||||||
|
|
||||||
class statsDef
|
class statsDef {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
statsComponents readAc;
|
statsComponents readAc;
|
||||||
statsComponents writeAc;
|
statsComponents writeAc;
|
||||||
statsComponents searchAc;
|
statsComponents searchAc;
|
||||||
|
statsComponents dataReadAc;
|
||||||
|
statsComponents dataWriteAc;
|
||||||
|
statsComponents tagReadAc;
|
||||||
|
statsComponents tagWriteAc;
|
||||||
|
|
||||||
statsDef() : readAc(), writeAc(),searchAc() { }
|
statsDef() : readAc(), writeAc(), searchAc() { }
|
||||||
void reset() { readAc.reset(); writeAc.reset();searchAc.reset();}
|
void reset() {
|
||||||
|
readAc.reset();
|
||||||
|
writeAc.reset();
|
||||||
|
searchAc.reset();
|
||||||
|
}
|
||||||
|
|
||||||
friend statsDef operator+(const statsDef & x, const statsDef & y);
|
friend statsDef operator+(const statsDef & x, const statsDef & y);
|
||||||
friend statsDef operator*(const statsDef & x, double const * const y);
|
friend statsDef operator*(const statsDef & x, double const * const y);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An object to store the computed data that will be output from McPAT on a
|
||||||
|
* per-component-instance basis. Currently, this includes the amount of storage
|
||||||
|
* that the component comprises, its chip area, and power and energy
|
||||||
|
* calculations.
|
||||||
|
*/
|
||||||
|
// Note: no constructor is declared, so a freshly created McPATOutput holds
// indeterminate values until its fields are assigned or reset() is called.
class McPATOutput {
  public:
    // Storage is in bytes (B)
    double storage;
    // Area is in mm^2
    double area;
    // Peak Dynamic Power is in W
    double peak_dynamic_power;
    // Subthreshold Leakage Power is in W
    double subthreshold_leakage_power;
    // Gate Leakage Power is in W
    double gate_leakage_power;
    // Runtime Dynamic Energy is in J
    double runtime_dynamic_energy;

    // Defined out of line.
    // NOTE(review): presumably clears every field — confirm in the .cc file.
    void reset();

    // Combine two outputs into a new object (defined out of line).
    friend McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs);
    // Accumulate rhs into this object. Returns void, so it cannot be chained.
    void operator+=(const McPATOutput &rhs);
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A McPATComponent encompasses all the parts that are common to any component
|
||||||
|
* for which McPAT may compute and print power, area, and timing data. It
|
||||||
|
* includes a pointer to the XML data from which the component gathers its
|
||||||
|
* input parameters, it stores the variables that are commonly used in all
|
||||||
|
* components, and it maintains the hierarchical structure to recursively
|
||||||
|
* compute and print output. This is a base class from which all components
|
||||||
|
* should inherit this functionality (possibly through other descended
|
||||||
|
* classes).
|
||||||
|
*/
|
||||||
|
class McPATComponent : public Component {
|
||||||
|
public:
|
||||||
|
static bool debug;
|
||||||
|
|
||||||
|
// Variables shared across the system by all McPATComponents
|
||||||
|
static bool opt_for_clk;
|
||||||
|
static int longer_channel_device;
|
||||||
|
static double execution_time;
|
||||||
|
static int physical_address_width;
|
||||||
|
static int virtual_address_width;
|
||||||
|
static int virtual_memory_page_size;
|
||||||
|
static int data_path_width;
|
||||||
|
|
||||||
|
// Although these two variables are static right now, they need to be
|
||||||
|
// modulated on a per-frequency-domain basis eventually.
|
||||||
|
static double target_core_clockrate;
|
||||||
|
static double total_cycles;
|
||||||
|
|
||||||
|
XMLNode* xml_data;
|
||||||
|
InputParameter interface_ip;
|
||||||
|
string name;
|
||||||
|
// Number of cycles per second (consider changing name)
|
||||||
|
double clockRate;
|
||||||
|
vector<McPATComponent*> children;
|
||||||
|
// The data structure that is printed in displayData
|
||||||
|
McPATOutput output_data;
|
||||||
|
// Set this to contain the stats to calculate peak dynamic power
|
||||||
|
statsDef tdp_stats;
|
||||||
|
// Set this to contain the stats to calculate runtime dynamic energy/power
|
||||||
|
statsDef rtp_stats;
|
||||||
|
// Holds the peak dynamic power calculation
|
||||||
|
powerDef power_t;
|
||||||
|
// Holds the runtime dynamic power calculation
|
||||||
|
powerDef rt_power;
|
||||||
|
|
||||||
|
McPATComponent();
|
||||||
|
// Which of these is a better way of doing things?!
|
||||||
|
McPATComponent(XMLNode* _xml_data);
|
||||||
|
McPATComponent(XMLNode* _xml_data, InputParameter* _interface_ip);
|
||||||
|
virtual void recursiveInstantiate();
|
||||||
|
virtual void computeArea();
|
||||||
|
// This function should probably be pure virtual, but it's too early in
|
||||||
|
// the modifying process to know for sure. Note that each component has
|
||||||
|
// to calculate it's own power consumption
|
||||||
|
virtual void computeEnergy();
|
||||||
|
virtual void displayData(uint32_t indent, int plevel);
|
||||||
|
~McPATComponent();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void errorUnspecifiedParam(string param);
|
||||||
|
void errorNonPositiveParam(string param);
|
||||||
|
void warnUnrecognizedComponent(XMLCSTR component);
|
||||||
|
void warnUnrecognizedParam(XMLCSTR param);
|
||||||
|
void warnUnrecognizedStat(XMLCSTR stat);
|
||||||
|
void warnIncompleteComponentType(XMLCSTR type);
|
||||||
|
void warnMissingComponentType(XMLCSTR id);
|
||||||
|
void warnMissingParamName(XMLCSTR id);
|
||||||
|
void warnMissingStatName(XMLCSTR id);
|
||||||
|
};
|
||||||
|
|
||||||
double longer_channel_device_reduction(
|
double longer_channel_device_reduction(
|
||||||
enum Device_ty device_ty=Core_device,
|
enum Device_ty device_ty = Core_device,
|
||||||
enum Core_type core_ty=Inorder);
|
enum Core_type core_ty = Inorder);
|
||||||
|
|
||||||
// Plain container for the configuration parameters of a core.
// No constructor is declared, so every field is indeterminate until the
// owning component assigns it.
class CoreParameters {
  public:
    bool opt_local;
    bool x86;
    bool Embedded;
    enum Core_type core_ty;
    enum Renaming_type rm_ty;
    enum Scheduler_type scheu_ty;
    double clockRate;
    // Architectural and physical register file sizing
    int arch_ireg_width;
    int arch_freg_width;
    int archi_Regs_IRF_size;
    int archi_Regs_FRF_size;
    int phy_ireg_width;
    int phy_freg_width;
    int num_IRF_entry;
    int num_FRF_entry;
    int num_ifreelist_entries;
    int num_ffreelist_entries;
    // Pipeline widths
    int fetchW;
    int decodeW;
    int issueW;
    int peak_issueW;
    int commitW;
    int peak_commitW;
    int predictionW;
    int fp_issueW;
    int fp_decodeW;
    int perThreadState;
    int globalCheckpoint;
    int instruction_length;
    int pc_width;
    int opcode_width;
    int micro_opcode_length;
    int num_hthreads;
    int pipeline_stages;
    int fp_pipeline_stages;
    int num_pipelines;
    int num_fp_pipelines;
    // Functional unit counts (num_fpus is deliberately a double here)
    int num_alus;
    int num_muls;
    double num_fpus;
    int int_data_width;
    int fp_data_width;
    int v_address_width;
    int p_address_width;
    bool regWindowing;
    bool multithreaded;
    double pppm_lkg_multhread[4];
    // Reorder buffer and scheduler
    int ROB_size;
    int ROB_assoc;
    int ROB_nbanks;
    int ROB_tag_width;
    int scheduler_assoc;
    int scheduler_nbanks;
    // Register windows
    int register_window_size;
    double register_window_throughput;
    double register_window_latency;
    int register_window_assoc;
    int register_window_nbanks;
    int register_window_tag_width;
    int register_window_rw_ports;
    // Physical integer register file
    int phy_Regs_IRF_size;
    int phy_Regs_IRF_assoc;
    int phy_Regs_IRF_nbanks;
    int phy_Regs_IRF_tag_width;
    int phy_Regs_IRF_rd_ports;
    int phy_Regs_IRF_wr_ports;
    // Physical floating-point register file
    int phy_Regs_FRF_size;
    int phy_Regs_FRF_assoc;
    int phy_Regs_FRF_nbanks;
    int phy_Regs_FRF_tag_width;
    int phy_Regs_FRF_rd_ports;
    int phy_Regs_FRF_wr_ports;
    // Rename tables and free lists
    int front_rat_nbanks;
    int front_rat_rw_ports;
    int retire_rat_nbanks;
    int retire_rat_rw_ports;
    int freelist_nbanks;
    int freelist_rw_ports;
    // Load/store buffers
    int memory_ports;
    int load_buffer_size;
    int load_buffer_assoc;
    int load_buffer_nbanks;
    int store_buffer_size;
    int store_buffer_assoc;
    int store_buffer_nbanks;
    // Instruction window and instruction buffer
    int instruction_window_size;
    int fp_instruction_window_size;
    int instruction_buffer_size;
    int instruction_buffer_assoc;
    int instruction_buffer_nbanks;
    int instruction_buffer_tag_width;
    int number_instruction_fetch_ports;
    int RAS_size;
    // Execution-unit bypass / broadcast wiring
    int execu_int_bypass_ports;
    int execu_mul_bypass_ports;
    int execu_fp_bypass_ports;
    Wire_type execu_bypass_wire_type;
    Wire_type execu_broadcast_wt;
    int execu_wire_mat_type;
    double execu_bypass_base_width;
    double execu_bypass_base_height;
    int execu_bypass_start_wiring_level;
    double execu_bypass_route_over_perc;
    double broadcast_numerator;
};
|
||||||
|
|
||||||
// Plain container for the activity statistics of a core.
// No constructor is declared, so every field is indeterminate until the
// owning component assigns it.
class CoreStatistics {
  public:
    // Cycle counts and per-unit duty cycles
    double pipeline_duty_cycle;
    double total_cycles;
    double busy_cycles;
    double idle_cycles;
    double IFU_duty_cycle;
    double BR_duty_cycle;
    double LSU_duty_cycle;
    double MemManU_I_duty_cycle;
    double MemManU_D_duty_cycle;
    double ALU_duty_cycle;
    double MUL_duty_cycle;
    double FPU_duty_cycle;
    double ALU_cdb_duty_cycle;
    double MUL_cdb_duty_cycle;
    double FPU_cdb_duty_cycle;
    // Event counts
    double ROB_reads;
    double ROB_writes;
    double total_instructions;
    double int_instructions;
    double fp_instructions;
    double branch_instructions;
    double branch_mispredictions;
    double load_instructions;
    double store_instructions;
    double committed_instructions;
    double committed_int_instructions;
    double committed_fp_instructions;
    double rename_reads;
    double rename_writes;
    double fp_rename_reads;
    double fp_rename_writes;
    double inst_window_reads;
    double inst_window_writes;
    double inst_window_wakeup_accesses;
    double fp_inst_window_reads;
    double fp_inst_window_writes;
    double fp_inst_window_wakeup_accesses;
    double int_regfile_reads;
    double float_regfile_reads;
    double int_regfile_writes;
    double float_regfile_writes;
    double context_switches;
    double ialu_accesses;
    double fpu_accesses;
    double mul_accesses;
    double cdb_alu_accesses;
    double cdb_fpu_accesses;
    double cdb_mul_accesses;
    double function_calls;
};
|
||||||
|
|
||||||
// Plain container for memory/flash controller configuration parameters.
// No constructor is declared, so every field is indeterminate until the
// owning component assigns it.
// NOTE(review): llcBlockSize / llc_line_length and dataBusWidth /
// databus_width look like duplicate spellings of the same quantity — confirm
// which one the controller code actually reads.
class MCParameters {
  public:
    double clockRate;
    enum MemoryCtrl_type mc_type;
    double num_mcs;
    int num_channels;
    int llcBlockSize;
    int dataBusWidth;
    int databus_width;
    int llc_line_length;
    int req_window_size_per_channel;
    int IO_buffer_size_per_channel;
    int addressbus_width;
    int opcodeW;
    int type;
    bool LVDS;
    bool withPHY;
    int peak_transfer_rate;
    int number_ranks;
    int reorder_buffer_assoc;
    int reorder_buffer_nbanks;
    int read_buffer_assoc;
    int read_buffer_nbanks;
    int read_buffer_tag_width;
    int write_buffer_assoc;
    int write_buffer_nbanks;
    int write_buffer_tag_width;
};
|
||||||
|
|
||||||
// Plain container for memory controller activity statistics.
// No constructor is declared; fields are indeterminate until assigned.
class MCStatistics {
  public:
    double duty_cycle;
    double perc_load;
    double reads;
    double writes;
};
|
||||||
|
|
||||||
// Plain container for NIU configuration parameters.
// No constructor is declared; fields are indeterminate until assigned.
class NIUParameters {
  public:
    double clockRate;
    int num_units;
    int type;
};
|
||||||
|
|
||||||
// Plain container for NIU activity statistics.
// No constructor is declared; fields are indeterminate until assigned.
class NIUStatistics {
  public:
    double duty_cycle;
    double perc_load;
};
|
||||||
|
|
||||||
// Plain container for PCIe controller configuration parameters.
// No constructor is declared; fields are indeterminate until assigned.
class PCIeParameters {
  public:
    double clockRate;
    int num_channels;
    int num_units;
    bool withPHY;
    int type;
};

// Plain container for PCIe controller activity statistics.
// No constructor is declared; fields are indeterminate until assigned.
class PCIeStatistics {
  public:
    double duty_cycle;
    double perc_load;
};
|
||||||
#endif /* BASIC_COMPONENTS_H_ */
|
#endif /* BASIC_COMPONENTS_H_ */
|
||||||
|
|
179
ext/mcpat/bus_interconnect.cc
Normal file
179
ext/mcpat/bus_interconnect.cc
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Author: Joel Hestness
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "basic_circuit.h"
|
||||||
|
#include "bus_interconnect.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "const.h"
|
||||||
|
#include "io.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
|
/**
 * Build a bus interconnect from its XML description.
 *
 * Parameters and statistics are parsed first (set_param_stats), then the
 * link throughput/latency are normalized by the bus clock, the per-link wire
 * length is derived, and a single "Link" Interconnect child is created.
 *
 * @param _xml_data      XML node describing this bus
 * @param interface_ip_  input parameters; copied into this component
 */
BusInterconnect::BusInterconnect(XMLNode* _xml_data,
                                 InputParameter* interface_ip_)
        : McPATComponent(_xml_data), link_bus(NULL),
          interface_ip(*interface_ip_) {
    name = "Bus Interconnect";
    set_param_stats();
    local_result = init_interface(&interface_ip, name);
    scktRatio = g_tp.sckt_co_eff;

    // Both link figures are divided by the bus clock (already in Hz here)
    const double bus_clock = bus_params.clockRate;
    interface_ip.throughput = bus_params.link_throughput / bus_clock;
    interface_ip.latency = bus_params.link_latency / bus_clock;

    // Split the configured length across the nodes
    const int node_count = bus_params.total_nodes;
    link_len = link_len / node_count;
    if (node_count > 1) {
        //All links are shared by neighbors
        link_len = link_len / 2;
    }

    link_bus = new Interconnect(xml_data, "Link", Uncore_device,
                                bus_params.link_base_width,
                                bus_params.link_base_height,
                                bus_params.flit_size, link_len, &interface_ip,
                                bus_params.link_start_wiring_level,
                                bus_params.clockRate,
                                bus_params.pipelinable,
                                bus_params.route_over_perc);
    children.push_back(link_bus);
}
|
||||||
|
|
||||||
|
/**
 * Seed the TDP and runtime statistics of this bus and of its link child,
 * then let McPATComponent::computeEnergy() do the recursive computation.
 */
void BusInterconnect::computeEnergy() {
    const double duty = bus_stats.duty_cycle;
    const double accesses = bus_stats.total_access;

    // Peak (TDP) statistics are driven by the duty cycle
    tdp_stats.reset();
    tdp_stats.readAc.access = duty;
    link_bus->int_params.active_ports = bus_params.min_ports - 1;
    link_bus->int_stats.duty_cycle = bus_params.M_traffic_pattern * duty;

    // Runtime statistics are driven by the observed access count
    rtp_stats.reset();
    rtp_stats.readAc.access = accesses;
    link_bus->int_stats.accesses = accesses;

    // Recursively compute energy
    McPATComponent::computeEnergy();
}
|
||||||
|
|
||||||
|
void BusInterconnect::set_param_stats() {
|
||||||
|
memset(&bus_params, 0, sizeof(BusInterconnectParameters));
|
||||||
|
|
||||||
|
int num_children = xml_data->nChildNode("param");
|
||||||
|
int i;
|
||||||
|
int mat_type;
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("clockrate", bus_params.clockRate);
|
||||||
|
ASSIGN_INT_IF("flit_bits", bus_params.flit_size);
|
||||||
|
ASSIGN_FP_IF("link_throughput", bus_params.link_throughput);
|
||||||
|
ASSIGN_FP_IF("link_latency", bus_params.link_latency);
|
||||||
|
ASSIGN_INT_IF("total_nodes", bus_params.total_nodes);
|
||||||
|
ASSIGN_INT_IF("input_ports", bus_params.input_ports);
|
||||||
|
ASSIGN_INT_IF("output_ports", bus_params.output_ports);
|
||||||
|
ASSIGN_INT_IF("global_linked_ports", bus_params.global_linked_ports);
|
||||||
|
ASSIGN_FP_IF("chip_coverage", bus_params.chip_coverage);
|
||||||
|
ASSIGN_INT_IF("pipelinable", bus_params.pipelinable);
|
||||||
|
ASSIGN_FP_IF("link_routing_over_percentage",
|
||||||
|
bus_params.route_over_perc);
|
||||||
|
ASSIGN_INT_IF("virtual_channel_per_port",
|
||||||
|
bus_params.virtual_channel_per_port);
|
||||||
|
ASSIGN_FP_IF("M_traffic_pattern", bus_params.M_traffic_pattern);
|
||||||
|
ASSIGN_FP_IF("link_len", link_len);
|
||||||
|
ASSIGN_FP_IF("link_base_width", bus_params.link_base_width);
|
||||||
|
ASSIGN_FP_IF("link_base_height", bus_params.link_base_height);
|
||||||
|
ASSIGN_FP_IF("link_start_wiring_level",
|
||||||
|
bus_params.link_start_wiring_level);
|
||||||
|
ASSIGN_INT_IF("wire_mat_type", mat_type);
|
||||||
|
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change from MHz to Hz
|
||||||
|
bus_params.clockRate *= 1e6;
|
||||||
|
|
||||||
|
interface_ip.wire_is_mat_type = mat_type;
|
||||||
|
interface_ip.wire_os_mat_type = mat_type;
|
||||||
|
|
||||||
|
num_children = xml_data->nChildNode("stat");
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("duty_cycle", bus_stats.duty_cycle);
|
||||||
|
ASSIGN_FP_IF("total_accesses", bus_stats.total_access);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedStat(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clockRate = bus_params.clockRate;
|
||||||
|
bus_params.min_ports =
|
||||||
|
min(bus_params.input_ports, bus_params.output_ports);
|
||||||
|
|
||||||
|
assert(bus_params.chip_coverage <= 1);
|
||||||
|
assert(bus_params.route_over_perc <= 1);
|
||||||
|
assert(link_len > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Override the duty cycle statistic; computeEnergy() reads it to seed the
// TDP stats of this bus and of its link.
void
BusInterconnect::set_duty_cycle(double duty_cycle) {
    bus_stats.duty_cycle = duty_cycle;
}
|
||||||
|
|
||||||
|
// Override the total access count; computeEnergy() reads it to seed the
// runtime stats of this bus and of its link.
void
BusInterconnect::set_number_of_accesses(double total_accesses) {
    bus_stats.total_access = total_accesses;
}
|
||||||
|
|
||||||
|
// Release the link created in the constructor.
// NOTE(review): link_bus is also stored in children by the constructor. If
// ~McPATComponent() deletes the entries of children, this is a double
// delete — confirm against the base-class destructor.
BusInterconnect::~BusInterconnect() {
    delete link_bus;
    link_bus = NULL;
}
|
|
@ -1,7 +1,7 @@
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,65 +25,71 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Author: Joel Hestness
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
#ifndef SHAREDCACHE_H_
|
#ifndef BUS_INTERCONNECT_H_
|
||||||
#define SHAREDCACHE_H_
|
#define BUS_INTERCONNECT_H_
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "area.h"
|
|
||||||
#include "array.h"
|
#include "array.h"
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
#include "interconnect.h"
|
||||||
#include "logic.h"
|
#include "logic.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
|
|
||||||
// Plain container for bus interconnect configuration.
// BusInterconnect::set_param_stats() zeroes the whole object with memset
// and then fills it from the XML "param" nodes, so it must stay free of
// virtual functions and non-trivial members.
class BusInterconnectParameters {
  public:
    // Read from XML in MHz and converted to Hz by set_param_stats()
    double clockRate;
    int flit_size;
    int input_ports;
    int output_ports;
    // min(input_ports, output_ports), computed by set_param_stats()
    int min_ports;
    int global_linked_ports;
    int virtual_channel_per_port;
    int input_buffer_entries_per_vc;
    int total_nodes;
    double link_throughput;
    double link_latency;
    // Asserted to be <= 1 by set_param_stats()
    double chip_coverage;
    bool pipelinable;
    // Asserted to be <= 1 by set_param_stats()
    double route_over_perc;
    bool has_global_link;
    bool type;
    double M_traffic_pattern;
    double link_base_width;
    double link_base_height;
    int link_start_wiring_level;
};
|
||||||
|
|
||||||
class CCdir :public Component{
|
class BusInterconnectStatistics {
|
||||||
public:
|
public:
|
||||||
ParseXML * XML;
|
double duty_cycle;
|
||||||
int ithCache;
|
double total_access;
|
||||||
InputParameter interface_ip;
|
|
||||||
DataCache dc;//Shared cache
|
|
||||||
ArrayST * shadow_dir;
|
|
||||||
// cache_processor llCache,directory, directory1, inv_dir;
|
|
||||||
|
|
||||||
//pipeline pipeLogicCache, pipeLogicDirectory;
|
|
||||||
//clock_network clockNetwork;
|
|
||||||
double scktRatio, clockRate, executionTime;
|
|
||||||
Component L2Tot, cc, cc1, ccTot;
|
|
||||||
|
|
||||||
CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,bool is_tdp=true);
|
|
||||||
~CCdir();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* SHAREDCACHE_H_ */
|
class BusInterconnect : public McPATComponent {
|
||||||
|
public:
|
||||||
|
Interconnect* link_bus;
|
||||||
|
|
||||||
|
int ithNoC;
|
||||||
|
InputParameter interface_ip;
|
||||||
|
double link_len;
|
||||||
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
|
BusInterconnectParameters bus_params;
|
||||||
|
BusInterconnectStatistics bus_stats;
|
||||||
|
uca_org_t local_result;
|
||||||
|
statsDef stats_t;
|
||||||
|
double M_traffic_pattern;
|
||||||
|
|
||||||
|
BusInterconnect(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||||
|
void set_param_stats();
|
||||||
|
void set_duty_cycle(double duty_cycle);
|
||||||
|
void set_number_of_accesses(double total_accesses);
|
||||||
|
void computeEnergy();
|
||||||
|
~BusInterconnect();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* BUS_INTERCONNECT_H_ */
|
321
ext/mcpat/cachearray.cc
Normal file
321
ext/mcpat/cachearray.cc
Normal file
|
@ -0,0 +1,321 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Authors: Joel Hestness
|
||||||
|
* Yasuko Eckert
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "area.h"
|
||||||
|
#include "cachearray.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "decoder.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Candidates whose data-array area efficiency is below this value are kept
// by the constructor's search when the associativity is 0.
double CacheArray::area_efficiency_threshold = 20.0;

// The values below are copied into the CACTI input parameters when the
// constructor has to re-run the design-space search.
int CacheArray::ed = 0;

// Fixed weights; chosen so that timing can be satisfied.
int CacheArray::delay_wt = 100;
int CacheArray::cycle_time_wt = 1000;

// Fixed weights; used for the exhaustive search of individual components.
int CacheArray::area_wt = 10;
int CacheArray::dynamic_power_wt = 10;
int CacheArray::leakage_power_wt = 10;

// Fixed deviations; chosen so that timing can be satisfied.
int CacheArray::delay_dev = 1000000;
int CacheArray::cycle_time_dev = 100;

// Fixed deviations; used for the exhaustive search of individual components.
int CacheArray::area_dev = 1000000;
int CacheArray::dynamic_power_dev = 1000000;
int CacheArray::leakage_power_dev = 1000000;

// Step by which the cycle-time deviation is tightened on every search
// iteration; the search stops once the deviation is no longer above it.
int CacheArray::cycle_time_dev_threshold = 10;
|
||||||
|
|
||||||
|
/**
 * Build a cache array model by running CACTI on the given parameters.
 *
 * Steps performed:
 *  1. Copy the interface parameters, clamp the size to MIN_BUFFER_SIZE and
 *     validate them (the process exits with status 1 on failure).
 *  2. Run CACTI once to get an initial organization.
 *  3. If opt_for_clk and opt_local are set and the initial organization
 *     misses the throughput or latency target, re-run CACTI with the
 *     class-wide weights/deviations, tightening cycle_time_dev by
 *     cycle_time_dev_threshold per iteration, collecting candidates and
 *     finally keeping the one with the lowest read dynamic energy.
 *  4. Scale the resulting area and power numbers by the layout overheads,
 *     the short-circuit coefficient and the number of banks.
 *
 * @param _xml_data            XML node forwarded to McPATComponent
 * @param configure_interface  CACTI input parameters; copied into l_ip
 * @param _name                component name used in warnings and errors
 * @param device_ty_           device type used for the long-channel factor
 * @param _clockRate           clock rate stored in clockRate
 * @param opt_local_           enables the local timing search (with
 *                             opt_for_clk)
 * @param core_ty_             core type used for the long-channel factor
 * @param _is_default          stored in is_default
 */
CacheArray::CacheArray(XMLNode* _xml_data,
                       const InputParameter *configure_interface, string _name,
                       enum Device_ty device_ty_, double _clockRate,
                       bool opt_local_, enum Core_type core_ty_, bool _is_default)
    : McPATComponent(_xml_data), l_ip(*configure_interface),
      device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
      is_default(_is_default), sbt_dir_overhead(0) {
    // Slack allowed when comparing achieved timing against the targets
    const double timing_eps = 1e-10;

    name = _name;
    clockRate = _clockRate;

    if (l_ip.cache_sz < MIN_BUFFER_SIZE) {
        l_ip.cache_sz = MIN_BUFFER_SIZE;
    }

    if (!l_ip.error_checking(name)) {
        exit(1);
    }

    sbt_tdp_stats.reset();
    sbt_rtp_stats.reset();

    // Compute the initial search point
    local_result.valid = false;
    compute_base_power();

    // Candidate organizations gathered while searching with CACTI
    list<uca_org_t > candidate_solutions(0);
    const double throughput = l_ip.throughput;
    const double latency = l_ip.latency;

    // A target counts as missed unless the achieved time is within the
    // slack (written as a negated <= so the result matches the original
    // flag logic for every input).
    bool throughput_overflow =
        !((local_result.cycle_time - throughput) <= timing_eps);
    bool latency_overflow =
        !((local_result.access_time - latency) <= timing_eps);

    if (opt_for_clk && opt_local) {
        if (throughput_overflow || latency_overflow) {
            l_ip.ed = ed;

            l_ip.delay_wt = delay_wt;
            l_ip.cycle_time_wt = cycle_time_wt;

            l_ip.area_wt = area_wt;
            l_ip.dynamic_power_wt = dynamic_power_wt;
            l_ip.leakage_power_wt = leakage_power_wt;

            l_ip.delay_dev = delay_dev;
            l_ip.cycle_time_dev = cycle_time_dev;

            l_ip.area_dev = area_dev;
            l_ip.dynamic_power_dev = dynamic_power_dev;
            l_ip.leakage_power_dev = leakage_power_dev;

            // Reset the overflow flags before the optimization iterations
            throughput_overflow = true;
            latency_overflow = true;

            // Discard the result that was optimized for ED^2P
            local_result.cleanup();
        }

        while ((throughput_overflow || latency_overflow) &&
               l_ip.cycle_time_dev > cycle_time_dev_threshold) {
            compute_base_power();

            // Deviation to be used by the next iteration
            l_ip.cycle_time_dev -= cycle_time_dev_threshold;

            const bool meets_throughput =
                (local_result.cycle_time - throughput) <= timing_eps;
            const bool meets_latency =
                (local_result.access_time - latency) <= timing_eps;
            const bool meets_timing = meets_throughput && meets_latency;

            // Search goes from best area to worst area, i.e. from worst
            // timing to best timing.
            if (meets_timing ||
                (local_result.data_array2->area_efficiency <
                 area_efficiency_threshold && l_ip.assoc == 0)) {
                // If no satisfying solution is found, the most aggressive
                // one is what remains.
                candidate_solutions.push_back(local_result);
                if (meets_timing) {
                    // Make sure the search does not stop because of a CAM
                    throughput_overflow = false;
                    latency_overflow = false;
                }
            } else {
                if (meets_throughput) {
                    throughput_overflow = false;
                }
                if (meets_latency) {
                    latency_overflow = false;
                }

                // When the deviation is no longer above the threshold,
                // local_result is the last result and must be kept. Only
                // results that were not saved in the list are released.
                if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
                    local_result.cleanup();
                }
            }
        }

        if (l_ip.assoc > 0) {
            // For array structures other than CAM and FA, warn but still
            // provide the result with the best timing found.
            if (throughput_overflow) {
                cout << "Warning: " << name
                     << " array structure cannot satisfy throughput constraint."
                     << endl;
            }
            if (latency_overflow) {
                cout << "Warning: " << name
                     << " array structure cannot satisfy latency constraint."
                     << endl;
            }
        }

        // Keep the candidate with the lowest read dynamic energy; every
        // candidate that does not improve on the current minimum is
        // released immediately.
        double min_dynamic_energy = BIGNUM;
        if (!candidate_solutions.empty()) {
            local_result.valid = true;
            for (list<uca_org_t >::iterator candidate =
                     candidate_solutions.begin();
                 candidate != candidate_solutions.end(); ++candidate) {
                if (min_dynamic_energy > candidate->power.readOp.dynamic) {
                    min_dynamic_energy = candidate->power.readOp.dynamic;
                    local_result = *candidate;
                } else {
                    candidate->cleanup();
                }
            }
        }
        candidate_solutions.clear();
    }

    const double long_channel_device_reduction =
        longer_channel_device_reduction(device_ty, core_ty);

    const double macro_layout_overhead = g_tp.macro_layout_overhead;
    const double chip_layout_overhead = g_tp.chip_layout_overhead;
    const double total_overhead = macro_layout_overhead * chip_layout_overhead;
    local_result.area *= total_overhead;

    // Maintain constant power density
    double power_scale[4] = {total_overhead, 1, 1, total_overhead};

    const double sckt_coeff = g_tp.sckt_co_eff;

    // Whole-array power
    local_result.power.readOp.dynamic *= sckt_coeff;
    local_result.power.writeOp.dynamic *= sckt_coeff;
    local_result.power.searchOp.dynamic *= sckt_coeff;
    local_result.power.readOp.leakage *= l_ip.nbanks;
    local_result.power.readOp.longer_channel_leakage =
        local_result.power.readOp.leakage * long_channel_device_reduction;
    local_result.power = local_result.power * power_scale;

    // Data-array power
    local_result.data_array2->power.readOp.dynamic *= sckt_coeff;
    local_result.data_array2->power.writeOp.dynamic *= sckt_coeff;
    local_result.data_array2->power.searchOp.dynamic *= sckt_coeff;
    local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
    local_result.data_array2->power.readOp.longer_channel_leakage =
        local_result.data_array2->power.readOp.leakage *
        long_channel_device_reduction;
    local_result.data_array2->power =
        local_result.data_array2->power * power_scale;

    // Tag-array power; only touched for caches that are not pure CAM, pure
    // RAM or fully associative.
    if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) &&
        l_ip.is_cache) {
        local_result.tag_array2->power.readOp.dynamic *= sckt_coeff;
        local_result.tag_array2->power.writeOp.dynamic *= sckt_coeff;
        local_result.tag_array2->power.searchOp.dynamic *= sckt_coeff;
        local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
        local_result.tag_array2->power.readOp.longer_channel_leakage =
            local_result.tag_array2->power.readOp.leakage *
            long_channel_device_reduction;
        local_result.tag_array2->power =
            local_result.tag_array2->power * power_scale;
    }
}
|
||||||
|
|
||||||
|
void CacheArray::compute_base_power() {
|
||||||
|
local_result = cacti_interface(&l_ip);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Publish the area of the selected CACTI organization: the raw value goes
// into the component's area object, and the value divided by 1e6 goes into
// the output record (presumably um^2 -> mm^2; confirm the units).
void CacheArray::computeArea() {
    const double array_area = local_result.area;
    area.set_area(array_area);
    output_data.area = array_area / 1e6;
}
|
||||||
|
|
||||||
|
void CacheArray::computeEnergy() {
|
||||||
|
// Set the leakage power numbers
|
||||||
|
output_data.subthreshold_leakage_power = local_result.power.readOp.leakage;
|
||||||
|
output_data.gate_leakage_power = local_result.power.readOp.gate_leakage;
|
||||||
|
|
||||||
|
if (l_ip.assoc && l_ip.is_cache) {
|
||||||
|
// This is a standard cache array with data and tags
|
||||||
|
// Calculate peak dynamic power
|
||||||
|
output_data.peak_dynamic_power =
|
||||||
|
(local_result.tag_array2->power.readOp.dynamic +
|
||||||
|
local_result.data_array2->power.readOp.dynamic) *
|
||||||
|
tdp_stats.readAc.hit +
|
||||||
|
(local_result.tag_array2->power.readOp.dynamic) *
|
||||||
|
tdp_stats.readAc.miss +
|
||||||
|
(local_result.tag_array2->power.readOp.dynamic +
|
||||||
|
local_result.data_array2->power.writeOp.dynamic) *
|
||||||
|
tdp_stats.writeAc.hit +
|
||||||
|
(local_result.tag_array2->power.readOp.dynamic) *
|
||||||
|
tdp_stats.writeAc.miss;
|
||||||
|
output_data.peak_dynamic_power *= clockRate;
|
||||||
|
|
||||||
|
// Calculate the runtime dynamic power
|
||||||
|
output_data.runtime_dynamic_energy =
|
||||||
|
local_result.data_array2->power.readOp.dynamic *
|
||||||
|
rtp_stats.dataReadAc.access +
|
||||||
|
local_result.data_array2->power.writeOp.dynamic *
|
||||||
|
rtp_stats.dataWriteAc.access +
|
||||||
|
(local_result.tag_array2->power.readOp.dynamic *
|
||||||
|
rtp_stats.tagReadAc.access +
|
||||||
|
local_result.tag_array2->power.writeOp.dynamic *
|
||||||
|
rtp_stats.tagWriteAc.access) * l_ip.assoc;
|
||||||
|
} else {
|
||||||
|
// Calculate peak dynamic power
|
||||||
|
output_data.peak_dynamic_power =
|
||||||
|
local_result.power.readOp.dynamic * tdp_stats.readAc.access +
|
||||||
|
local_result.power.writeOp.dynamic * tdp_stats.writeAc.access +
|
||||||
|
local_result.power.searchOp.dynamic * tdp_stats.searchAc.access;
|
||||||
|
output_data.peak_dynamic_power *= clockRate;
|
||||||
|
|
||||||
|
// Calculate the runtime dynamic power
|
||||||
|
output_data.runtime_dynamic_energy =
|
||||||
|
local_result.power.readOp.dynamic * rtp_stats.readAc.access +
|
||||||
|
local_result.power.writeOp.dynamic * rtp_stats.writeAc.access +
|
||||||
|
local_result.power.searchOp.dynamic * rtp_stats.searchAc.access;
|
||||||
|
}
|
||||||
|
|
||||||
|
// An SBT directory has more dynamic power
|
||||||
|
if (sbt_dir_overhead > 0) {
|
||||||
|
// Calculate peak dynamic power
|
||||||
|
output_data.peak_dynamic_power +=
|
||||||
|
(computeSBTDynEnergy(&sbt_tdp_stats) * clockRate);
|
||||||
|
|
||||||
|
// Calculate the runtime dynamic power
|
||||||
|
output_data.runtime_dynamic_energy +=
|
||||||
|
computeSBTDynEnergy(&sbt_rtp_stats);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release whatever the selected CACTI organization still holds.
CacheArray::~CacheArray() {
    this->local_result.cleanup();
}
|
117
ext/mcpat/cachearray.h
Normal file
117
ext/mcpat/cachearray.h
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Authors: Joel Hestness
|
||||||
|
* Yasuko Eckert
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#ifndef CACHEARRAY_H_
|
||||||
|
#define CACHEARRAY_H_
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "basic_components.h"
|
||||||
|
#include "cacti_interface.h"
|
||||||
|
#include "component.h"
|
||||||
|
#include "const.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
|
class CacheArray : public McPATComponent {
|
||||||
|
public:
|
||||||
|
static double area_efficiency_threshold;
|
||||||
|
|
||||||
|
// These are used for the CACTI interface.
|
||||||
|
static int ed;
|
||||||
|
static int delay_wt;
|
||||||
|
static int cycle_time_wt;
|
||||||
|
static int area_wt;
|
||||||
|
static int dynamic_power_wt;
|
||||||
|
static int leakage_power_wt;
|
||||||
|
static int delay_dev;
|
||||||
|
static int cycle_time_dev;
|
||||||
|
static int area_dev;
|
||||||
|
static int dynamic_power_dev;
|
||||||
|
static int leakage_power_dev;
|
||||||
|
static int cycle_time_dev_threshold;
|
||||||
|
|
||||||
|
InputParameter l_ip;
|
||||||
|
enum Device_ty device_ty;
|
||||||
|
bool opt_local;
|
||||||
|
enum Core_type core_ty;
|
||||||
|
bool is_default;
|
||||||
|
uca_org_t local_result;
|
||||||
|
|
||||||
|
// These are only used for static bank tag (SBT) directory type.
|
||||||
|
double sbt_dir_overhead;
|
||||||
|
// Set this to contain SBT peak power stats
|
||||||
|
statsDef sbt_tdp_stats;
|
||||||
|
// Set this to contain SBT runtime power stats
|
||||||
|
statsDef sbt_rtp_stats;
|
||||||
|
|
||||||
|
CacheArray(XMLNode* _xml_data, const InputParameter *configure_interface,
|
||||||
|
string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
|
||||||
|
bool opt_local_ = true,
|
||||||
|
enum Core_type core_ty_ = Inorder, bool _is_default = true);
|
||||||
|
void computeArea();
|
||||||
|
void computeEnergy();
|
||||||
|
void compute_base_power();
|
||||||
|
void setSBTDirOverhead(double overhead) { sbt_dir_overhead = overhead; }
|
||||||
|
~CacheArray();
|
||||||
|
|
||||||
|
private:
|
||||||
|
double computeSBTDynEnergy(statsDef *sbt_stats_ptr);
|
||||||
|
};
|
||||||
|
|
||||||
|
extern inline
|
||||||
|
double CacheArray::computeSBTDynEnergy(statsDef *sbt_stats_p) {
|
||||||
|
if (sbt_dir_overhead == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write miss on dynamic home node will generate a replacement write on
|
||||||
|
// whole cache block
|
||||||
|
double dynamic =
|
||||||
|
sbt_stats_p->readAc.hit *
|
||||||
|
(local_result.data_array2->power.readOp.dynamic * sbt_dir_overhead +
|
||||||
|
local_result.tag_array2->power.readOp.dynamic) +
|
||||||
|
sbt_stats_p->readAc.miss *
|
||||||
|
local_result.tag_array2->power.readOp.dynamic +
|
||||||
|
sbt_stats_p->writeAc.miss *
|
||||||
|
local_result.tag_array2->power.readOp.dynamic +
|
||||||
|
sbt_stats_p->writeAc.hit *
|
||||||
|
(local_result.data_array2->power.writeOp.dynamic * sbt_dir_overhead +
|
||||||
|
local_result.tag_array2->power.readOp.dynamic+
|
||||||
|
sbt_stats_p->writeAc.miss *
|
||||||
|
local_result.power.writeOp.dynamic);
|
||||||
|
return dynamic;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* CACHEARRAY_H_ */
|
42
ext/mcpat/cachecontroller.cc
Normal file
42
ext/mcpat/cachecontroller.cc
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Author: Joel Hestness
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include "cachecontroller.h"
|
||||||
|
|
||||||
|
/**
 * Construct the cache controller component.
 *
 * Forwards the XML node and interface parameters to McPATComponent, names
 * the component, clocks it at the target core clock rate and then
 * instantiates its subcomponents through the base class.
 */
CacheController::CacheController(XMLNode* _xml_data,
                                 InputParameter* _interface_ip)
    : McPATComponent(_xml_data, _interface_ip) {
    name = "Cache Controller";
    clockRate = target_core_clockrate;
    McPATComponent::recursiveInstantiate();
}

// The class declares a user-provided destructor, so it needs a definition
// for objects of this type to be destroyed without a link error. This class
// adds no members of its own, so there is nothing to release here.
CacheController::~CacheController() {
}
|
|
@ -1,7 +1,7 @@
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,24 +25,21 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Author: Joel Hestness
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
#ifndef CACHECONTROLLER_H_
|
||||||
|
#define CACHECONTROLLER_H_
|
||||||
|
|
||||||
#ifndef GLOBALVAR_H_
|
#include "basic_components.h"
|
||||||
#define GLOBALVAR_H_
|
|
||||||
|
|
||||||
#ifdef GLOBALVAR
|
|
||||||
#define EXTERN
|
|
||||||
#else
|
|
||||||
#define EXTERN extern
|
|
||||||
#endif
|
|
||||||
|
|
||||||
EXTERN bool opt_for_clk;
|
|
||||||
|
|
||||||
#endif /* GLOBALVAR_H_ */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CacheController : public McPATComponent {
|
||||||
|
public:
|
||||||
|
CacheController(XMLNode* _xml_data, InputParameter* _interface_ip);
|
||||||
|
~CacheController();
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CACHECONTROLLER_H_ */
|
647
ext/mcpat/cacheunit.cc
Normal file
647
ext/mcpat/cacheunit.cc
Normal file
|
@ -0,0 +1,647 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Authors: Joel Hestness
|
||||||
|
* Yasuko Eckert
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "arbiter.h"
|
||||||
|
#include "array.h"
|
||||||
|
#include "basic_circuit.h"
|
||||||
|
#include "cachearray.h"
|
||||||
|
#include "cacheunit.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "const.h"
|
||||||
|
#include "io.h"
|
||||||
|
#include "logic.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
|
// Class-wide defaults for cache units. The constructor copies is_cache and
// pure_cam into the CACTI interface parameters.
bool CacheUnit::is_cache = true;
bool CacheUnit::pure_cam = false;

// NOTE(review): the uses of these two flags are outside the visible part of
// this file.
bool CacheUnit::opt_local = true;
bool CacheUnit::force_cache_config = false;
|
||||||
|
|
||||||
|
/**
 * Build a cache unit from its XML description.
 *
 * Parameters and statistics are first read from the XML node by
 * set_cache_param_from_xml_data(). The constructor then creates the
 * "Data and Tag Arrays" child and seeds its TDP and runtime access
 * statistics. Unless the unit is an ST-type L1/L2 directory, it also creates
 * the "Miss Buffer", "Fill Buffer", "Prefetch Buffer" and (only when
 * wbb_size > 0) "Writeback Buffer" children. Every created array is appended
 * to `children`.
 *
 * @param _xml_data     XML node describing this cache unit
 * @param _interface_ip input parameters forwarded to McPATComponent
 */
CacheUnit::CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip)
        // The base class is always constructed before members; list it
        // first so the written order matches the real initialization order.
        : McPATComponent(_xml_data, _interface_ip), dir_overhead(0) {

    int tag;
    int data;

    // Default name; set_cache_param_from_xml_data() replaces it when the
    // XML node carries a "name" attribute.
    name = "Cache Unit";
    CacheArray* arrayPtr = NULL;

    set_cache_param_from_xml_data();

    // ST-type L1/L2 directories use the full physical address as the tag
    // and get none of the auxiliary buffers created further below.
    const bool st_directory =
        cache_params.dir_ty == ST &&
        (cache_params.cache_level == L1Directory ||
         cache_params.cache_level == L2Directory);

    //All lower level cache are physically indexed and tagged.
    double size = cache_params.capacity;
    double line = cache_params.blockW;
    double assoc = cache_params.assoc;
    double banks = cache_params.nbanks;

    if (st_directory) {
        tag = physical_address_width + EXTRA_TAG_BITS;
    } else {
        // Tag = address bits minus set-index bits minus block-offset bits.
        tag = physical_address_width - int(ceil(log2(size / line / assoc))) -
            int(ceil(log2(line))) + EXTRA_TAG_BITS;

        if (cache_params.dir_ty == SBT) {
            // Sharer bits (num_cores rounded up to whole bytes) expressed
            // as a fraction of the line; line and capacity grow by it.
            // NOTE(review): num_cores is an int; if BITS_PER_BYTE is an
            // integral constant the division truncates before ceil() --
            // confirm its type where it is defined.
            dir_overhead = ceil(cache_params.num_cores / BITS_PER_BYTE) *
                BITS_PER_BYTE / (line * BITS_PER_BYTE);
            line *= (1 + dir_overhead);
            size *= (1 + dir_overhead);
        }
    }

    // ---------------------------------------------------------------
    // Data and Tag Arrays
    // ---------------------------------------------------------------
    interface_ip.cache_sz = (int)size;
    interface_ip.line_sz = (int)line;
    interface_ip.assoc = (int)assoc;
    interface_ip.nbanks = (int)banks;
    interface_ip.specific_tag = tag > 0;
    interface_ip.tag_w = tag;

    // L1 outputs a full line; every other level outputs half a line.
    if (cache_params.cache_level == L1) {
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    } else {
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
    }

    interface_ip.access_mode = cache_params.cache_access_mode;
    interface_ip.throughput = cache_params.throughput;
    interface_ip.latency = cache_params.latency;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.is_cache = is_cache;
    interface_ip.pure_ram = cache_params.pure_ram;
    interface_ip.pure_cam = pure_cam;
    interface_ip.num_rw_ports = cache_params.cache_rw_ports;
    interface_ip.num_rd_ports = cache_params.cache_rd_ports;
    interface_ip.num_wr_ports = cache_params.cache_wr_ports;
    interface_ip.num_se_rd_ports = cache_params.cache_se_rd_ports;
    interface_ip.num_search_ports = cache_params.cache_search_ports;

    arrayPtr = new CacheArray(xml_data, &interface_ip, "Data and Tag Arrays",
                              cache_params.device_ty, clockRate, opt_local,
                              cache_params.core_ty);
    children.push_back(arrayPtr);

    // This is for calculating TDP, which depends on the number of
    // available ports
    int num_tdp_ports = arrayPtr->l_ip.num_rw_ports +
        arrayPtr->l_ip.num_rd_ports + arrayPtr->l_ip.num_wr_ports;

    // Set new array stats for calculating TDP and runtime power
    arrayPtr->tdp_stats.reset();
    arrayPtr->tdp_stats.readAc.access = cache_stats.tdp_read_access_scalar *
        num_tdp_ports * cache_stats.duty_cycle *
        cache_stats.homenode_access_scalar;
    arrayPtr->tdp_stats.readAc.miss = 0;
    arrayPtr->tdp_stats.readAc.hit = arrayPtr->tdp_stats.readAc.access -
        arrayPtr->tdp_stats.readAc.miss;
    arrayPtr->tdp_stats.writeAc.access = cache_stats.tdp_write_access_scalar *
        num_tdp_ports * cache_stats.duty_cycle *
        cache_stats.homenode_access_scalar;
    arrayPtr->tdp_stats.writeAc.miss = 0;
    arrayPtr->tdp_stats.writeAc.hit = arrayPtr->tdp_stats.writeAc.access -
        arrayPtr->tdp_stats.writeAc.miss;
    arrayPtr->tdp_stats.searchAc.access = 0;
    arrayPtr->tdp_stats.searchAc.miss = 0;
    arrayPtr->tdp_stats.searchAc.hit = 0;

    arrayPtr->rtp_stats.reset();
    if (cache_stats.use_detailed_stats) {
        // Per-array counts were supplied directly in the XML.
        arrayPtr->rtp_stats.dataReadAc.access =
            cache_stats.num_data_array_reads;
        arrayPtr->rtp_stats.dataWriteAc.access =
            cache_stats.num_data_array_writes;
        arrayPtr->rtp_stats.tagReadAc.access =
            cache_stats.num_tag_array_reads;
        arrayPtr->rtp_stats.tagWriteAc.access =
            cache_stats.num_tag_array_writes;
    } else {
        // This code makes assumptions. For instance, it assumes that
        // tag and data arrays are accessed in parallel on a read request and
        // this is a write-allocate cache. It also ignores any coherence
        // requests. Using detailed stats as above can avoid the ambiguity
        // that is introduced here
        arrayPtr->rtp_stats.dataReadAc.access =
            cache_stats.read_accesses + cache_stats.write_misses;
        arrayPtr->rtp_stats.dataWriteAc.access =
            cache_stats.write_accesses + cache_stats.read_misses;
        arrayPtr->rtp_stats.tagReadAc.access =
            cache_stats.read_accesses + cache_stats.write_accesses;
        arrayPtr->rtp_stats.tagWriteAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
    }

    // Set SBT stats if this is an SBT directory type
    if (dir_overhead > 0) {
        arrayPtr->setSBTDirOverhead(dir_overhead);

        // TDP stats
        arrayPtr->sbt_tdp_stats.readAc.access =
            cache_stats.tdp_read_access_scalar *
            num_tdp_ports * cache_stats.dir_duty_cycle *
            (1 - cache_stats.homenode_access_scalar);
        arrayPtr->sbt_tdp_stats.readAc.miss = 0;
        arrayPtr->sbt_tdp_stats.readAc.hit =
            arrayPtr->sbt_tdp_stats.readAc.access -
            arrayPtr->sbt_tdp_stats.readAc.miss;
        arrayPtr->sbt_tdp_stats.writeAc.access =
            cache_stats.tdp_sbt_write_access_scalar *
            num_tdp_ports * cache_stats.dir_duty_cycle *
            (1 - cache_stats.homenode_access_scalar);
        arrayPtr->sbt_tdp_stats.writeAc.miss = 0;
        arrayPtr->sbt_tdp_stats.writeAc.hit =
            arrayPtr->sbt_tdp_stats.writeAc.access -
            arrayPtr->sbt_tdp_stats.writeAc.miss;

        // Runtime power stats
        arrayPtr->sbt_rtp_stats.readAc.access =
            cache_stats.homenode_read_accesses;
        arrayPtr->sbt_rtp_stats.readAc.miss =
            cache_stats.homenode_read_misses;
        // Hits = accesses - misses. This previously overwrote
        // readAc.access and left readAc.hit unset; it now mirrors the
        // write path below.
        arrayPtr->sbt_rtp_stats.readAc.hit =
            cache_stats.homenode_read_accesses -
            cache_stats.homenode_read_misses;
        arrayPtr->sbt_rtp_stats.writeAc.access =
            cache_stats.homenode_write_accesses;
        arrayPtr->sbt_rtp_stats.writeAc.miss =
            cache_stats.homenode_write_misses;
        arrayPtr->sbt_rtp_stats.writeAc.hit =
            cache_stats.homenode_write_accesses -
            cache_stats.homenode_write_misses;
    }

    interface_ip.force_cache_config = force_cache_config;

    if (!st_directory) {
        // -----------------------------------------------------------
        // Miss Buffer
        // -----------------------------------------------------------
        tag = physical_address_width + EXTRA_TAG_BITS;
        // Entry payload in bits: an address, a line index and one block.
        data = (physical_address_width) +
            int(ceil(log2(size / cache_params.blockW))) +
            (cache_params.blockW * BITS_PER_BYTE);
        line = int(ceil(data / BITS_PER_BYTE));
        size = cache_params.missb_size * line;

        interface_ip.cache_sz = size;
        interface_ip.line_sz = line;
        interface_ip.assoc = cache_params.missb_assoc;
        interface_ip.nbanks = cache_params.missb_banks;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;

        if (cache_params.cache_level == L1) {
            interface_ip.out_w = line * BITS_PER_BYTE;
        } else {
            interface_ip.out_w = line * BITS_PER_BYTE / 2;
        }

        interface_ip.access_mode = cache_params.miss_buff_access_mode;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.is_cache = is_cache;
        interface_ip.pure_ram = cache_params.pure_ram;
        interface_ip.pure_cam = pure_cam;
        interface_ip.throughput = cache_params.throughput;
        interface_ip.latency = cache_params.latency;
        interface_ip.num_rw_ports = cache_params.miss_buff_rw_ports;
        interface_ip.num_rd_ports = cache_params.miss_buff_rd_ports;
        interface_ip.num_wr_ports = cache_params.miss_buff_wr_ports;
        interface_ip.num_se_rd_ports = cache_params.miss_buff_se_rd_ports;
        interface_ip.num_search_ports = cache_params.miss_buff_search_ports;

        arrayPtr = new CacheArray(xml_data, &interface_ip, "Miss Buffer",
                                  cache_params.device_ty, clockRate, opt_local,
                                  cache_params.core_ty);
        children.push_back(arrayPtr);

        arrayPtr->tdp_stats.reset();
        arrayPtr->tdp_stats.readAc.access = 0;
        arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
        arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;

        arrayPtr->rtp_stats.reset();
        arrayPtr->rtp_stats.readAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.writeAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.searchAc.access = 0;

        if (cache_params.dir_ty == SBT) {
            arrayPtr->rtp_stats.readAc.access +=
                cache_stats.homenode_write_misses;
            arrayPtr->rtp_stats.writeAc.access +=
                cache_stats.homenode_write_misses;
        }

        // -----------------------------------------------------------
        // Fill Buffer
        // -----------------------------------------------------------
        tag = physical_address_width + EXTRA_TAG_BITS;
        data = cache_params.blockW;

        interface_ip.cache_sz = data * cache_params.fu_size;
        interface_ip.line_sz = data;
        interface_ip.assoc = cache_params.fu_assoc;
        interface_ip.nbanks = cache_params.fu_banks;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;

        if (cache_params.cache_level == L1) {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        } else {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
        }

        // pure_ram is not reassigned in this section; the value set for
        // the miss buffer above is still in effect.
        interface_ip.access_mode = cache_params.fetch_buff_access_mode;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.is_cache = is_cache;
        interface_ip.pure_cam = pure_cam;
        interface_ip.throughput = cache_params.throughput;
        interface_ip.latency = cache_params.latency;
        interface_ip.num_rw_ports = cache_params.fetch_buff_rw_ports;
        interface_ip.num_rd_ports = cache_params.fetch_buff_rd_ports;
        interface_ip.num_wr_ports = cache_params.fetch_buff_wr_ports;
        interface_ip.num_se_rd_ports = cache_params.fetch_buff_se_rd_ports;
        interface_ip.num_search_ports = cache_params.fetch_buff_search_ports;
        arrayPtr = new CacheArray(xml_data, &interface_ip, "Fill Buffer",
                                  cache_params.device_ty, clockRate, opt_local,
                                  cache_params.core_ty);
        children.push_back(arrayPtr);

        arrayPtr->tdp_stats.reset();
        arrayPtr->tdp_stats.readAc.access = 0;
        arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
        arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;

        arrayPtr->rtp_stats.reset();
        arrayPtr->rtp_stats.readAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.writeAc.access =
            cache_stats.read_misses + cache_stats.write_misses;
        arrayPtr->rtp_stats.searchAc.access = 0;

        if (cache_params.dir_ty == SBT) {
            arrayPtr->rtp_stats.readAc.access +=
                cache_stats.homenode_write_misses;
            arrayPtr->rtp_stats.writeAc.access +=
                cache_stats.homenode_write_misses;
        }

        // -----------------------------------------------------------
        // Prefetch Buffer
        // -----------------------------------------------------------
        tag = physical_address_width + EXTRA_TAG_BITS;
        line = cache_params.blockW;

        interface_ip.cache_sz = cache_params.prefetchb_size * line;
        interface_ip.line_sz = line;
        interface_ip.assoc = cache_params.prefetchb_assoc;
        interface_ip.nbanks = cache_params.prefetchb_banks;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;

        if (cache_params.cache_level == L1) {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        } else {
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2;
        }

        interface_ip.access_mode = cache_params.prefetch_buff_access_mode;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.is_cache = is_cache;
        interface_ip.pure_ram = cache_params.pure_ram;
        interface_ip.pure_cam = pure_cam;
        interface_ip.throughput = cache_params.throughput;
        interface_ip.latency = cache_params.latency;
        interface_ip.num_rw_ports = cache_params.pf_buff_rw_ports;
        interface_ip.num_rd_ports = cache_params.pf_buff_rd_ports;
        interface_ip.num_wr_ports = cache_params.pf_buff_wr_ports;
        interface_ip.num_se_rd_ports = cache_params.pf_buff_se_rd_ports;
        interface_ip.num_search_ports = cache_params.pf_buff_search_ports;
        arrayPtr = new CacheArray(xml_data, &interface_ip, "Prefetch Buffer",
                                  cache_params.device_ty, clockRate, opt_local,
                                  cache_params.core_ty);
        children.push_back(arrayPtr);

        arrayPtr->tdp_stats.reset();
        arrayPtr->tdp_stats.readAc.access = 0;
        arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports;
        arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports;

        arrayPtr->rtp_stats.reset();
        arrayPtr->rtp_stats.readAc.access = cache_stats.read_misses;
        arrayPtr->rtp_stats.writeAc.access = cache_stats.read_misses;
        arrayPtr->rtp_stats.searchAc.access = 0;

        if (cache_params.dir_ty == SBT) {
            arrayPtr->rtp_stats.readAc.access +=
                cache_stats.homenode_write_misses;
            arrayPtr->rtp_stats.writeAc.access +=
                cache_stats.homenode_write_misses;
        }

        // -----------------------------------------------------------
        // Writeback Buffer (only modeled when it has a non-zero size)
        // -----------------------------------------------------------
        if (cache_params.wbb_size > 0) {
            tag = physical_address_width + EXTRA_TAG_BITS;
            line = cache_params.blockW;

            interface_ip.cache_sz = cache_params.wbb_size * line;
            interface_ip.line_sz = line;
            interface_ip.assoc = cache_params.wbb_assoc;
            interface_ip.nbanks = cache_params.wbb_banks;
            interface_ip.specific_tag = tag > 0;
            interface_ip.tag_w = tag;

            if (cache_params.cache_level == L1) {
                interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
            } else {
                interface_ip.out_w =
                    interface_ip.line_sz * BITS_PER_BYTE / 2;
            }

            interface_ip.access_mode = cache_params.writeback_buff_access_mode;
            interface_ip.obj_func_dyn_energy = 0;
            interface_ip.obj_func_dyn_power = 0;
            interface_ip.obj_func_leak_power = 0;
            interface_ip.obj_func_cycle_t = 1;
            interface_ip.is_cache = is_cache;
            interface_ip.pure_ram = cache_params.pure_ram;
            interface_ip.pure_cam = pure_cam;
            interface_ip.throughput = cache_params.throughput;
            interface_ip.latency = cache_params.latency;
            interface_ip.num_rw_ports = cache_params.wb_buff_rw_ports;
            interface_ip.num_rd_ports = cache_params.wb_buff_rd_ports;
            interface_ip.num_wr_ports = cache_params.wb_buff_wr_ports;
            interface_ip.num_se_rd_ports = cache_params.wb_buff_se_rd_ports;
            interface_ip.num_search_ports = cache_params.wb_buff_search_ports;
            arrayPtr = new CacheArray(xml_data, &interface_ip,
                                      "Writeback Buffer",
                                      cache_params.device_ty, clockRate,
                                      opt_local, cache_params.core_ty);
            children.push_back(arrayPtr);

            arrayPtr->tdp_stats.reset();
            arrayPtr->tdp_stats.readAc.access = 0;
            arrayPtr->tdp_stats.writeAc.access =
                arrayPtr->l_ip.num_search_ports;
            arrayPtr->tdp_stats.searchAc.access =
                arrayPtr->l_ip.num_search_ports;

            arrayPtr->rtp_stats.reset();
            arrayPtr->rtp_stats.readAc.access = cache_stats.write_misses;
            arrayPtr->rtp_stats.writeAc.access = cache_stats.write_misses;
            arrayPtr->rtp_stats.searchAc.access = 0;

            if (cache_params.dir_ty == SBT) {
                arrayPtr->rtp_stats.readAc.access +=
                    cache_stats.homenode_write_misses;
                arrayPtr->rtp_stats.writeAc.access +=
                    cache_stats.homenode_write_misses;
            }
        }
    }
}
|
||||||
|
|
||||||
|
// Energy computation for a cache unit adds nothing of its own: it simply
// forwards to the generic McPATComponent implementation.
void
CacheUnit::computeEnergy()
{
    McPATComponent::computeEnergy();
}
|
||||||
|
|
||||||
|
void CacheUnit::set_cache_param_from_xml_data() {
|
||||||
|
int level, type;
|
||||||
|
|
||||||
|
// Initialization... move this?
|
||||||
|
memset(&cache_params, 0, sizeof(CacheParameters));
|
||||||
|
memset(&cache_stats, 0, sizeof(CacheStatistics));
|
||||||
|
|
||||||
|
// By default, use the core clock frequency. This can be changed by
|
||||||
|
// setting the clockrate param in the XML definition of the CacheUnit
|
||||||
|
clockRate = target_core_clockrate;
|
||||||
|
XMLCSTR comp_name = xml_data->getAttribute("name");
|
||||||
|
if (comp_name) {
|
||||||
|
name = comp_name;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num_children = xml_data->nChildNode("param");
|
||||||
|
int i;
|
||||||
|
int tech_type;
|
||||||
|
int mat_type;
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_INT_IF("level", level);
|
||||||
|
ASSIGN_FP_IF("size", cache_params.capacity);
|
||||||
|
ASSIGN_FP_IF("block_size", cache_params.blockW);
|
||||||
|
ASSIGN_FP_IF("assoc", cache_params.assoc);
|
||||||
|
ASSIGN_FP_IF("num_banks", cache_params.nbanks);
|
||||||
|
ASSIGN_FP_IF("latency", cache_params.latency);
|
||||||
|
ASSIGN_FP_IF("throughput", cache_params.throughput);
|
||||||
|
ASSIGN_INT_IF("miss_buffer_size", cache_params.missb_size);
|
||||||
|
ASSIGN_INT_IF("fetch_buffer_size", cache_params.fu_size);
|
||||||
|
ASSIGN_INT_IF("prefetch_buffer_size", cache_params.prefetchb_size);
|
||||||
|
ASSIGN_INT_IF("writeback_buffer_size", cache_params.wbb_size);
|
||||||
|
ASSIGN_INT_IF("miss_buffer_assoc", cache_params.missb_assoc);
|
||||||
|
ASSIGN_INT_IF("fetch_buffer_assoc", cache_params.fu_assoc);
|
||||||
|
ASSIGN_INT_IF("prefetch_buffer_assoc", cache_params.prefetchb_assoc);
|
||||||
|
ASSIGN_INT_IF("writeback_buffer_assoc", cache_params.wbb_assoc);
|
||||||
|
ASSIGN_INT_IF("miss_buffer_banks", cache_params.missb_banks);
|
||||||
|
ASSIGN_INT_IF("fetch_buffer_banks", cache_params.fu_banks);
|
||||||
|
ASSIGN_INT_IF("prefetch_buffer_banks", cache_params.prefetchb_banks);
|
||||||
|
ASSIGN_INT_IF("writeback_buffer_banks", cache_params.wbb_banks);
|
||||||
|
ASSIGN_ENUM_IF("cache_access_mode",
|
||||||
|
cache_params.cache_access_mode, Access_mode);
|
||||||
|
ASSIGN_ENUM_IF("miss_buff_access_mode",
|
||||||
|
cache_params.miss_buff_access_mode, Access_mode);
|
||||||
|
ASSIGN_ENUM_IF("fetch_buff_access_mode",
|
||||||
|
cache_params.fetch_buff_access_mode, Access_mode);
|
||||||
|
ASSIGN_ENUM_IF("prefetch_buff_access_mode",
|
||||||
|
cache_params.prefetch_buff_access_mode, Access_mode);
|
||||||
|
ASSIGN_ENUM_IF("writeback_buff_access_mode",
|
||||||
|
cache_params.writeback_buff_access_mode, Access_mode);
|
||||||
|
ASSIGN_INT_IF("cache_rw_ports", cache_params.cache_rw_ports);
|
||||||
|
ASSIGN_INT_IF("cache_rd_ports", cache_params.cache_rd_ports);
|
||||||
|
ASSIGN_INT_IF("cache_wr_ports", cache_params.cache_wr_ports);
|
||||||
|
ASSIGN_INT_IF("cache_se_rd_ports", cache_params.cache_se_rd_ports);
|
||||||
|
ASSIGN_INT_IF("cache_search_ports", cache_params.cache_search_ports);
|
||||||
|
ASSIGN_INT_IF("miss_buff_rw_ports", cache_params.miss_buff_rw_ports);
|
||||||
|
ASSIGN_INT_IF("miss_buff_rd_ports", cache_params.miss_buff_rd_ports);
|
||||||
|
ASSIGN_INT_IF("miss_buff_wr_ports", cache_params.miss_buff_wr_ports);
|
||||||
|
ASSIGN_INT_IF("miss_buff_se_rd_ports" ,
|
||||||
|
cache_params.miss_buff_se_rd_ports);
|
||||||
|
ASSIGN_INT_IF("miss_buff_search_ports",
|
||||||
|
cache_params.miss_buff_search_ports);
|
||||||
|
ASSIGN_INT_IF("fetch_buff_rw_ports", cache_params.fetch_buff_rw_ports);
|
||||||
|
ASSIGN_INT_IF("fetch_buff_rd_ports", cache_params.fetch_buff_rd_ports);
|
||||||
|
ASSIGN_INT_IF("fetch_buff_wr_ports", cache_params.fetch_buff_wr_ports);
|
||||||
|
ASSIGN_INT_IF("fetch_buff_se_rd_ports",
|
||||||
|
cache_params.fetch_buff_se_rd_ports);
|
||||||
|
ASSIGN_INT_IF("fetch_buff_search_ports",
|
||||||
|
cache_params.fetch_buff_search_ports);
|
||||||
|
ASSIGN_INT_IF("pf_buff_rw_ports", cache_params.pf_buff_rw_ports);
|
||||||
|
ASSIGN_INT_IF("pf_buff_rd_ports", cache_params.pf_buff_rd_ports);
|
||||||
|
ASSIGN_INT_IF("pf_buff_wr_ports", cache_params.pf_buff_wr_ports);
|
||||||
|
ASSIGN_INT_IF("pf_buff_se_rd_ports", cache_params.pf_buff_se_rd_ports);
|
||||||
|
ASSIGN_INT_IF("pf_buff_search_ports",
|
||||||
|
cache_params.pf_buff_search_ports);
|
||||||
|
ASSIGN_INT_IF("wb_buff_rw_ports", cache_params.wb_buff_rw_ports);
|
||||||
|
ASSIGN_INT_IF("wb_buff_rd_ports", cache_params.wb_buff_rd_ports);
|
||||||
|
ASSIGN_INT_IF("wb_buff_wr_ports", cache_params.wb_buff_wr_ports);
|
||||||
|
ASSIGN_INT_IF("wb_buff_se_rd_ports", cache_params.wb_buff_se_rd_ports);
|
||||||
|
ASSIGN_INT_IF("wb_buff_search_ports",
|
||||||
|
cache_params.wb_buff_search_ports);
|
||||||
|
ASSIGN_FP_IF("clockrate", cache_params.clockRate);
|
||||||
|
ASSIGN_INT_IF("pure_ram", cache_params.pure_ram);
|
||||||
|
ASSIGN_INT_IF("tech_type", tech_type);
|
||||||
|
ASSIGN_ENUM_IF("Directory_type", cache_params.dir_ty, Dir_type);
|
||||||
|
ASSIGN_ENUM_IF("device_type", cache_params.device_ty, Device_ty);
|
||||||
|
ASSIGN_ENUM_IF("core_type", cache_params.core_ty, Core_type);
|
||||||
|
ASSIGN_INT_IF("num_cores", cache_params.num_cores);
|
||||||
|
ASSIGN_INT_IF("wire_mat_type", mat_type);
|
||||||
|
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change from MHz to Hz
|
||||||
|
cache_params.clockRate *= 1e6;
|
||||||
|
if (cache_params.clockRate > 0) {
|
||||||
|
clockRate = cache_params.clockRate;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface_ip.data_arr_ram_cell_tech_type = tech_type;
|
||||||
|
interface_ip.data_arr_peri_global_tech_type = tech_type;
|
||||||
|
interface_ip.tag_arr_ram_cell_tech_type = tech_type;
|
||||||
|
interface_ip.tag_arr_peri_global_tech_type = tech_type;
|
||||||
|
|
||||||
|
interface_ip.wire_is_mat_type = mat_type;
|
||||||
|
interface_ip.wire_os_mat_type = mat_type;
|
||||||
|
|
||||||
|
switch(level) {
|
||||||
|
case 1:
|
||||||
|
cache_params.cache_level = L1;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
cache_params.cache_level = L2;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
cache_params.cache_level = L3;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
cache_params.cache_level = L1Directory;
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
cache_params.cache_level = L2Directory;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "ERROR: Unrecognized cache level in %s: %d\n",
|
||||||
|
name.c_str(), level);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
cache_stats.use_detailed_stats = false;
|
||||||
|
|
||||||
|
num_children = xml_data->nChildNode("stat");
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("num_data_array_reads", cache_stats.num_data_array_reads);
|
||||||
|
ASSIGN_FP_IF("num_data_array_writes",
|
||||||
|
cache_stats.num_data_array_writes);
|
||||||
|
ASSIGN_FP_IF("num_tag_array_reads", cache_stats.num_tag_array_reads);
|
||||||
|
ASSIGN_FP_IF("num_tag_array_writes", cache_stats.num_tag_array_writes);
|
||||||
|
ASSIGN_FP_IF("duty_cycle", cache_stats.duty_cycle);
|
||||||
|
ASSIGN_FP_IF("read_accesses", cache_stats.read_accesses);
|
||||||
|
ASSIGN_FP_IF("write_accesses", cache_stats.write_accesses);
|
||||||
|
ASSIGN_FP_IF("read_misses", cache_stats.read_misses);
|
||||||
|
ASSIGN_FP_IF("write_misses", cache_stats.write_misses);
|
||||||
|
ASSIGN_FP_IF("conflicts", cache_stats.conflicts);
|
||||||
|
ASSIGN_INT_IF("homenode_read_accesses",
|
||||||
|
cache_stats.homenode_read_accesses);
|
||||||
|
ASSIGN_INT_IF("homenode_write_accesses",
|
||||||
|
cache_stats.homenode_write_accesses);
|
||||||
|
ASSIGN_INT_IF("homenode_read_misses",
|
||||||
|
cache_stats.homenode_read_misses);
|
||||||
|
ASSIGN_INT_IF("homenode_write_misses",
|
||||||
|
cache_stats.homenode_write_misses);
|
||||||
|
ASSIGN_FP_IF("homenode_access_scalar",
|
||||||
|
cache_stats.homenode_access_scalar);
|
||||||
|
ASSIGN_FP_IF("tdp_read_access_scalar",
|
||||||
|
cache_stats.tdp_read_access_scalar);
|
||||||
|
ASSIGN_FP_IF("tdp_write_access_scalar",
|
||||||
|
cache_stats.tdp_write_access_scalar);
|
||||||
|
ASSIGN_FP_IF("tdp_sbt_write_access_scalar",
|
||||||
|
cache_stats.tdp_sbt_write_access_scalar);
|
||||||
|
ASSIGN_FP_IF("dir_duty_cycle",
|
||||||
|
cache_stats.dir_duty_cycle);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedStat(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cache_stats.num_data_array_reads > 0 ||
|
||||||
|
cache_stats.num_data_array_writes > 0 ||
|
||||||
|
cache_stats.num_tag_array_reads > 0 ||
|
||||||
|
cache_stats.num_tag_array_writes > 0) {
|
||||||
|
cache_stats.use_detailed_stats = true;
|
||||||
|
calculate_runtime_data_and_tag = true;
|
||||||
|
}
|
||||||
|
}
|
167
ext/mcpat/cacheunit.h
Normal file
167
ext/mcpat/cacheunit.h
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Authors: Joel Hestness
|
||||||
|
* Yasuko Eckert
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#ifndef CACHEUNIT_H_
|
||||||
|
#define CACHEUNIT_H_
|
||||||
|
|
||||||
|
#include "area.h"
|
||||||
|
#include "array.h"
|
||||||
|
#include "basic_components.h"
|
||||||
|
#include "logic.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
|
class CacheParameters {
|
||||||
|
public:
|
||||||
|
enum Dir_type dir_ty;
|
||||||
|
double clockRate;
|
||||||
|
double capacity;
|
||||||
|
double blockW;
|
||||||
|
double assoc;
|
||||||
|
double nbanks;
|
||||||
|
double throughput;
|
||||||
|
double latency;
|
||||||
|
int missb_size;
|
||||||
|
int fu_size;
|
||||||
|
int prefetchb_size;
|
||||||
|
int wbb_size;
|
||||||
|
int missb_assoc;
|
||||||
|
int fu_assoc;
|
||||||
|
int prefetchb_assoc;
|
||||||
|
int wbb_assoc;
|
||||||
|
int missb_banks;
|
||||||
|
int fu_banks;
|
||||||
|
int prefetchb_banks;
|
||||||
|
int wbb_banks;
|
||||||
|
enum Access_mode cache_access_mode;
|
||||||
|
enum Access_mode miss_buff_access_mode;
|
||||||
|
enum Access_mode fetch_buff_access_mode;
|
||||||
|
enum Access_mode prefetch_buff_access_mode;
|
||||||
|
enum Access_mode writeback_buff_access_mode;
|
||||||
|
int cache_rw_ports;
|
||||||
|
int cache_rd_ports;
|
||||||
|
int cache_wr_ports;
|
||||||
|
int cache_se_rd_ports;
|
||||||
|
int cache_search_ports;
|
||||||
|
int miss_buff_rw_ports;
|
||||||
|
int miss_buff_rd_ports;
|
||||||
|
int miss_buff_wr_ports;
|
||||||
|
int miss_buff_se_rd_ports;
|
||||||
|
int miss_buff_search_ports;
|
||||||
|
int fetch_buff_rw_ports;
|
||||||
|
int fetch_buff_rd_ports;
|
||||||
|
int fetch_buff_wr_ports;
|
||||||
|
int fetch_buff_se_rd_ports;
|
||||||
|
int fetch_buff_search_ports;
|
||||||
|
int pf_buff_rw_ports;
|
||||||
|
int pf_buff_rd_ports;
|
||||||
|
int pf_buff_wr_ports;
|
||||||
|
int pf_buff_se_rd_ports;
|
||||||
|
int pf_buff_search_ports;
|
||||||
|
int wb_buff_rw_ports;
|
||||||
|
int wb_buff_rd_ports;
|
||||||
|
int wb_buff_wr_ports;
|
||||||
|
int wb_buff_se_rd_ports;
|
||||||
|
int wb_buff_search_ports;
|
||||||
|
bool pure_ram;
|
||||||
|
enum CacheLevel cache_level;
|
||||||
|
enum Device_ty device_ty;
|
||||||
|
enum Core_type core_ty;
|
||||||
|
int num_cores;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Activity statistics of one cache unit, read from the <stat> children of
// the unit's XML node.
class CacheStatistics {
  public:
    // --- TDP estimation ---------------------------------------------
    // Duty cycle should reflect the highest sustainable rate of access to
    // the cache unit in execution of a benchmark. Default should be 1.0:
    // one access per cycle.
    double duty_cycle;
    // Duty cycle used only for SBT directory types.
    double dir_duty_cycle;
    // Read/write scaling factors, also used for estimating TDP.
    double tdp_read_access_scalar;
    double tdp_write_access_scalar;

    // --- Runtime dynamic power --------------------------------------
    // Dynamic power can be derived from activity statistics in two ways;
    // this flag selects between them. Default is false (method 2).
    bool use_detailed_stats;

    // Method 1 (use_detailed_stats = true): count the number and type of
    // accesses to each cache array, with data and tag arrays kept
    // separate. These are extremely detailed statistics. read_misses and
    // write_misses are still required with this method for the various
    // buffers associated with this cache.
    double num_data_array_reads;
    double num_data_array_writes;
    double num_tag_array_reads;
    double num_tag_array_writes;

    // Method 2: count the number and type of accesses to the cache unit
    // as a whole and extrapolate the accesses to the other subcomponents
    // (cache arrays and buffers) from them.
    double read_accesses;
    double write_accesses;
    double read_misses;
    double write_misses;
    double conflicts;

    // --- SBT directory types only -----------------------------------
    int homenode_read_accesses;
    int homenode_write_accesses;
    int homenode_read_misses;
    int homenode_write_misses;
    double homenode_access_scalar;
    double tdp_sbt_write_access_scalar;
};
|
||||||
|
|
||||||
|
// A McPATComponent that bundles one cache's CacheParameters and
// CacheStatistics. Only declarations appear here; the constructor,
// set_cache_param_from_xml_data() and computeEnergy() are defined elsewhere.
class CacheUnit : public McPATComponent {
|
||||||
|
public:
|
||||||
|
// The four flags below are static, i.e. one value shared by every CacheUnit
// rather than per-instance state.
static bool is_cache;
|
||||||
|
static bool pure_cam;
|
||||||
|
// This is used for CacheArray objects
|
||||||
|
static bool opt_local;
|
||||||
|
static bool force_cache_config;
|
||||||
|
|
||||||
|
int ithCache;
|
||||||
|
CacheParameters cache_params;
|
||||||
|
CacheStatistics cache_stats;
|
||||||
|
Cache_type cacheType;
|
||||||
|
bool calculate_runtime_data_and_tag;
|
||||||
|
double dir_overhead;
|
||||||
|
|
||||||
|
double scktRatio;
|
||||||
|
|
||||||
|
// TODO: REMOVE _interface_ip... It promotes a mess. Find a better way...
|
||||||
|
// Takes the XML node for this cache plus the InputParameter object that the
// TODO above wants to eliminate.
CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip);
|
||||||
|
void set_cache_param_from_xml_data();
|
||||||
|
// NOTE(review): presumably overrides a McPATComponent hook; no
// virtual/override marker is visible here -- confirm in the base class.
void computeEnergy();
|
||||||
|
// Empty destructor body; the ';' after '{}' is a redundant empty declaration.
~CacheUnit() {};
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* CACHEUNIT_H_ */
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -39,9 +40,8 @@
|
||||||
#include "nuca.h"
|
#include "nuca.h"
|
||||||
#include "router.h"
|
#include "router.h"
|
||||||
|
|
||||||
class min_values_t
|
class min_values_t {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
double min_delay;
|
double min_delay;
|
||||||
double min_dyn;
|
double min_dyn;
|
||||||
double min_leakage;
|
double min_leakage;
|
||||||
|
@ -58,17 +58,16 @@ class min_values_t
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
struct solution
|
struct solution {
|
||||||
{
|
int tag_array_index;
|
||||||
int tag_array_index;
|
int data_array_index;
|
||||||
int data_array_index;
|
list<mem_array *>::iterator tag_array_iter;
|
||||||
list<mem_array *>::iterator tag_array_iter;
|
list<mem_array *>::iterator data_array_iter;
|
||||||
list<mem_array *>::iterator data_array_iter;
|
double access_time;
|
||||||
double access_time;
|
double cycle_time;
|
||||||
double cycle_time;
|
double area;
|
||||||
double area;
|
double efficiency;
|
||||||
double efficiency;
|
powerDef total_power;
|
||||||
powerDef total_power;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -94,20 +93,19 @@ void solve(uca_org_t *fin_res);
|
||||||
void init_tech_params(double tech, bool is_tag);
|
void init_tech_params(double tech, bool is_tag);
|
||||||
|
|
||||||
|
|
||||||
struct calc_time_mt_wrapper_struct
|
struct calc_time_mt_wrapper_struct {
|
||||||
{
|
uint32_t tid;
|
||||||
uint32_t tid;
|
bool is_tag;
|
||||||
bool is_tag;
|
bool pure_ram;
|
||||||
bool pure_ram;
|
bool pure_cam;
|
||||||
bool pure_cam;
|
bool is_main_mem;
|
||||||
bool is_main_mem;
|
double Nspd_min;
|
||||||
double Nspd_min;
|
|
||||||
|
|
||||||
min_values_t * data_res;
|
min_values_t * data_res;
|
||||||
min_values_t * tag_res;
|
min_values_t * tag_res;
|
||||||
|
|
||||||
list<mem_array *> data_arr;
|
list<mem_array *> data_arr;
|
||||||
list<mem_array *> tag_arr;
|
list<mem_array *> tag_arr;
|
||||||
};
|
};
|
||||||
|
|
||||||
void *calc_time_mt_wrapper(void * void_obj);
|
void *calc_time_mt_wrapper(void * void_obj);
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -36,95 +37,107 @@ Arbiter::Arbiter(
|
||||||
double flit_size_,
|
double flit_size_,
|
||||||
double output_len,
|
double output_len,
|
||||||
TechnologyParameter::DeviceType *dt
|
TechnologyParameter::DeviceType *dt
|
||||||
):R(n_req), flit_size(flit_size_),
|
): R(n_req), flit_size(flit_size_),
|
||||||
o_len (output_len), deviceType(dt)
|
o_len (output_len), deviceType(dt) {
|
||||||
{
|
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
|
Vdd = dt->Vdd;
|
||||||
Vdd = dt->Vdd;
|
double technology = g_ip->F_sz_um;
|
||||||
double technology = g_ip->F_sz_um;
|
NTn1 = 13.5 * technology / 2;
|
||||||
NTn1 = 13.5*technology/2;
|
PTn1 = 76 * technology / 2;
|
||||||
PTn1 = 76*technology/2;
|
NTn2 = 13.5 * technology / 2;
|
||||||
NTn2 = 13.5*technology/2;
|
PTn2 = 76 * technology / 2;
|
||||||
PTn2 = 76*technology/2;
|
NTi = 12.5 * technology / 2;
|
||||||
NTi = 12.5*technology/2;
|
PTi = 25 * technology / 2;
|
||||||
PTi = 25*technology/2;
|
NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/
|
||||||
NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/
|
PTtr = 20 * technology / 2; /* pmos tr. length*/
|
||||||
PTtr = 20*technology/2; /* pmos tr. length*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Arbiter::~Arbiter(){}
|
Arbiter::~Arbiter() {}
|
||||||
|
|
||||||
double
|
double
|
||||||
Arbiter::arb_req() {
|
Arbiter::arb_req() {
|
||||||
double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) +
|
double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 *
|
||||||
gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
|
gate_C(NTn2, 0) +
|
||||||
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
|
gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) +
|
||||||
return temp;
|
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) +
|
||||||
|
drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def));
|
||||||
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Arbiter::arb_pri() {
|
Arbiter::arb_pri() {
|
||||||
double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance
|
/* switching capacitance of flip-flop is ignored */
|
||||||
of flip-flop is ignored */
|
double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0));
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double
|
double
|
||||||
Arbiter::arb_grant() {
|
Arbiter::arb_grant() {
|
||||||
double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
|
double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
|
||||||
return temp;
|
drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline();
|
||||||
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Arbiter::arb_int() {
|
Arbiter::arb_int() {
|
||||||
double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
|
double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 +
|
||||||
2*gate_C(NTn2, 0) + gate_C(PTn2, 0));
|
drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) +
|
||||||
return temp;
|
2 * gate_C(NTn2, 0) + gate_C(PTn2, 0));
|
||||||
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Arbiter::compute_power() {
|
Arbiter::compute_power() {
|
||||||
power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 +
|
power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() *
|
||||||
arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd);
|
Vdd * Vdd / 2 +
|
||||||
double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
|
arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd *
|
||||||
double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
|
Vdd);
|
||||||
double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
|
double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
|
||||||
double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor);
|
min_w_pmos * PTn1 * 2, 2, nor);
|
||||||
double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor);
|
double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R,
|
||||||
double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv);
|
min_w_pmos * PTn2 * R, 2, nor);
|
||||||
power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage
|
double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi,
|
||||||
power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd;
|
min_w_pmos * PTi, 1, inv);
|
||||||
|
double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2,
|
||||||
|
min_w_pmos * PTn1 * 2, 2, nor);
|
||||||
|
double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R,
|
||||||
|
min_w_pmos * PTn2 * R, 2, nor);
|
||||||
|
double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi,
|
||||||
|
min_w_pmos * PTi, 1, inv);
|
||||||
|
//FIXME include priority table leakage
|
||||||
|
power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd;
|
||||||
|
power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd +
|
||||||
|
not_leak_gate * Vdd;
|
||||||
}
|
}
|
||||||
|
|
||||||
double //wire cap with triple spacing
|
double //wire cap with triple spacing
|
||||||
Arbiter::Cw3(double length) {
|
Arbiter::Cw3(double length) {
|
||||||
Wire wc(g_ip->wt, length, 1, 3, 3);
|
Wire wc(g_ip->wt, length, 1, 3, 3);
|
||||||
double temp = (wc.wire_cap(length,true));
|
double temp = (wc.wire_cap(length, true));
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Arbiter::crossbar_ctrline() {
|
Arbiter::crossbar_ctrline() {
|
||||||
double temp = (Cw3(o_len * 1e-6 /* m */) +
|
double temp = (Cw3(o_len * 1e-6 /* m */) +
|
||||||
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
|
drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) +
|
||||||
gate_C(NTi, 0) + gate_C(PTi, 0));
|
gate_C(NTi, 0) + gate_C(PTi, 0));
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Arbiter::transmission_buf_ctrcap() {
|
Arbiter::transmission_buf_ctrcap() {
|
||||||
double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0);
|
double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0);
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Arbiter::print_arbiter()
|
void Arbiter::print_arbiter() {
|
||||||
{
|
cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
|
||||||
cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n";
|
cout << "Flit size : " << flit_size << " bits" << endl;
|
||||||
cout << "Flit size : " << flit_size << " bits" << endl;
|
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
|
||||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl;
|
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
|
||||||
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
266
ext/mcpat/cacti/bank.cc
Executable file → Normal file
266
ext/mcpat/cacti/bank.cc
Executable file → Normal file
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -36,163 +37,174 @@
|
||||||
#include "bank.h"
|
#include "bank.h"
|
||||||
|
|
||||||
Bank::Bank(const DynamicParameter & dyn_p):
|
Bank::Bank(const DynamicParameter & dyn_p):
|
||||||
dp(dyn_p), mat(dp),
|
dp(dyn_p), mat(dp),
|
||||||
num_addr_b_mat(dyn_p.number_addr_bits_mat),
|
num_addr_b_mat(dyn_p.number_addr_bits_mat),
|
||||||
num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir)
|
num_mats_hor_dir(dyn_p.num_mats_h_dir),
|
||||||
{
|
num_mats_ver_dir(dyn_p.num_mats_v_dir) {
|
||||||
int RWP;
|
int RWP;
|
||||||
int ERP;
|
int ERP;
|
||||||
int EWP;
|
int EWP;
|
||||||
int SCHP;
|
int SCHP;
|
||||||
|
|
||||||
if (dp.use_inp_params)
|
if (dp.use_inp_params) {
|
||||||
{
|
RWP = dp.num_rw_ports;
|
||||||
RWP = dp.num_rw_ports;
|
ERP = dp.num_rd_ports;
|
||||||
ERP = dp.num_rd_ports;
|
EWP = dp.num_wr_ports;
|
||||||
EWP = dp.num_wr_ports;
|
SCHP = dp.num_search_ports;
|
||||||
SCHP = dp.num_search_ports;
|
} else {
|
||||||
}
|
RWP = g_ip->num_rw_ports;
|
||||||
else
|
ERP = g_ip->num_rd_ports;
|
||||||
{
|
EWP = g_ip->num_wr_ports;
|
||||||
RWP = g_ip->num_rw_ports;
|
SCHP = g_ip->num_search_ports;
|
||||||
ERP = g_ip->num_rd_ports;
|
|
||||||
EWP = g_ip->num_wr_ports;
|
|
||||||
SCHP = g_ip->num_search_ports;
|
|
||||||
}
|
|
||||||
|
|
||||||
int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP);
|
|
||||||
int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
|
|
||||||
int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
|
|
||||||
int searchinbits;
|
|
||||||
int searchoutbits;
|
|
||||||
|
|
||||||
if (dp.fully_assoc || dp.pure_cam)
|
|
||||||
{
|
|
||||||
datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
|
|
||||||
dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
|
|
||||||
searchinbits = dp.num_si_b_bank_per_port * SCHP;
|
|
||||||
searchoutbits = dp.num_so_b_bank_per_port * SCHP;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!(dp.fully_assoc || dp.pure_cam))
|
|
||||||
{
|
|
||||||
if (g_ip->fast_access && dp.is_tag == false)
|
|
||||||
{
|
|
||||||
dataoutbits *= g_ip->data_assoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
int total_addrbits = (dp.number_addr_bits_mat +
|
||||||
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
|
dp.number_subbanks_decode) * (RWP + ERP + EWP);
|
||||||
htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
|
||||||
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
|
int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
|
||||||
htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
int searchinbits;
|
||||||
total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
int searchoutbits;
|
||||||
|
|
||||||
|
if (dp.fully_assoc || dp.pure_cam) {
|
||||||
|
datainbits = dp.num_di_b_bank_per_port * (RWP + EWP);
|
||||||
|
dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP);
|
||||||
|
searchinbits = dp.num_si_b_bank_per_port * SCHP;
|
||||||
|
searchoutbits = dp.num_so_b_bank_per_port * SCHP;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(dp.fully_assoc || dp.pure_cam)) {
|
||||||
|
if (g_ip->fast_access && dp.is_tag == false) {
|
||||||
|
dataoutbits *= g_ip->data_assoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w,
|
||||||
|
(double)mat.area.h,
|
||||||
|
total_addrbits, datainbits, 0, dataoutbits,
|
||||||
|
0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
|
||||||
|
Add_htree);
|
||||||
|
htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w,
|
||||||
|
(double)mat.area.h,
|
||||||
|
total_addrbits, datainbits, 0, dataoutbits,
|
||||||
|
0, num_mats_ver_dir * 2, num_mats_hor_dir * 2,
|
||||||
|
Data_in_htree);
|
||||||
|
htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w,
|
||||||
|
(double)mat.area.h,
|
||||||
|
total_addrbits, datainbits, 0, dataoutbits,
|
||||||
|
0, num_mats_ver_dir * 2,
|
||||||
|
num_mats_hor_dir * 2, Data_out_htree);
|
||||||
|
|
||||||
// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
|
// htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100,
|
||||||
// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
||||||
|
|
||||||
area.w = htree_in_data->area.w;
|
area.w = htree_in_data->area.w;
|
||||||
area.h = htree_in_data->area.h;
|
area.h = htree_in_data->area.h;
|
||||||
}
|
} else {
|
||||||
else
|
htree_in_add =
|
||||||
{
|
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||||
htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||||
total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree);
|
searchoutbits, num_mats_ver_dir * 2,
|
||||||
htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
num_mats_hor_dir * 2, Add_htree);
|
||||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree);
|
htree_in_data =
|
||||||
htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree);
|
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||||
htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
searchoutbits, num_mats_ver_dir * 2,
|
||||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true);
|
num_mats_hor_dir * 2, Data_in_htree);
|
||||||
htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h,
|
htree_out_data =
|
||||||
total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true);
|
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||||
|
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||||
|
searchoutbits, num_mats_ver_dir * 2,
|
||||||
|
num_mats_hor_dir * 2, Data_out_htree);
|
||||||
|
htree_in_search =
|
||||||
|
new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||||
|
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||||
|
searchoutbits, num_mats_ver_dir * 2,
|
||||||
|
num_mats_hor_dir * 2, Data_in_htree, true, true);
|
||||||
|
htree_out_search =
|
||||||
|
new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h,
|
||||||
|
total_addrbits, datainbits, searchinbits, dataoutbits,
|
||||||
|
searchoutbits, num_mats_ver_dir * 2,
|
||||||
|
num_mats_hor_dir * 2, Data_out_htree, true);
|
||||||
|
|
||||||
area.w = htree_in_data->area.w;
|
area.w = htree_in_data->area.w;
|
||||||
area.h = htree_in_data->area.h;
|
area.h = htree_in_data->area.h;
|
||||||
}
|
}
|
||||||
|
|
||||||
num_addr_b_row_dec = _log2(mat.subarray.num_rows);
|
num_addr_b_row_dec = _log2(mat.subarray.num_rows);
|
||||||
num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
|
num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec;
|
||||||
num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec;
|
num_addr_b_routed_to_mat_for_rd_or_wr =
|
||||||
|
num_addr_b_mat - num_addr_b_row_dec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Bank::~Bank()
|
Bank::~Bank() {
|
||||||
{
|
delete htree_in_add;
|
||||||
delete htree_in_add;
|
delete htree_out_data;
|
||||||
delete htree_out_data;
|
delete htree_in_data;
|
||||||
delete htree_in_data;
|
if (dp.fully_assoc || dp.pure_cam) {
|
||||||
if (dp.fully_assoc || dp.pure_cam)
|
delete htree_in_search;
|
||||||
{
|
delete htree_out_search;
|
||||||
delete htree_in_search;
|
}
|
||||||
delete htree_out_search;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double Bank::compute_delays(double inrisetime)
|
double Bank::compute_delays(double inrisetime) {
|
||||||
{
|
return mat.compute_delays(inrisetime);
|
||||||
return mat.compute_delays(inrisetime);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void Bank::compute_power_energy()
|
void Bank::compute_power_energy() {
|
||||||
{
|
mat.compute_power_energy();
|
||||||
mat.compute_power_energy();
|
|
||||||
|
|
||||||
if (!(dp.fully_assoc || dp.pure_cam))
|
if (!(dp.fully_assoc || dp.pure_cam)) {
|
||||||
{
|
power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||||
power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir;
|
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
||||||
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
||||||
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
|
||||||
|
|
||||||
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
|
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
|
||||||
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
|
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
|
||||||
|
|
||||||
power.readOp.leakage += htree_in_add->power.readOp.leakage;
|
power.readOp.leakage += htree_in_add->power.readOp.leakage;
|
||||||
power.readOp.leakage += htree_in_data->power.readOp.leakage;
|
power.readOp.leakage += htree_in_data->power.readOp.leakage;
|
||||||
power.readOp.leakage += htree_out_data->power.readOp.leakage;
|
power.readOp.leakage += htree_out_data->power.readOp.leakage;
|
||||||
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
||||||
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
||||||
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
{
|
|
||||||
|
|
||||||
power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
|
power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w
|
||||||
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats;
|
||||||
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats;
|
||||||
|
|
||||||
power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
|
power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats;
|
||||||
power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
|
power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic +
|
||||||
mat.power_sa.searchOp.dynamic +
|
mat.power_sa.searchOp.dynamic +
|
||||||
mat.power_bitline.searchOp.dynamic +
|
mat.power_bitline.searchOp.dynamic +
|
||||||
mat.power_subarray_out_drv.searchOp.dynamic+
|
mat.power_subarray_out_drv.searchOp.dynamic +
|
||||||
mat.ml_to_ram_wl_drv->power.readOp.dynamic;
|
mat.ml_to_ram_wl_drv->power.readOp.dynamic;
|
||||||
|
|
||||||
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
|
power.readOp.dynamic += htree_in_add->power.readOp.dynamic;
|
||||||
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
|
power.readOp.dynamic += htree_out_data->power.readOp.dynamic;
|
||||||
|
|
||||||
power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
|
power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic;
|
||||||
power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
|
power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic;
|
||||||
|
|
||||||
power.readOp.leakage += htree_in_add->power.readOp.leakage;
|
power.readOp.leakage += htree_in_add->power.readOp.leakage;
|
||||||
power.readOp.leakage += htree_in_data->power.readOp.leakage;
|
power.readOp.leakage += htree_in_data->power.readOp.leakage;
|
||||||
power.readOp.leakage += htree_out_data->power.readOp.leakage;
|
power.readOp.leakage += htree_out_data->power.readOp.leakage;
|
||||||
power.readOp.leakage += htree_in_search->power.readOp.leakage;
|
power.readOp.leakage += htree_in_search->power.readOp.leakage;
|
||||||
power.readOp.leakage += htree_out_search->power.readOp.leakage;
|
power.readOp.leakage += htree_out_search->power.readOp.leakage;
|
||||||
|
|
||||||
|
|
||||||
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage;
|
||||||
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage;
|
||||||
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage;
|
||||||
power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage;
|
||||||
power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
|
power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -39,9 +40,8 @@
|
||||||
#include "htree2.h"
|
#include "htree2.h"
|
||||||
#include "mat.h"
|
#include "mat.h"
|
||||||
|
|
||||||
class Bank : public Component
|
class Bank : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Bank(const DynamicParameter & dyn_p);
|
Bank(const DynamicParameter & dyn_p);
|
||||||
~Bank();
|
~Bank();
|
||||||
double compute_delays(double inrisetime); // return outrisetime
|
double compute_delays(double inrisetime); // return outrisetime
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -49,10 +50,10 @@ int combination(int n, int m);
|
||||||
|
|
||||||
//#define DBG
|
//#define DBG
|
||||||
#ifdef DBG
|
#ifdef DBG
|
||||||
#define PRINTDW(a);\
|
#define PRINTDW(a);\
|
||||||
a;
|
a;
|
||||||
#else
|
#else
|
||||||
#define PRINTDW(a);\
|
#define PRINTDW(a);\
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -76,7 +77,7 @@ enum Htree_type {
|
||||||
enum Gate_type {
|
enum Gate_type {
|
||||||
nmos,
|
nmos,
|
||||||
pmos,
|
pmos,
|
||||||
inv,
|
inv,
|
||||||
nand,
|
nand,
|
||||||
nor,
|
nor,
|
||||||
tri,
|
tri,
|
||||||
|
@ -164,13 +165,13 @@ double cmos_Ig_n(
|
||||||
double nWidth,
|
double nWidth,
|
||||||
bool _is_dram = false,
|
bool _is_dram = false,
|
||||||
bool _is_cell = false,
|
bool _is_cell = false,
|
||||||
bool _is_wl_tr= false);
|
bool _is_wl_tr = false);
|
||||||
|
|
||||||
double cmos_Ig_p(
|
double cmos_Ig_p(
|
||||||
double pWidth,
|
double pWidth,
|
||||||
bool _is_dram = false,
|
bool _is_dram = false,
|
||||||
bool _is_cell = false,
|
bool _is_cell = false,
|
||||||
bool _is_wl_tr= false);
|
bool _is_wl_tr = false);
|
||||||
|
|
||||||
|
|
||||||
double cmos_Isub_leakage(
|
double cmos_Isub_leakage(
|
||||||
|
@ -220,29 +221,29 @@ double shortcircuit_simple(
|
||||||
double vdd);
|
double vdd);
|
||||||
//set power point product mask; strictly speaking this is not real point product
|
//set power point product mask; strictly speaking this is not real point product
|
||||||
inline void set_pppm(
|
inline void set_pppm(
|
||||||
double * pppv,
|
double * pppv,
|
||||||
double a=1,
|
double a = 1,
|
||||||
double b=1,
|
double b = 1,
|
||||||
double c=1,
|
double c = 1,
|
||||||
double d=1
|
double d = 1
|
||||||
){
|
) {
|
||||||
pppv[0]= a;
|
pppv[0] = a;
|
||||||
pppv[1]= b;
|
pppv[1] = b;
|
||||||
pppv[2]= c;
|
pppv[2] = c;
|
||||||
pppv[3]= d;
|
pppv[3] = d;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void set_sppm(
|
inline void set_sppm(
|
||||||
double * sppv,
|
double * sppv,
|
||||||
double a=1,
|
double a = 1,
|
||||||
double b=1,
|
double b = 1,
|
||||||
double c=1,
|
double c = 1,
|
||||||
double d=1
|
double d = 1
|
||||||
){
|
) {
|
||||||
sppv[0]= a;
|
sppv[0] = a;
|
||||||
sppv[1]= b;
|
sppv[1] = b;
|
||||||
sppv[2]= c;
|
sppv[2] = c;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -47,127 +48,107 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
bool mem_array::lt(const mem_array * m1, const mem_array * m2)
|
bool mem_array::lt(const mem_array * m1, const mem_array * m2) {
|
||||||
{
|
if (m1->Nspd < m2->Nspd) return true;
|
||||||
if (m1->Nspd < m2->Nspd) return true;
|
else if (m1->Nspd > m2->Nspd) return false;
|
||||||
else if (m1->Nspd > m2->Nspd) return false;
|
else if (m1->Ndwl < m2->Ndwl) return true;
|
||||||
else if (m1->Ndwl < m2->Ndwl) return true;
|
else if (m1->Ndwl > m2->Ndwl) return false;
|
||||||
else if (m1->Ndwl > m2->Ndwl) return false;
|
else if (m1->Ndbl < m2->Ndbl) return true;
|
||||||
else if (m1->Ndbl < m2->Ndbl) return true;
|
else if (m1->Ndbl > m2->Ndbl) return false;
|
||||||
else if (m1->Ndbl > m2->Ndbl) return false;
|
else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
|
||||||
else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true;
|
else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
|
||||||
else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false;
|
else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
|
||||||
else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true;
|
else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
|
||||||
else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false;
|
else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
|
||||||
else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true;
|
else return false;
|
||||||
else return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void uca_org_t::find_delay()
|
void uca_org_t::find_delay() {
|
||||||
{
|
mem_array * data_arr = data_array2;
|
||||||
mem_array * data_arr = data_array2;
|
mem_array * tag_arr = tag_array2;
|
||||||
mem_array * tag_arr = tag_array2;
|
|
||||||
|
|
||||||
// check whether it is a regular cache or scratch ram
|
// check whether it is a regular cache or scratch ram
|
||||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
|
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
|
||||||
{
|
access_time = data_arr->access_time;
|
||||||
access_time = data_arr->access_time;
|
}
|
||||||
}
|
// Both tag and data lookup happen in parallel
|
||||||
// Both tag and data lookup happen in parallel
|
// and the entire set is sent over the data array h-tree without
|
||||||
// and the entire set is sent over the data array h-tree without
|
// waiting for the way-select signal --TODO add the corresponding
|
||||||
// waiting for the way-select signal --TODO add the corresponding
|
// power overhead Nav
|
||||||
// power overhead Nav
|
else if (g_ip->fast_access == true) {
|
||||||
else if (g_ip->fast_access == true)
|
access_time = MAX(tag_arr->access_time, data_arr->access_time);
|
||||||
{
|
}
|
||||||
access_time = MAX(tag_arr->access_time, data_arr->access_time);
|
// Tag is accessed first. On a hit, way-select signal along with the
|
||||||
}
|
// address is sent to read/write the appropriate block in the data
|
||||||
// Tag is accessed first. On a hit, way-select signal along with the
|
// array
|
||||||
// address is sent to read/write the appropriate block in the data
|
else if (g_ip->is_seq_acc == true) {
|
||||||
// array
|
access_time = tag_arr->access_time + data_arr->access_time;
|
||||||
else if (g_ip->is_seq_acc == true)
|
}
|
||||||
{
|
// Normal access: tag array access and data array access happen in parallel.
|
||||||
access_time = tag_arr->access_time + data_arr->access_time;
|
// But, the data array will wait for the way-select and transfer only the
|
||||||
}
|
// appropriate block over the h-tree.
|
||||||
// Normal access: tag array access and data array access happen in parallel.
|
else {
|
||||||
// But, the data array will wait for the way-select and transfer only the
|
access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
|
||||||
// appropriate block over the h-tree.
|
data_arr->delay_before_subarray_output_driver) +
|
||||||
else
|
data_arr->delay_from_subarray_output_driver_to_output;
|
||||||
{
|
|
||||||
access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder,
|
|
||||||
data_arr->delay_before_subarray_output_driver) +
|
|
||||||
data_arr->delay_from_subarray_output_driver_to_output;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void uca_org_t::find_energy()
|
|
||||||
{
|
|
||||||
if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache)
|
|
||||||
power = data_array2->power + tag_array2->power;
|
|
||||||
else
|
|
||||||
power = data_array2->power;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void uca_org_t::find_area()
|
|
||||||
{
|
|
||||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false)
|
|
||||||
{
|
|
||||||
cache_ht = data_array2->height;
|
|
||||||
cache_len = data_array2->width;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cache_ht = MAX(tag_array2->height, data_array2->height);
|
|
||||||
cache_len = tag_array2->width + data_array2->width;
|
|
||||||
}
|
|
||||||
area = cache_ht * cache_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
void uca_org_t::adjust_area()
|
|
||||||
{
|
|
||||||
double area_adjust;
|
|
||||||
if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)
|
|
||||||
{
|
|
||||||
if (data_array2->area_efficiency/100.0<0.2)
|
|
||||||
{
|
|
||||||
//area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
|
|
||||||
area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0));
|
|
||||||
cache_ht = cache_ht/area_adjust;
|
|
||||||
cache_len = cache_len/area_adjust;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
area = cache_ht * cache_len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void uca_org_t::find_cyc()
|
|
||||||
{
|
|
||||||
if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false)
|
void uca_org_t::find_energy() {
|
||||||
{
|
if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc))
|
||||||
cycle_time = data_array2->cycle_time;
|
power = data_array2->power + tag_array2->power;
|
||||||
}
|
else
|
||||||
else
|
power = data_array2->power;
|
||||||
{
|
}
|
||||||
cycle_time = MAX(tag_array2->cycle_time,
|
|
||||||
data_array2->cycle_time);
|
|
||||||
}
|
|
||||||
|
void uca_org_t::find_area() {
|
||||||
|
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
|
||||||
|
cache_ht = data_array2->height;
|
||||||
|
cache_len = data_array2->width;
|
||||||
|
} else {
|
||||||
|
cache_ht = MAX(tag_array2->height, data_array2->height);
|
||||||
|
cache_len = tag_array2->width + data_array2->width;
|
||||||
|
}
|
||||||
|
area = cache_ht * cache_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
void uca_org_t::adjust_area() {
|
||||||
|
double area_adjust;
|
||||||
|
if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) {
|
||||||
|
if (data_array2->area_efficiency / 100.0 < 0.2) {
|
||||||
|
//area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2));
|
||||||
|
area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0));
|
||||||
|
cache_ht = cache_ht / area_adjust;
|
||||||
|
cache_len = cache_len / area_adjust;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
area = cache_ht * cache_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
void uca_org_t::find_cyc() {
|
||||||
|
if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
|
||||||
|
cycle_time = data_array2->cycle_time;
|
||||||
|
} else {
|
||||||
|
cycle_time = MAX(tag_array2->cycle_time,
|
||||||
|
data_array2->cycle_time);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uca_org_t :: uca_org_t()
|
uca_org_t :: uca_org_t()
|
||||||
:tag_array2(0),
|
: tag_array2(0),
|
||||||
data_array2(0)
|
data_array2(0) {
|
||||||
{
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void uca_org_t :: cleanup()
|
void uca_org_t :: cleanup() {
|
||||||
{
|
if (data_array2 != 0)
|
||||||
if (data_array2!=0)
|
delete data_array2;
|
||||||
delete data_array2;
|
if (tag_array2 != 0)
|
||||||
if (tag_array2!=0)
|
delete tag_array2;
|
||||||
delete tag_array2;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -50,9 +51,8 @@ class mem_array;
|
||||||
class uca_org_t;
|
class uca_org_t;
|
||||||
|
|
||||||
|
|
||||||
class powerComponents
|
class powerComponents {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
double dynamic;
|
double dynamic;
|
||||||
double leakage;
|
double leakage;
|
||||||
double gate_leakage;
|
double gate_leakage;
|
||||||
|
@ -60,17 +60,24 @@ class powerComponents
|
||||||
double longer_channel_leakage;
|
double longer_channel_leakage;
|
||||||
|
|
||||||
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
|
powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { }
|
||||||
powerComponents(const powerComponents & obj) { *this = obj; }
|
powerComponents(const powerComponents & obj) {
|
||||||
powerComponents & operator=(const powerComponents & rhs)
|
*this = obj;
|
||||||
{
|
}
|
||||||
dynamic = rhs.dynamic;
|
powerComponents & operator=(const powerComponents & rhs) {
|
||||||
leakage = rhs.leakage;
|
dynamic = rhs.dynamic;
|
||||||
gate_leakage = rhs.gate_leakage;
|
leakage = rhs.leakage;
|
||||||
short_circuit = rhs.short_circuit;
|
gate_leakage = rhs.gate_leakage;
|
||||||
longer_channel_leakage = rhs.longer_channel_leakage;
|
short_circuit = rhs.short_circuit;
|
||||||
return *this;
|
longer_channel_leakage = rhs.longer_channel_leakage;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
void reset() {
|
||||||
|
dynamic = 0;
|
||||||
|
leakage = 0;
|
||||||
|
gate_leakage = 0;
|
||||||
|
short_circuit = 0;
|
||||||
|
longer_channel_leakage = 0;
|
||||||
}
|
}
|
||||||
void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;}
|
|
||||||
|
|
||||||
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
|
friend powerComponents operator+(const powerComponents & x, const powerComponents & y);
|
||||||
friend powerComponents operator*(const powerComponents & x, double const * const y);
|
friend powerComponents operator*(const powerComponents & x, double const * const y);
|
||||||
|
@ -78,22 +85,24 @@ class powerComponents
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class powerDef
|
class powerDef {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
powerComponents readOp;
|
powerComponents readOp;
|
||||||
powerComponents writeOp;
|
powerComponents writeOp;
|
||||||
powerComponents searchOp;//Sheng: for CAM and FA
|
powerComponents searchOp;//Sheng: for CAM and FA
|
||||||
|
|
||||||
powerDef() : readOp(), writeOp(), searchOp() { }
|
powerDef() : readOp(), writeOp(), searchOp() { }
|
||||||
void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();}
|
void reset() {
|
||||||
|
readOp.reset();
|
||||||
|
writeOp.reset();
|
||||||
|
searchOp.reset();
|
||||||
|
}
|
||||||
|
|
||||||
friend powerDef operator+(const powerDef & x, const powerDef & y);
|
friend powerDef operator+(const powerDef & x, const powerDef & y);
|
||||||
friend powerDef operator*(const powerDef & x, double const * const y);
|
friend powerDef operator*(const powerDef & x, double const * const y);
|
||||||
};
|
};
|
||||||
|
|
||||||
enum Wire_type
|
enum Wire_type {
|
||||||
{
|
|
||||||
Global /* gloabl wires with repeaters */,
|
Global /* gloabl wires with repeaters */,
|
||||||
Global_5 /* 5% delay penalty */,
|
Global_5 /* 5% delay penalty */,
|
||||||
Global_10 /* 10% delay penalty */,
|
Global_10 /* 10% delay penalty */,
|
||||||
|
@ -108,12 +117,12 @@ enum Wire_type
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class InputParameter
|
class InputParameter {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
void parse_cfg(const string & infile);
|
void parse_cfg(const string & infile);
|
||||||
|
|
||||||
bool error_checking(); // return false if the input parameters are problematic
|
// return false if the input parameters are problematic
|
||||||
|
bool error_checking(string name = "CACTI");
|
||||||
void display_ip();
|
void display_ip();
|
||||||
|
|
||||||
unsigned int cache_sz; // in bytes
|
unsigned int cache_sz; // in bytes
|
||||||
|
@ -172,14 +181,14 @@ class InputParameter
|
||||||
int force_nuca_bank;
|
int force_nuca_bank;
|
||||||
|
|
||||||
int delay_wt, dynamic_power_wt, leakage_power_wt,
|
int delay_wt, dynamic_power_wt, leakage_power_wt,
|
||||||
cycle_time_wt, area_wt;
|
cycle_time_wt, area_wt;
|
||||||
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
|
int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
|
||||||
cycle_time_wt_nuca, area_wt_nuca;
|
cycle_time_wt_nuca, area_wt_nuca;
|
||||||
|
|
||||||
int delay_dev, dynamic_power_dev, leakage_power_dev,
|
int delay_dev, dynamic_power_dev, leakage_power_dev,
|
||||||
cycle_time_dev, area_dev;
|
cycle_time_dev, area_dev;
|
||||||
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
|
int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
|
||||||
cycle_time_dev_nuca, area_dev_nuca;
|
cycle_time_dev_nuca, area_dev_nuca;
|
||||||
int ed; //ED or ED2 optimization
|
int ed; //ED or ED2 optimization
|
||||||
int nuca;
|
int nuca;
|
||||||
|
|
||||||
|
@ -194,167 +203,113 @@ class InputParameter
|
||||||
|
|
||||||
|
|
||||||
bool add_ecc_b_;
|
bool add_ecc_b_;
|
||||||
//parameters for design constraint
|
//parameters for design constraint
|
||||||
double throughput;
|
double throughput;
|
||||||
double latency;
|
double latency;
|
||||||
bool pipelinable;
|
bool pipelinable;
|
||||||
int pipeline_stages;
|
int pipeline_stages;
|
||||||
int per_stage_vector;
|
int per_stage_vector;
|
||||||
bool with_clock_grid;
|
bool with_clock_grid;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
typedef struct{
|
typedef struct {
|
||||||
int Ndwl;
|
int Ndwl;
|
||||||
int Ndbl;
|
int Ndbl;
|
||||||
double Nspd;
|
double Nspd;
|
||||||
int deg_bl_muxing;
|
int deg_bl_muxing;
|
||||||
int Ndsam_lev_1;
|
int Ndsam_lev_1;
|
||||||
int Ndsam_lev_2;
|
int Ndsam_lev_2;
|
||||||
int number_activated_mats_horizontal_direction;
|
int number_activated_mats_horizontal_direction;
|
||||||
int number_subbanks;
|
int number_subbanks;
|
||||||
int page_size_in_bits;
|
int page_size_in_bits;
|
||||||
double delay_route_to_bank;
|
double delay_route_to_bank;
|
||||||
double delay_crossbar;
|
double delay_crossbar;
|
||||||
double delay_addr_din_horizontal_htree;
|
double delay_addr_din_horizontal_htree;
|
||||||
double delay_addr_din_vertical_htree;
|
double delay_addr_din_vertical_htree;
|
||||||
double delay_row_predecode_driver_and_block;
|
double delay_row_predecode_driver_and_block;
|
||||||
double delay_row_decoder;
|
double delay_row_decoder;
|
||||||
double delay_bitlines;
|
double delay_bitlines;
|
||||||
double delay_sense_amp;
|
double delay_sense_amp;
|
||||||
double delay_subarray_output_driver;
|
double delay_subarray_output_driver;
|
||||||
double delay_bit_mux_predecode_driver_and_block;
|
double delay_bit_mux_predecode_driver_and_block;
|
||||||
double delay_bit_mux_decoder;
|
double delay_bit_mux_decoder;
|
||||||
double delay_senseamp_mux_lev_1_predecode_driver_and_block;
|
double delay_senseamp_mux_lev_1_predecode_driver_and_block;
|
||||||
double delay_senseamp_mux_lev_1_decoder;
|
double delay_senseamp_mux_lev_1_decoder;
|
||||||
double delay_senseamp_mux_lev_2_predecode_driver_and_block;
|
double delay_senseamp_mux_lev_2_predecode_driver_and_block;
|
||||||
double delay_senseamp_mux_lev_2_decoder;
|
double delay_senseamp_mux_lev_2_decoder;
|
||||||
double delay_input_htree;
|
double delay_input_htree;
|
||||||
double delay_output_htree;
|
double delay_output_htree;
|
||||||
double delay_dout_vertical_htree;
|
double delay_dout_vertical_htree;
|
||||||
double delay_dout_horizontal_htree;
|
double delay_dout_horizontal_htree;
|
||||||
double delay_comparator;
|
double delay_comparator;
|
||||||
double access_time;
|
double access_time;
|
||||||
double cycle_time;
|
double cycle_time;
|
||||||
double multisubbank_interleave_cycle_time;
|
double multisubbank_interleave_cycle_time;
|
||||||
double delay_request_network;
|
double delay_request_network;
|
||||||
double delay_inside_mat;
|
double delay_inside_mat;
|
||||||
double delay_reply_network;
|
double delay_reply_network;
|
||||||
double trcd;
|
double trcd;
|
||||||
double cas_latency;
|
double cas_latency;
|
||||||
double precharge_delay;
|
double precharge_delay;
|
||||||
powerDef power_routing_to_bank;
|
powerDef power_routing_to_bank;
|
||||||
powerDef power_addr_input_htree;
|
powerDef power_addr_input_htree;
|
||||||
powerDef power_data_input_htree;
|
powerDef power_data_input_htree;
|
||||||
powerDef power_data_output_htree;
|
powerDef power_data_output_htree;
|
||||||
powerDef power_addr_horizontal_htree;
|
powerDef power_addr_horizontal_htree;
|
||||||
powerDef power_datain_horizontal_htree;
|
powerDef power_datain_horizontal_htree;
|
||||||
powerDef power_dataout_horizontal_htree;
|
powerDef power_dataout_horizontal_htree;
|
||||||
powerDef power_addr_vertical_htree;
|
powerDef power_addr_vertical_htree;
|
||||||
powerDef power_datain_vertical_htree;
|
powerDef power_datain_vertical_htree;
|
||||||
powerDef power_row_predecoder_drivers;
|
powerDef power_row_predecoder_drivers;
|
||||||
powerDef power_row_predecoder_blocks;
|
powerDef power_row_predecoder_blocks;
|
||||||
powerDef power_row_decoders;
|
powerDef power_row_decoders;
|
||||||
powerDef power_bit_mux_predecoder_drivers;
|
powerDef power_bit_mux_predecoder_drivers;
|
||||||
powerDef power_bit_mux_predecoder_blocks;
|
powerDef power_bit_mux_predecoder_blocks;
|
||||||
powerDef power_bit_mux_decoders;
|
powerDef power_bit_mux_decoders;
|
||||||
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
|
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
|
||||||
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
|
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
|
||||||
powerDef power_senseamp_mux_lev_1_decoders;
|
powerDef power_senseamp_mux_lev_1_decoders;
|
||||||
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
|
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
|
||||||
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
|
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
|
||||||
powerDef power_senseamp_mux_lev_2_decoders;
|
powerDef power_senseamp_mux_lev_2_decoders;
|
||||||
powerDef power_bitlines;
|
powerDef power_bitlines;
|
||||||
powerDef power_sense_amps;
|
powerDef power_sense_amps;
|
||||||
powerDef power_prechg_eq_drivers;
|
powerDef power_prechg_eq_drivers;
|
||||||
powerDef power_output_drivers_at_subarray;
|
powerDef power_output_drivers_at_subarray;
|
||||||
powerDef power_dataout_vertical_htree;
|
powerDef power_dataout_vertical_htree;
|
||||||
powerDef power_comparators;
|
powerDef power_comparators;
|
||||||
powerDef power_crossbar;
|
powerDef power_crossbar;
|
||||||
powerDef total_power;
|
powerDef total_power;
|
||||||
double area;
|
double area;
|
||||||
double all_banks_height;
|
double all_banks_height;
|
||||||
double all_banks_width;
|
double all_banks_width;
|
||||||
double bank_height;
|
double bank_height;
|
||||||
double bank_width;
|
double bank_width;
|
||||||
double subarray_memory_cell_area_height;
|
double subarray_memory_cell_area_height;
|
||||||
double subarray_memory_cell_area_width;
|
double subarray_memory_cell_area_width;
|
||||||
double mat_height;
|
double mat_height;
|
||||||
double mat_width;
|
double mat_width;
|
||||||
double routing_area_height_within_bank;
|
double routing_area_height_within_bank;
|
||||||
double routing_area_width_within_bank;
|
double routing_area_width_within_bank;
|
||||||
double area_efficiency;
|
double area_efficiency;
|
||||||
// double perc_power_dyn_routing_to_bank;
|
double refresh_power;
|
||||||
// double perc_power_dyn_addr_horizontal_htree;
|
double dram_refresh_period;
|
||||||
// double perc_power_dyn_datain_horizontal_htree;
|
double dram_array_availability;
|
||||||
// double perc_power_dyn_dataout_horizontal_htree;
|
double dyn_read_energy_from_closed_page;
|
||||||
// double perc_power_dyn_addr_vertical_htree;
|
double dyn_read_energy_from_open_page;
|
||||||
// double perc_power_dyn_datain_vertical_htree;
|
double leak_power_subbank_closed_page;
|
||||||
// double perc_power_dyn_row_predecoder_drivers;
|
double leak_power_subbank_open_page;
|
||||||
// double perc_power_dyn_row_predecoder_blocks;
|
double leak_power_request_and_reply_networks;
|
||||||
// double perc_power_dyn_row_decoders;
|
double activate_energy;
|
||||||
// double perc_power_dyn_bit_mux_predecoder_drivers;
|
double read_energy;
|
||||||
// double perc_power_dyn_bit_mux_predecoder_blocks;
|
double write_energy;
|
||||||
// double perc_power_dyn_bit_mux_decoders;
|
double precharge_energy;
|
||||||
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers;
|
|
||||||
// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks;
|
|
||||||
// double perc_power_dyn_senseamp_mux_lev_1_decoders;
|
|
||||||
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers;
|
|
||||||
// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks;
|
|
||||||
// double perc_power_dyn_senseamp_mux_lev_2_decoders;
|
|
||||||
// double perc_power_dyn_bitlines;
|
|
||||||
// double perc_power_dyn_sense_amps;
|
|
||||||
// double perc_power_dyn_prechg_eq_drivers;
|
|
||||||
// double perc_power_dyn_subarray_output_drivers;
|
|
||||||
// double perc_power_dyn_dataout_vertical_htree;
|
|
||||||
// double perc_power_dyn_comparators;
|
|
||||||
// double perc_power_dyn_crossbar;
|
|
||||||
// double perc_power_dyn_spent_outside_mats;
|
|
||||||
// double perc_power_leak_routing_to_bank;
|
|
||||||
// double perc_power_leak_addr_horizontal_htree;
|
|
||||||
// double perc_power_leak_datain_horizontal_htree;
|
|
||||||
// double perc_power_leak_dataout_horizontal_htree;
|
|
||||||
// double perc_power_leak_addr_vertical_htree;
|
|
||||||
// double perc_power_leak_datain_vertical_htree;
|
|
||||||
// double perc_power_leak_row_predecoder_drivers;
|
|
||||||
// double perc_power_leak_row_predecoder_blocks;
|
|
||||||
// double perc_power_leak_row_decoders;
|
|
||||||
// double perc_power_leak_bit_mux_predecoder_drivers;
|
|
||||||
// double perc_power_leak_bit_mux_predecoder_blocks;
|
|
||||||
// double perc_power_leak_bit_mux_decoders;
|
|
||||||
// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers;
|
|
||||||
// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks;
|
|
||||||
// double perc_power_leak_senseamp_mux_lev_1_decoders;
|
|
||||||
// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers;
|
|
||||||
// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks;
|
|
||||||
// double perc_power_leak_senseamp_mux_lev_2_decoders;
|
|
||||||
// double perc_power_leak_bitlines;
|
|
||||||
// double perc_power_leak_sense_amps;
|
|
||||||
// double perc_power_leak_prechg_eq_drivers;
|
|
||||||
// double perc_power_leak_subarray_output_drivers;
|
|
||||||
// double perc_power_leak_dataout_vertical_htree;
|
|
||||||
// double perc_power_leak_comparators;
|
|
||||||
// double perc_power_leak_crossbar;
|
|
||||||
// double perc_leak_mats;
|
|
||||||
// double perc_active_mats;
|
|
||||||
double refresh_power;
|
|
||||||
double dram_refresh_period;
|
|
||||||
double dram_array_availability;
|
|
||||||
double dyn_read_energy_from_closed_page;
|
|
||||||
double dyn_read_energy_from_open_page;
|
|
||||||
double leak_power_subbank_closed_page;
|
|
||||||
double leak_power_subbank_open_page;
|
|
||||||
double leak_power_request_and_reply_networks;
|
|
||||||
double activate_energy;
|
|
||||||
double read_energy;
|
|
||||||
double write_energy;
|
|
||||||
double precharge_energy;
|
|
||||||
} results_mem_array;
|
} results_mem_array;
|
||||||
|
|
||||||
|
|
||||||
class uca_org_t
|
class uca_org_t {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
mem_array * tag_array2;
|
mem_array * tag_array2;
|
||||||
mem_array * data_array2;
|
mem_array * data_array2;
|
||||||
double access_time;
|
double access_time;
|
||||||
|
@ -378,7 +333,7 @@ class uca_org_t
|
||||||
void find_cyc();
|
void find_cyc();
|
||||||
void adjust_area();//for McPAT only to adjust routing overhead
|
void adjust_area();//for McPAT only to adjust routing overhead
|
||||||
void cleanup();
|
void cleanup();
|
||||||
~uca_org_t(){};
|
~uca_org_t() {};
|
||||||
};
|
};
|
||||||
|
|
||||||
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
|
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
|
||||||
|
@ -387,103 +342,62 @@ uca_org_t cacti_interface(const string & infile_name);
|
||||||
//McPAT's plain interface, please keep !!!
|
//McPAT's plain interface, please keep !!!
|
||||||
uca_org_t cacti_interface(InputParameter * const local_interface);
|
uca_org_t cacti_interface(InputParameter * const local_interface);
|
||||||
//McPAT's plain interface, please keep !!!
|
//McPAT's plain interface, please keep !!!
|
||||||
uca_org_t init_interface(InputParameter * const local_interface);
|
uca_org_t init_interface(InputParameter * const local_interface,
|
||||||
|
const string &name);
|
||||||
//McPAT's plain interface, please keep !!!
|
//McPAT's plain interface, please keep !!!
|
||||||
uca_org_t cacti_interface(
|
uca_org_t cacti_interface(
|
||||||
int cache_size,
|
int cache_size,
|
||||||
int line_size,
|
int line_size,
|
||||||
int associativity,
|
int associativity,
|
||||||
int rw_ports,
|
int rw_ports,
|
||||||
int excl_read_ports,
|
int excl_read_ports,
|
||||||
int excl_write_ports,
|
int excl_write_ports,
|
||||||
int single_ended_read_ports,
|
int single_ended_read_ports,
|
||||||
int search_ports,
|
int search_ports,
|
||||||
int banks,
|
int banks,
|
||||||
double tech_node,
|
double tech_node,
|
||||||
int output_width,
|
int output_width,
|
||||||
int specific_tag,
|
int specific_tag,
|
||||||
int tag_width,
|
int tag_width,
|
||||||
int access_mode,
|
int access_mode,
|
||||||
int cache,
|
int cache,
|
||||||
int main_mem,
|
int main_mem,
|
||||||
int obj_func_delay,
|
int obj_func_delay,
|
||||||
int obj_func_dynamic_power,
|
int obj_func_dynamic_power,
|
||||||
int obj_func_leakage_power,
|
int obj_func_leakage_power,
|
||||||
int obj_func_cycle_time,
|
int obj_func_cycle_time,
|
||||||
int obj_func_area,
|
int obj_func_area,
|
||||||
int dev_func_delay,
|
int dev_func_delay,
|
||||||
int dev_func_dynamic_power,
|
int dev_func_dynamic_power,
|
||||||
int dev_func_leakage_power,
|
int dev_func_leakage_power,
|
||||||
int dev_func_area,
|
int dev_func_area,
|
||||||
int dev_func_cycle_time,
|
int dev_func_cycle_time,
|
||||||
int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
|
int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
|
||||||
int temp,
|
int temp,
|
||||||
int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
|
int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
|
||||||
int data_arr_ram_cell_tech_flavor_in,
|
int data_arr_ram_cell_tech_flavor_in,
|
||||||
int data_arr_peri_global_tech_flavor_in,
|
int data_arr_peri_global_tech_flavor_in,
|
||||||
int tag_arr_ram_cell_tech_flavor_in,
|
int tag_arr_ram_cell_tech_flavor_in,
|
||||||
int tag_arr_peri_global_tech_flavor_in,
|
int tag_arr_peri_global_tech_flavor_in,
|
||||||
int interconnect_projection_type_in,
|
int interconnect_projection_type_in,
|
||||||
int wire_inside_mat_type_in,
|
int wire_inside_mat_type_in,
|
||||||
int wire_outside_mat_type_in,
|
int wire_outside_mat_type_in,
|
||||||
int REPEATERS_IN_HTREE_SEGMENTS_in,
|
int REPEATERS_IN_HTREE_SEGMENTS_in,
|
||||||
int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
|
int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
|
||||||
int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
|
int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
|
||||||
int PAGE_SIZE_BITS_in,
|
int PAGE_SIZE_BITS_in,
|
||||||
int BURST_LENGTH_in,
|
int BURST_LENGTH_in,
|
||||||
int INTERNAL_PREFETCH_WIDTH_in,
|
int INTERNAL_PREFETCH_WIDTH_in,
|
||||||
int force_wiretype,
|
int force_wiretype,
|
||||||
int wiretype,
|
int wiretype,
|
||||||
int force_config,
|
int force_config,
|
||||||
int ndwl,
|
int ndwl,
|
||||||
int ndbl,
|
int ndbl,
|
||||||
int nspd,
|
int nspd,
|
||||||
int ndcm,
|
int ndcm,
|
||||||
int ndsam1,
|
int ndsam1,
|
||||||
int ndsam2,
|
int ndsam2,
|
||||||
int ecc);
|
int ecc);
|
||||||
// int cache_size,
|
|
||||||
// int line_size,
|
|
||||||
// int associativity,
|
|
||||||
// int rw_ports,
|
|
||||||
// int excl_read_ports,
|
|
||||||
// int excl_write_ports,
|
|
||||||
// int single_ended_read_ports,
|
|
||||||
// int banks,
|
|
||||||
// double tech_node,
|
|
||||||
// int output_width,
|
|
||||||
// int specific_tag,
|
|
||||||
// int tag_width,
|
|
||||||
// int access_mode,
|
|
||||||
// int cache,
|
|
||||||
// int main_mem,
|
|
||||||
// int obj_func_delay,
|
|
||||||
// int obj_func_dynamic_power,
|
|
||||||
// int obj_func_leakage_power,
|
|
||||||
// int obj_func_area,
|
|
||||||
// int obj_func_cycle_time,
|
|
||||||
// int dev_func_delay,
|
|
||||||
// int dev_func_dynamic_power,
|
|
||||||
// int dev_func_leakage_power,
|
|
||||||
// int dev_func_area,
|
|
||||||
// int dev_func_cycle_time,
|
|
||||||
// int temp,
|
|
||||||
// int data_arr_ram_cell_tech_flavor_in,
|
|
||||||
// int data_arr_peri_global_tech_flavor_in,
|
|
||||||
// int tag_arr_ram_cell_tech_flavor_in,
|
|
||||||
// int tag_arr_peri_global_tech_flavor_in,
|
|
||||||
// int interconnect_projection_type_in,
|
|
||||||
// int wire_inside_mat_type_in,
|
|
||||||
// int wire_outside_mat_type_in,
|
|
||||||
// int REPEATERS_IN_HTREE_SEGMENTS_in,
|
|
||||||
// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
|
|
||||||
// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
|
|
||||||
//// double MAXAREACONSTRAINT_PERC_in,
|
|
||||||
//// double MAXACCTIMECONSTRAINT_PERC_in,
|
|
||||||
//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in,
|
|
||||||
// int PAGE_SIZE_BITS_in,
|
|
||||||
// int BURST_LENGTH_in,
|
|
||||||
// int INTERNAL_PREFETCH_WIDTH_in);
|
|
||||||
|
|
||||||
//Naveen's interface
|
//Naveen's interface
|
||||||
uca_org_t cacti_interface(
|
uca_org_t cacti_interface(
|
||||||
|
@ -542,91 +456,90 @@ uca_org_t cacti_interface(
|
||||||
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
|
int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
|
||||||
int p_input);
|
int p_input);
|
||||||
|
|
||||||
class mem_array
|
class mem_array {
|
||||||
{
|
public:
|
||||||
public:
|
int Ndcm;
|
||||||
int Ndcm;
|
int Ndwl;
|
||||||
int Ndwl;
|
int Ndbl;
|
||||||
int Ndbl;
|
double Nspd;
|
||||||
double Nspd;
|
int deg_bl_muxing;
|
||||||
int deg_bl_muxing;
|
int Ndsam_lev_1;
|
||||||
int Ndsam_lev_1;
|
int Ndsam_lev_2;
|
||||||
int Ndsam_lev_2;
|
double access_time;
|
||||||
double access_time;
|
double cycle_time;
|
||||||
double cycle_time;
|
double multisubbank_interleave_cycle_time;
|
||||||
double multisubbank_interleave_cycle_time;
|
double area_ram_cells;
|
||||||
double area_ram_cells;
|
double area;
|
||||||
double area;
|
powerDef power;
|
||||||
powerDef power;
|
double delay_senseamp_mux_decoder;
|
||||||
double delay_senseamp_mux_decoder;
|
double delay_before_subarray_output_driver;
|
||||||
double delay_before_subarray_output_driver;
|
double delay_from_subarray_output_driver_to_output;
|
||||||
double delay_from_subarray_output_driver_to_output;
|
double height;
|
||||||
double height;
|
double width;
|
||||||
double width;
|
|
||||||
|
|
||||||
double mat_height;
|
double mat_height;
|
||||||
double mat_length;
|
double mat_length;
|
||||||
double subarray_length;
|
double subarray_length;
|
||||||
double subarray_height;
|
double subarray_height;
|
||||||
|
|
||||||
double delay_route_to_bank,
|
double delay_route_to_bank,
|
||||||
delay_input_htree,
|
delay_input_htree,
|
||||||
delay_row_predecode_driver_and_block,
|
delay_row_predecode_driver_and_block,
|
||||||
delay_row_decoder,
|
delay_row_decoder,
|
||||||
delay_bitlines,
|
delay_bitlines,
|
||||||
delay_sense_amp,
|
delay_sense_amp,
|
||||||
delay_subarray_output_driver,
|
delay_subarray_output_driver,
|
||||||
delay_dout_htree,
|
delay_dout_htree,
|
||||||
delay_comparator,
|
delay_comparator,
|
||||||
delay_matchlines;
|
delay_matchlines;
|
||||||
|
|
||||||
double all_banks_height,
|
double all_banks_height,
|
||||||
all_banks_width,
|
all_banks_width,
|
||||||
area_efficiency;
|
area_efficiency;
|
||||||
|
|
||||||
powerDef power_routing_to_bank;
|
powerDef power_routing_to_bank;
|
||||||
powerDef power_addr_input_htree;
|
powerDef power_addr_input_htree;
|
||||||
powerDef power_data_input_htree;
|
powerDef power_data_input_htree;
|
||||||
powerDef power_data_output_htree;
|
powerDef power_data_output_htree;
|
||||||
powerDef power_htree_in_search;
|
powerDef power_htree_in_search;
|
||||||
powerDef power_htree_out_search;
|
powerDef power_htree_out_search;
|
||||||
powerDef power_row_predecoder_drivers;
|
powerDef power_row_predecoder_drivers;
|
||||||
powerDef power_row_predecoder_blocks;
|
powerDef power_row_predecoder_blocks;
|
||||||
powerDef power_row_decoders;
|
powerDef power_row_decoders;
|
||||||
powerDef power_bit_mux_predecoder_drivers;
|
powerDef power_bit_mux_predecoder_drivers;
|
||||||
powerDef power_bit_mux_predecoder_blocks;
|
powerDef power_bit_mux_predecoder_blocks;
|
||||||
powerDef power_bit_mux_decoders;
|
powerDef power_bit_mux_decoders;
|
||||||
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
|
powerDef power_senseamp_mux_lev_1_predecoder_drivers;
|
||||||
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
|
powerDef power_senseamp_mux_lev_1_predecoder_blocks;
|
||||||
powerDef power_senseamp_mux_lev_1_decoders;
|
powerDef power_senseamp_mux_lev_1_decoders;
|
||||||
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
|
powerDef power_senseamp_mux_lev_2_predecoder_drivers;
|
||||||
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
|
powerDef power_senseamp_mux_lev_2_predecoder_blocks;
|
||||||
powerDef power_senseamp_mux_lev_2_decoders;
|
powerDef power_senseamp_mux_lev_2_decoders;
|
||||||
powerDef power_bitlines;
|
powerDef power_bitlines;
|
||||||
powerDef power_sense_amps;
|
powerDef power_sense_amps;
|
||||||
powerDef power_prechg_eq_drivers;
|
powerDef power_prechg_eq_drivers;
|
||||||
powerDef power_output_drivers_at_subarray;
|
powerDef power_output_drivers_at_subarray;
|
||||||
powerDef power_dataout_vertical_htree;
|
powerDef power_dataout_vertical_htree;
|
||||||
powerDef power_comparators;
|
powerDef power_comparators;
|
||||||
|
|
||||||
powerDef power_cam_bitline_precharge_eq_drv;
|
powerDef power_cam_bitline_precharge_eq_drv;
|
||||||
powerDef power_searchline;
|
powerDef power_searchline;
|
||||||
powerDef power_searchline_precharge;
|
powerDef power_searchline_precharge;
|
||||||
powerDef power_matchlines;
|
powerDef power_matchlines;
|
||||||
powerDef power_matchline_precharge;
|
powerDef power_matchline_precharge;
|
||||||
powerDef power_matchline_to_wordline_drv;
|
powerDef power_matchline_to_wordline_drv;
|
||||||
|
|
||||||
min_values_t *arr_min;
|
min_values_t *arr_min;
|
||||||
enum Wire_type wt;
|
enum Wire_type wt;
|
||||||
|
|
||||||
// dram stats
|
// dram stats
|
||||||
double activate_energy, read_energy, write_energy, precharge_energy,
|
double activate_energy, read_energy, write_energy, precharge_energy,
|
||||||
refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
|
refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
|
||||||
leak_power_request_and_reply_networks;
|
leak_power_request_and_reply_networks;
|
||||||
|
|
||||||
double precharge_delay;
|
double precharge_delay;
|
||||||
|
|
||||||
static bool lt(const mem_array * m1, const mem_array * m2);
|
static bool lt(const mem_array * m1, const mem_array * m2);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -45,34 +46,30 @@ using namespace std;
|
||||||
|
|
||||||
|
|
||||||
Component::Component()
|
Component::Component()
|
||||||
:area(), power(), rt_power(),delay(0)
|
: area(), power(), rt_power(), delay(0) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Component::~Component()
|
Component::~Component() {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr)
|
double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) {
|
||||||
{
|
double w_poly = g_ip->F_sz_um;
|
||||||
double w_poly = g_ip->F_sz_um;
|
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
||||||
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
|
||||||
double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain
|
num_stacked_in * w_poly +
|
||||||
num_stacked_in * w_poly +
|
(num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
||||||
(num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
|
||||||
|
|
||||||
if (num_folded_tr > 1)
|
if (num_folded_tr > 1) {
|
||||||
{
|
total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
|
||||||
total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly +
|
(num_folded_tr - 1) * num_stacked_in * w_poly +
|
||||||
(num_folded_tr - 1) * num_stacked_in * w_poly +
|
(num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
||||||
(num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return total_diff_w;
|
return total_diff_w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,105 +79,96 @@ double Component::compute_gate_area(
|
||||||
int num_inputs,
|
int num_inputs,
|
||||||
double w_pmos,
|
double w_pmos,
|
||||||
double w_nmos,
|
double w_nmos,
|
||||||
double h_gate)
|
double h_gate) {
|
||||||
{
|
if (w_pmos <= 0.0 || w_nmos <= 0.0) {
|
||||||
if (w_pmos <= 0.0 || w_nmos <= 0.0)
|
return 0.0;
|
||||||
{
|
}
|
||||||
return 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
double w_folded_pmos, w_folded_nmos;
|
double w_folded_pmos, w_folded_nmos;
|
||||||
int num_folded_pmos, num_folded_nmos;
|
int num_folded_pmos, num_folded_nmos;
|
||||||
double total_ndiff_w, total_pdiff_w;
|
double total_ndiff_w, total_pdiff_w;
|
||||||
Area gate;
|
Area gate;
|
||||||
|
|
||||||
double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
|
double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL;
|
||||||
double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
|
double ratio_p_to_n = w_pmos / (w_pmos + w_nmos);
|
||||||
|
|
||||||
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0)
|
if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) {
|
||||||
{
|
return 0.0;
|
||||||
return 0.0;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
|
w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n;
|
||||||
w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
|
w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n);
|
||||||
assert(w_folded_pmos > 0);
|
assert(w_folded_pmos > 0);
|
||||||
|
|
||||||
num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
|
num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos));
|
||||||
num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
|
num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos));
|
||||||
|
|
||||||
switch (gate_type)
|
switch (gate_type) {
|
||||||
{
|
|
||||||
case INV:
|
case INV:
|
||||||
total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
|
total_ndiff_w = compute_diffusion_width(1, num_folded_nmos);
|
||||||
total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
|
total_pdiff_w = compute_diffusion_width(1, num_folded_pmos);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NOR:
|
case NOR:
|
||||||
total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
|
total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos);
|
||||||
total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
|
total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case NAND:
|
case NAND:
|
||||||
total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
|
total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos);
|
||||||
total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
|
total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cout << "Unknown gate type: " << gate_type << endl;
|
cout << "Unknown gate type: " << gate_type << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
gate.w = MAX(total_ndiff_w, total_pdiff_w);
|
gate.w = MAX(total_ndiff_w, total_pdiff_w);
|
||||||
|
|
||||||
if (w_folded_nmos > w_nmos)
|
if (w_folded_nmos > w_nmos) {
|
||||||
{
|
//means that the height of the gate can
|
||||||
//means that the height of the gate can
|
//be made smaller than the input height specified, so calculate the height of the gate.
|
||||||
//be made smaller than the input height specified, so calculate the height of the gate.
|
gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
|
||||||
gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL;
|
} else {
|
||||||
}
|
gate.h = h_gate;
|
||||||
else
|
}
|
||||||
{
|
return gate.get_area();
|
||||||
gate.h = h_gate;
|
|
||||||
}
|
|
||||||
return gate.get_area();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double Component::compute_tr_width_after_folding(
|
double Component::compute_tr_width_after_folding(
|
||||||
double input_width,
|
double input_width,
|
||||||
double threshold_folding_width)
|
double threshold_folding_width) {
|
||||||
{//This is actually the width of the cell not the width of a device.
|
//This is actually the width of the cell not the width of a device.
|
||||||
//The width of a cell and the width of a device is orthogonal.
|
//The width of a cell and the width of a device is orthogonal.
|
||||||
if (input_width <= 0)
|
if (input_width <= 0) {
|
||||||
{
|
return 0;
|
||||||
return 0;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
|
int num_folded_tr = (int) (ceil(input_width / threshold_folding_width));
|
||||||
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact;
|
||||||
double width_poly = g_ip->F_sz_um;
|
double width_poly = g_ip->F_sz_um;
|
||||||
double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
|
double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly;
|
||||||
|
|
||||||
return total_diff_width;
|
return total_diff_width;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double Component::height_sense_amplifier(double pitch_sense_amp)
|
double Component::height_sense_amplifier(double pitch_sense_amp) {
|
||||||
{
|
// compute the height occupied by all PMOS transistors
|
||||||
// compute the height occupied by all PMOS transistors
|
double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
|
||||||
double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 +
|
compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
|
||||||
compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) +
|
2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
|
||||||
2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
|
|
||||||
|
|
||||||
// compute the height occupied by all NMOS transistors
|
// compute the height occupied by all NMOS transistors
|
||||||
double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
|
double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 +
|
||||||
compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
|
compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) +
|
||||||
2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
|
2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS;
|
||||||
|
|
||||||
// compute total height by considering gap between the p and n diffusion areas
|
// compute total height by considering gap between the p and n diffusion areas
|
||||||
return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
|
return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -195,42 +183,39 @@ int Component::logical_effort(
|
||||||
double p_to_n_sz_ratio,
|
double p_to_n_sz_ratio,
|
||||||
bool is_dram_,
|
bool is_dram_,
|
||||||
bool is_wl_tr_,
|
bool is_wl_tr_,
|
||||||
double max_w_nmos)
|
double max_w_nmos) {
|
||||||
{
|
int num_gates = (int) (log(F) / log(fopt));
|
||||||
int num_gates = (int) (log(F) / log(fopt));
|
|
||||||
|
|
||||||
// check if num_gates is odd. if so, add 1 to make it even
|
// check if num_gates is odd. if so, add 1 to make it even
|
||||||
num_gates+= (num_gates % 2) ? 1 : 0;
|
num_gates += (num_gates % 2) ? 1 : 0;
|
||||||
num_gates = MAX(num_gates, num_gates_min);
|
|
||||||
|
|
||||||
// recalculate the effective fanout of each stage
|
|
||||||
double f = pow(F, 1.0 / num_gates);
|
|
||||||
int i = num_gates - 1;
|
|
||||||
double C_in = C_load / f;
|
|
||||||
w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
|
|
||||||
w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
|
|
||||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
|
||||||
|
|
||||||
if (w_n[i] > max_w_nmos)
|
|
||||||
{
|
|
||||||
double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
|
|
||||||
F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
|
|
||||||
num_gates = (int) (log(F) / log(fopt)) + 1;
|
|
||||||
num_gates+= (num_gates % 2) ? 1 : 0;
|
|
||||||
num_gates = MAX(num_gates, num_gates_min);
|
num_gates = MAX(num_gates, num_gates_min);
|
||||||
f = pow(F, 1.0 / (num_gates - 1));
|
|
||||||
i = num_gates - 1;
|
// recalculate the effective fanout of each stage
|
||||||
w_n[i] = max_w_nmos;
|
double f = pow(F, 1.0 / num_gates);
|
||||||
|
int i = num_gates - 1;
|
||||||
|
double C_in = C_load / f;
|
||||||
|
w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_);
|
||||||
|
w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_);
|
||||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||||
}
|
|
||||||
|
|
||||||
for (i = num_gates - 2; i >= 1; i--)
|
if (w_n[i] > max_w_nmos) {
|
||||||
{
|
double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_);
|
||||||
w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
|
F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_);
|
||||||
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
num_gates = (int) (log(F) / log(fopt)) + 1;
|
||||||
}
|
num_gates += (num_gates % 2) ? 1 : 0;
|
||||||
|
num_gates = MAX(num_gates, num_gates_min);
|
||||||
|
f = pow(F, 1.0 / (num_gates - 1));
|
||||||
|
i = num_gates - 1;
|
||||||
|
w_n[i] = max_w_nmos;
|
||||||
|
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||||
|
}
|
||||||
|
|
||||||
assert(num_gates <= MAX_NUMBER_GATES_STAGE);
|
for (i = num_gates - 2; i >= 1; i--) {
|
||||||
return num_gates;
|
w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_);
|
||||||
|
w_p[i] = p_to_n_sz_ratio * w_n[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(num_gates <= MAX_NUMBER_GATES_STAGE);
|
||||||
|
return num_gates;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -42,41 +43,32 @@ using namespace std;
|
||||||
class Crossbar;
|
class Crossbar;
|
||||||
class Bank;
|
class Bank;
|
||||||
|
|
||||||
class Component
|
class Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Component();
|
Component();
|
||||||
~Component();
|
~Component();
|
||||||
|
|
||||||
Area area;
|
Area area;
|
||||||
powerDef power,rt_power;
|
// TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE
|
||||||
|
// VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS
|
||||||
|
// MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER
|
||||||
|
// CALCULATIONS. THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS
|
||||||
|
powerDef power, rt_power;
|
||||||
double delay;
|
double delay;
|
||||||
double cycle_time;
|
double cycle_time;
|
||||||
|
|
||||||
double compute_gate_area(
|
double compute_gate_area(int gate_type, int num_inputs, double w_pmos,
|
||||||
int gate_type,
|
double w_nmos, double h_gate);
|
||||||
int num_inputs,
|
double compute_tr_width_after_folding(double input_width,
|
||||||
double w_pmos,
|
double threshold_folding_width);
|
||||||
double w_nmos,
|
|
||||||
double h_gate);
|
|
||||||
|
|
||||||
double compute_tr_width_after_folding(double input_width, double threshold_folding_width);
|
|
||||||
double height_sense_amplifier(double pitch_sense_amp);
|
double height_sense_amplifier(double pitch_sense_amp);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int logical_effort(
|
int logical_effort(int num_gates_min, double g, double F, double * w_n,
|
||||||
int num_gates_min,
|
double * w_p, double C_load, double p_to_n_sz_ratio,
|
||||||
double g,
|
bool is_dram_, bool is_wl_tr_, double max_w_nmos);
|
||||||
double F,
|
|
||||||
double * w_n,
|
|
||||||
double * w_p,
|
|
||||||
double C_load,
|
|
||||||
double p_to_n_sz_ratio,
|
|
||||||
bool is_dram_,
|
|
||||||
bool is_wl_tr_,
|
|
||||||
double max_w_nmos);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
|
double compute_diffusion_width(int num_stacked_in, int num_folded_tr);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -249,21 +250,20 @@ const double bit_to_byte = 8.0;
|
||||||
// v : vertical or velocity
|
// v : vertical or velocity
|
||||||
|
|
||||||
|
|
||||||
enum ram_cell_tech_type_num
|
enum ram_cell_tech_type_num {
|
||||||
{
|
itrs_hp = 0,
|
||||||
itrs_hp = 0,
|
itrs_lstp = 1,
|
||||||
itrs_lstp = 1,
|
itrs_lop = 2,
|
||||||
itrs_lop = 2,
|
lp_dram = 3,
|
||||||
lp_dram = 3,
|
comm_dram = 4
|
||||||
comm_dram = 4
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const double pppm[4] = {1,1,1,1};
|
const double pppm[4] = {1, 1, 1, 1};
|
||||||
const double pppm_lkg[4] = {0,1,1,0};
|
const double pppm_lkg[4] = {0, 1, 1, 0};
|
||||||
const double pppm_dyn[4] = {1,0,0,0};
|
const double pppm_dyn[4] = {1, 0, 0, 0};
|
||||||
const double pppm_Isub[4] = {0,1,0,0};
|
const double pppm_Isub[4] = {0, 1, 0, 0};
|
||||||
const double pppm_Ig[4] = {0,0,1,0};
|
const double pppm_Ig[4] = {0, 0, 1, 0};
|
||||||
const double pppm_sc[4] = {0,0,0,1};
|
const double pppm_sc[4] = {0, 0, 0, 1};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -39,123 +40,140 @@ Crossbar::Crossbar(
|
||||||
double n_out_,
|
double n_out_,
|
||||||
double flit_size_,
|
double flit_size_,
|
||||||
TechnologyParameter::DeviceType *dt
|
TechnologyParameter::DeviceType *dt
|
||||||
):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt)
|
): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) {
|
||||||
{
|
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
|
Vdd = dt->Vdd;
|
||||||
Vdd = dt->Vdd;
|
CB_ADJ = 1;
|
||||||
CB_ADJ = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Crossbar::~Crossbar(){}
|
Crossbar::~Crossbar() {}
|
||||||
|
|
||||||
double Crossbar::output_buffer()
|
double Crossbar::output_buffer() {
|
||||||
{
|
|
||||||
|
|
||||||
//Wire winit(4, 4);
|
//Wire winit(4, 4);
|
||||||
double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
|
double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
|
||||||
Wire w1(g_ip->wt, l_eff);
|
Wire w1(g_ip->wt, l_eff);
|
||||||
//double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
|
//double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing;
|
||||||
double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ);
|
double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
|
||||||
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
l_eff * ADJ / w1.repeater_spacing : ADJ);
|
||||||
// the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
|
double pton_size = deviceType->n_to_p_eff_curr_drv_ratio;
|
||||||
TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size);
|
// the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor
|
||||||
TriS2 = s1; //driver transistor
|
TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size);
|
||||||
|
TriS2 = s1; //driver transistor
|
||||||
|
|
||||||
if (TriS1 < 1)
|
if (TriS1 < 1)
|
||||||
TriS1 = 1;
|
TriS1 = 1;
|
||||||
|
|
||||||
double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) +
|
double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) +
|
||||||
gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0);
|
gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0);
|
||||||
// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
// input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||||
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
|
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||||
// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
|
// gate_C(TriS2*g_tp.min_w_nmos_, 0)+
|
||||||
// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
|
// drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
|
||||||
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
// drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||||
// gate_C(TriS2*min_w_pmos, 0);
|
// gate_C(TriS2*min_w_pmos, 0);
|
||||||
tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
||||||
drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 +
|
drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 +
|
||||||
gate_C(TriS2*g_tp.min_w_nmos_, 0)+
|
gate_C(TriS2 * g_tp.min_w_nmos_, 0) +
|
||||||
drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 +
|
drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 +
|
||||||
drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) +
|
||||||
gate_C(TriS2*min_w_pmos, 0);
|
gate_C(TriS2 * min_w_pmos, 0);
|
||||||
double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) +
|
double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1,
|
||||||
drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
|
g_tp.cell_h_def) +
|
||||||
double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0);
|
drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def);
|
||||||
|
double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0);
|
||||||
|
|
||||||
tri_inp_cap = input_cap;
|
tri_inp_cap = input_cap;
|
||||||
tri_out_cap = output_cap;
|
tri_out_cap = output_cap;
|
||||||
tri_ctr_cap = ctr_cap;
|
tri_ctr_cap = ctr_cap;
|
||||||
return input_cap + output_cap + ctr_cap;
|
return input_cap + output_cap + ctr_cap;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Crossbar::compute_power()
|
void Crossbar::compute_power() {
|
||||||
{
|
|
||||||
|
|
||||||
Wire winit(4, 4);
|
Wire winit(4, 4);
|
||||||
double tri_cap = output_buffer();
|
double tri_cap = output_buffer();
|
||||||
assert(tri_cap > 0);
|
assert(tri_cap > 0);
|
||||||
//area of a tristate logic
|
//area of a tristate logic
|
||||||
double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def);
|
double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_,
|
||||||
g_area *= 2; // to model area of output transistors
|
TriS2 * min_w_pmos, g_tp.cell_h_def);
|
||||||
g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def);
|
g_area *= 2; // to model area of output transistors
|
||||||
g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def);
|
g_area += compute_gate_area (NAND, 2, TriS1 * 2 * g_tp.min_w_nmos_,
|
||||||
double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def);
|
TriS1 * min_w_pmos, g_tp.cell_h_def);
|
||||||
// effective no. of tristate buffers that need to be laid side by side
|
g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_,
|
||||||
int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch));
|
TriS1 * 2 * min_w_pmos, g_tp.cell_h_def);
|
||||||
double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out);
|
double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def);
|
||||||
Wire w1(g_ip->wt, wire_len);
|
// effective no. of tristate buffers that need to be laid side by side
|
||||||
|
int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch));
|
||||||
|
double wire_len = MAX(width * ntri * n_out,
|
||||||
|
flit_size * g_tp.wire_outside_mat.pitch * n_out);
|
||||||
|
Wire w1(g_ip->wt, wire_len);
|
||||||
|
|
||||||
area.w = wire_len;
|
area.w = wire_len;
|
||||||
area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ;
|
area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ;
|
||||||
Wire w2(g_ip->wt, area.h);
|
Wire w2(g_ip->wt, area.h);
|
||||||
|
|
||||||
double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp);
|
double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp);
|
||||||
if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb;
|
if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb;
|
||||||
|
|
||||||
if (aspect_ratio_cb < ASPECT_THRESHOLD) {
|
if (aspect_ratio_cb < ASPECT_THRESHOLD) {
|
||||||
if (n_out > 2 && n_inp > 2) {
|
if (n_out > 2 && n_inp > 2) {
|
||||||
CB_ADJ+=0.2;
|
CB_ADJ += 0.2;
|
||||||
//cout << "CB ADJ " << CB_ADJ << endl;
|
//cout << "CB ADJ " << CB_ADJ << endl;
|
||||||
if (CB_ADJ < 4) {
|
if (CB_ADJ < 4) {
|
||||||
this->compute_power();
|
this->compute_power();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size;
|
power.readOp.dynamic =
|
||||||
power.readOp.leakage = n_inp * n_out * flit_size * (
|
(w1.power.readOp.dynamic + w2.power.readOp.dynamic +
|
||||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
|
(tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap +
|
||||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
|
tri_int_cap) * Vdd * Vdd) * flit_size;
|
||||||
cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
|
power.readOp.leakage = n_inp * n_out * flit_size * (
|
||||||
w1.power.readOp.leakage + w2.power.readOp.leakage);
|
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
|
||||||
power.readOp.gate_leakage = n_inp * n_out * flit_size * (
|
1, inv) * Vdd +
|
||||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+
|
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+
|
2, nand) * Vdd +
|
||||||
cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+
|
cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||||
w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
|
2, nor) * Vdd +
|
||||||
|
w1.power.readOp.leakage + w2.power.readOp.leakage);
|
||||||
|
power.readOp.gate_leakage = n_inp * n_out * flit_size * (
|
||||||
|
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2,
|
||||||
|
1, inv) * Vdd +
|
||||||
|
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||||
|
2, nand) * Vdd +
|
||||||
|
cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3,
|
||||||
|
2, nor) * Vdd +
|
||||||
|
w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage);
|
||||||
|
|
||||||
// delay calculation
|
// delay calculation
|
||||||
double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch;
|
double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch;
|
||||||
Wire wdriver(g_ip->wt, l_eff);
|
Wire wdriver(g_ip->wt, l_eff);
|
||||||
double res = g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1);
|
double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) +
|
||||||
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap;
|
tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1);
|
||||||
delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE);
|
double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out *
|
||||||
|
tri_inp_cap + n_inp * tri_out_cap;
|
||||||
|
delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth /
|
||||||
|
deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE);
|
||||||
|
|
||||||
Wire wreset();
|
Wire wreset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Crossbar::print_crossbar()
|
void Crossbar::print_crossbar() {
|
||||||
{
|
cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
|
||||||
cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n";
|
cout << "Flit size : " << flit_size << " bits" << endl;
|
||||||
cout << "Flit size : " << flit_size << " bits" << endl;
|
cout << "Width : " << area.w << " u" << endl;
|
||||||
cout << "Width : " << area.w << " u" << endl;
|
cout << "Height : " << area.h << " u" << endl;
|
||||||
cout << "Height : " << area.h << " u" << endl;
|
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 *
|
||||||
cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl;
|
MIN(n_inp, n_out) << " (nJ)" << endl;
|
||||||
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl;
|
cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)"
|
||||||
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl;
|
<< endl;
|
||||||
cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
|
cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3
|
||||||
|
<< " (mW)" << endl;
|
||||||
|
cout << "Crossbar Delay : " << delay*1e12 << " ps\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -44,14 +45,13 @@
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "wire.h"
|
#include "wire.h"
|
||||||
|
|
||||||
class Crossbar : public Component
|
class Crossbar : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Crossbar(
|
Crossbar(
|
||||||
double in,
|
double in,
|
||||||
double out,
|
double out,
|
||||||
double flit_sz,
|
double flit_sz,
|
||||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
|
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
|
||||||
~Crossbar();
|
~Crossbar();
|
||||||
|
|
||||||
void print_crossbar();
|
void print_crossbar();
|
||||||
|
@ -62,18 +62,18 @@ class Crossbar : public Component
|
||||||
double flit_size;
|
double flit_size;
|
||||||
double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
|
double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double CB_ADJ;
|
double CB_ADJ;
|
||||||
/*
|
/*
|
||||||
* Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar
|
* Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar
|
||||||
* buffer is adjusted to get an aspect ratio of whole cross bar close to one;
|
* buffer is adjusted to get an aspect ratio of whole cross bar close to one;
|
||||||
* when adjust the ratio, the number of wires route over the tri-state buffers does not change,
|
* when adjust the ratio, the number of wires route over the tri-state buffers does not change,
|
||||||
* however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase
|
* however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase
|
||||||
* during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch
|
* during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch
|
||||||
* will increase. As a result, the height of the crossbar (area.h) will increase.
|
* will increase. As a result, the height of the crossbar (area.h) will increase.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TechnologyParameter::DeviceType *deviceType;
|
TechnologyParameter::DeviceType *deviceType;
|
||||||
double TriS1, TriS2;
|
double TriS1, TriS2;
|
||||||
double min_w_pmos, Vdd;
|
double min_w_pmos, Vdd;
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -42,9 +43,8 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
class Decoder : public Component
|
class Decoder : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Decoder(
|
Decoder(
|
||||||
int _num_dec_signals,
|
int _num_dec_signals,
|
||||||
bool flag_way_select,
|
bool flag_way_select,
|
||||||
|
@ -80,125 +80,120 @@ class Decoder : public Component
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PredecBlk : public Component
|
class PredecBlk : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
PredecBlk(
|
||||||
PredecBlk(
|
int num_dec_signals,
|
||||||
int num_dec_signals,
|
Decoder * dec,
|
||||||
Decoder * dec,
|
double C_wire_predec_blk_out,
|
||||||
double C_wire_predec_blk_out,
|
double R_wire_predec_blk_out,
|
||||||
double R_wire_predec_blk_out,
|
int num_dec_per_predec,
|
||||||
int num_dec_per_predec,
|
bool is_dram_,
|
||||||
bool is_dram_,
|
bool is_blk1);
|
||||||
bool is_blk1);
|
|
||||||
|
|
||||||
Decoder * dec;
|
Decoder * dec;
|
||||||
bool exist;
|
bool exist;
|
||||||
int number_input_addr_bits;
|
int number_input_addr_bits;
|
||||||
double C_ld_predec_blk_out;
|
double C_ld_predec_blk_out;
|
||||||
double R_wire_predec_blk_out;
|
double R_wire_predec_blk_out;
|
||||||
int branch_effort_nand2_gate_output;
|
int branch_effort_nand2_gate_output;
|
||||||
int branch_effort_nand3_gate_output;
|
int branch_effort_nand3_gate_output;
|
||||||
bool flag_two_unique_paths;
|
bool flag_two_unique_paths;
|
||||||
int flag_L2_gate;
|
int flag_L2_gate;
|
||||||
int number_inputs_L1_gate;
|
int number_inputs_L1_gate;
|
||||||
int number_gates_L1_nand2_path;
|
int number_gates_L1_nand2_path;
|
||||||
int number_gates_L1_nand3_path;
|
int number_gates_L1_nand3_path;
|
||||||
int number_gates_L2;
|
int number_gates_L2;
|
||||||
int min_number_gates_L1;
|
int min_number_gates_L1;
|
||||||
int min_number_gates_L2;
|
int min_number_gates_L2;
|
||||||
int num_L1_active_nand2_path;
|
int num_L1_active_nand2_path;
|
||||||
int num_L1_active_nand3_path;
|
int num_L1_active_nand3_path;
|
||||||
double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
|
double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE];
|
||||||
double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
|
double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE];
|
||||||
double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
|
double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE];
|
||||||
double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
|
double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE];
|
||||||
double w_L2_n[MAX_NUMBER_GATES_STAGE];
|
double w_L2_n[MAX_NUMBER_GATES_STAGE];
|
||||||
double w_L2_p[MAX_NUMBER_GATES_STAGE];
|
double w_L2_p[MAX_NUMBER_GATES_STAGE];
|
||||||
double delay_nand2_path;
|
double delay_nand2_path;
|
||||||
double delay_nand3_path;
|
double delay_nand3_path;
|
||||||
powerDef power_nand2_path;
|
powerDef power_nand2_path;
|
||||||
powerDef power_nand3_path;
|
powerDef power_nand3_path;
|
||||||
powerDef power_L2;
|
powerDef power_L2;
|
||||||
|
|
||||||
bool is_dram_;
|
bool is_dram_;
|
||||||
|
|
||||||
void compute_widths();
|
void compute_widths();
|
||||||
void compute_area();
|
void compute_area();
|
||||||
|
|
||||||
void leakage_feedback(double temperature);
|
void leakage_feedback(double temperature);
|
||||||
|
|
||||||
pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
|
pair<double, double> compute_delays(pair<double, double> inrisetime); // <nand2, nand3>
|
||||||
// return <outrise_nand2, outrise_nand3>
|
// return <outrise_nand2, outrise_nand3>
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class PredecBlkDrv : public Component
|
class PredecBlkDrv : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
PredecBlkDrv(
|
||||||
PredecBlkDrv(
|
int way_select,
|
||||||
int way_select,
|
PredecBlk * blk_,
|
||||||
PredecBlk * blk_,
|
bool is_dram);
|
||||||
bool is_dram);
|
|
||||||
|
|
||||||
int flag_driver_exists;
|
int flag_driver_exists;
|
||||||
int number_input_addr_bits;
|
int number_input_addr_bits;
|
||||||
int number_gates_nand2_path;
|
int number_gates_nand2_path;
|
||||||
int number_gates_nand3_path;
|
int number_gates_nand3_path;
|
||||||
int min_number_gates;
|
int min_number_gates;
|
||||||
int num_buffers_driving_1_nand2_load;
|
int num_buffers_driving_1_nand2_load;
|
||||||
int num_buffers_driving_2_nand2_load;
|
int num_buffers_driving_2_nand2_load;
|
||||||
int num_buffers_driving_4_nand2_load;
|
int num_buffers_driving_4_nand2_load;
|
||||||
int num_buffers_driving_2_nand3_load;
|
int num_buffers_driving_2_nand3_load;
|
||||||
int num_buffers_driving_8_nand3_load;
|
int num_buffers_driving_8_nand3_load;
|
||||||
int num_buffers_nand3_path;
|
int num_buffers_nand3_path;
|
||||||
double c_load_nand2_path_out;
|
double c_load_nand2_path_out;
|
||||||
double c_load_nand3_path_out;
|
double c_load_nand3_path_out;
|
||||||
double r_load_nand2_path_out;
|
double r_load_nand2_path_out;
|
||||||
double r_load_nand3_path_out;
|
double r_load_nand3_path_out;
|
||||||
double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
|
double width_nand2_path_n[MAX_NUMBER_GATES_STAGE];
|
||||||
double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
|
double width_nand2_path_p[MAX_NUMBER_GATES_STAGE];
|
||||||
double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
|
double width_nand3_path_n[MAX_NUMBER_GATES_STAGE];
|
||||||
double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
|
double width_nand3_path_p[MAX_NUMBER_GATES_STAGE];
|
||||||
double delay_nand2_path;
|
double delay_nand2_path;
|
||||||
double delay_nand3_path;
|
double delay_nand3_path;
|
||||||
powerDef power_nand2_path;
|
powerDef power_nand2_path;
|
||||||
powerDef power_nand3_path;
|
powerDef power_nand3_path;
|
||||||
|
|
||||||
PredecBlk * blk;
|
PredecBlk * blk;
|
||||||
Decoder * dec;
|
Decoder * dec;
|
||||||
bool is_dram_;
|
bool is_dram_;
|
||||||
int way_select;
|
int way_select;
|
||||||
|
|
||||||
void compute_widths();
|
void compute_widths();
|
||||||
void compute_area();
|
void compute_area();
|
||||||
|
|
||||||
void leakage_feedback(double temperature);
|
void leakage_feedback(double temperature);
|
||||||
|
|
||||||
|
|
||||||
pair<double, double> compute_delays(
|
pair<double, double> compute_delays(
|
||||||
double inrisetime_nand2_path,
|
double inrisetime_nand2_path,
|
||||||
double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
|
double inrisetime_nand3_path); // return <outrise_nand2, outrise_nand3>
|
||||||
|
|
||||||
inline int num_addr_bits_nand2_path()
|
inline int num_addr_bits_nand2_path() {
|
||||||
{
|
return num_buffers_driving_1_nand2_load +
|
||||||
return num_buffers_driving_1_nand2_load +
|
num_buffers_driving_2_nand2_load +
|
||||||
num_buffers_driving_2_nand2_load +
|
num_buffers_driving_4_nand2_load;
|
||||||
num_buffers_driving_4_nand2_load;
|
}
|
||||||
}
|
inline int num_addr_bits_nand3_path() {
|
||||||
inline int num_addr_bits_nand3_path()
|
return num_buffers_driving_2_nand3_load +
|
||||||
{
|
num_buffers_driving_8_nand3_load;
|
||||||
return num_buffers_driving_2_nand3_load +
|
}
|
||||||
num_buffers_driving_8_nand3_load;
|
double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
|
||||||
}
|
|
||||||
double get_rdOp_dynamic_E(int num_act_mats_hor_dir);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Predec : public Component
|
class Predec : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Predec(
|
Predec(
|
||||||
PredecBlkDrv * drv1,
|
PredecBlkDrv * drv1,
|
||||||
PredecBlkDrv * drv2);
|
PredecBlkDrv * drv2);
|
||||||
|
@ -214,7 +209,7 @@ class Predec : public Component
|
||||||
powerDef block_power;
|
powerDef block_power;
|
||||||
powerDef driver_power;
|
powerDef driver_power;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// returns <delay, risetime>
|
// returns <delay, risetime>
|
||||||
pair<double, double> get_max_delay_before_decoder(
|
pair<double, double> get_max_delay_before_decoder(
|
||||||
pair<double, double> input_pair1,
|
pair<double, double> input_pair1,
|
||||||
|
@ -223,24 +218,23 @@ class Predec : public Component
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Driver : public Component
|
class Driver : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
|
||||||
Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram);
|
|
||||||
|
|
||||||
int number_gates;
|
int number_gates;
|
||||||
int min_number_gates;
|
int min_number_gates;
|
||||||
double width_n[MAX_NUMBER_GATES_STAGE];
|
double width_n[MAX_NUMBER_GATES_STAGE];
|
||||||
double width_p[MAX_NUMBER_GATES_STAGE];
|
double width_p[MAX_NUMBER_GATES_STAGE];
|
||||||
double c_gate_load;
|
double c_gate_load;
|
||||||
double c_wire_load;
|
double c_wire_load;
|
||||||
double r_wire_load;
|
double r_wire_load;
|
||||||
double delay;
|
double delay;
|
||||||
powerDef power;
|
powerDef power;
|
||||||
bool is_dram_;
|
bool is_dram_;
|
||||||
|
|
||||||
void compute_widths();
|
void compute_widths();
|
||||||
double compute_delay(double inrisetime);
|
double compute_delay(double inrisetime);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -44,13 +45,12 @@
|
||||||
// leakge power includes entire htree in a bank (when uca_tree == false)
|
// leakge power includes entire htree in a bank (when uca_tree == false)
|
||||||
// leakge power includes only part to one bank when uca_tree == true
|
// leakge power includes only part to one bank when uca_tree == true
|
||||||
|
|
||||||
class Htree2 : public Component
|
class Htree2 : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Htree2(enum Wire_type wire_model,
|
Htree2(enum Wire_type wire_model,
|
||||||
double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
|
double mat_w, double mat_h, int add, int data_in, int search_data_in, int data_out, int search_data_out, int bl, int wl,
|
||||||
enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
|
enum Htree_type h_type, bool uca_tree_ = false, bool search_tree_ = false,
|
||||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
|
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
|
||||||
~Htree2() {};
|
~Htree2() {};
|
||||||
|
|
||||||
void in_htree();
|
void in_htree();
|
||||||
|
@ -64,16 +64,15 @@ class Htree2 : public Component
|
||||||
|
|
||||||
double in_rise_time, out_rise_time;
|
double in_rise_time, out_rise_time;
|
||||||
|
|
||||||
void set_in_rise_time(double rt)
|
void set_in_rise_time(double rt) {
|
||||||
{
|
in_rise_time = rt;
|
||||||
in_rise_time = rt;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double max_unpipelined_link_delay;
|
double max_unpipelined_link_delay;
|
||||||
powerDef power_bit;
|
powerDef power_bit;
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double wire_bw;
|
double wire_bw;
|
||||||
double init_wire_bw; // bus width at root
|
double init_wire_bw; // bus width at root
|
||||||
enum Htree_type tree_type;
|
enum Htree_type tree_type;
|
||||||
|
@ -81,7 +80,11 @@ class Htree2 : public Component
|
||||||
double htree_vnodes;
|
double htree_vnodes;
|
||||||
double mat_width;
|
double mat_width;
|
||||||
double mat_height;
|
double mat_height;
|
||||||
int add_bits, data_in_bits,search_data_in_bits,data_out_bits, search_data_out_bits;
|
int add_bits;
|
||||||
|
int data_in_bits;
|
||||||
|
int search_data_in_bits;
|
||||||
|
int data_out_bits;
|
||||||
|
int search_data_out_bits;
|
||||||
int ndbl, ndwl;
|
int ndbl, ndwl;
|
||||||
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
|
bool uca_tree; // should have full bandwidth to access all banks in the array simultaneously
|
||||||
bool search_tree;
|
bool search_tree;
|
||||||
|
|
File diff suppressed because it is too large
Load diff
2878
ext/mcpat/cacti/mat.cc
Executable file → Normal file
2878
ext/mcpat/cacti/mat.cc
Executable file → Normal file
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -39,9 +40,8 @@
|
||||||
#include "subarray.h"
|
#include "subarray.h"
|
||||||
#include "wire.h"
|
#include "wire.h"
|
||||||
|
|
||||||
class Mat : public Component
|
class Mat : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Mat(const DynamicParameter & dyn_p);
|
Mat(const DynamicParameter & dyn_p);
|
||||||
~Mat();
|
~Mat();
|
||||||
double compute_delays(double inrisetime); // return outrisetime
|
double compute_delays(double inrisetime); // return outrisetime
|
||||||
|
@ -106,8 +106,8 @@ class Mat : public Component
|
||||||
int deg_bl_muxing;
|
int deg_bl_muxing;
|
||||||
int num_act_mats_hor_dir;
|
int num_act_mats_hor_dir;
|
||||||
double delay_writeback;
|
double delay_writeback;
|
||||||
Area cell,cam_cell;
|
Area cell, cam_cell;
|
||||||
bool is_dram,is_fa, pure_cam, camFlag;
|
bool is_dram, is_fa, pure_cam, camFlag;
|
||||||
int num_mats;
|
int num_mats;
|
||||||
powerDef power_sa;
|
powerDef power_sa;
|
||||||
double delay_sa;
|
double delay_sa;
|
||||||
|
@ -127,7 +127,7 @@ class Mat : public Component
|
||||||
uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
|
uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
|
double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
|
||||||
double width_write_driver_or_write_mux();
|
double width_write_driver_or_write_mux();
|
||||||
double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
|
double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w);
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -46,8 +47,8 @@
|
||||||
#include "wire.h"
|
#include "wire.h"
|
||||||
|
|
||||||
class nuca_org_t {
|
class nuca_org_t {
|
||||||
public:
|
public:
|
||||||
~nuca_org_t();
|
~nuca_org_t();
|
||||||
// int size;
|
// int size;
|
||||||
/* area, power, access time, and cycle time stats */
|
/* area, power, access time, and cycle time stats */
|
||||||
Component nuca_pda;
|
Component nuca_pda;
|
||||||
|
@ -71,9 +72,8 @@ class nuca_org_t {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Nuca : public Component
|
class Nuca : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Nuca(
|
Nuca(
|
||||||
TechnologyParameter::DeviceType *dt);
|
TechnologyParameter::DeviceType *dt);
|
||||||
void print_router();
|
void print_router();
|
||||||
|
@ -87,12 +87,12 @@ class Nuca : public Component
|
||||||
void print_nuca(nuca_org_t *n);
|
void print_nuca(nuca_org_t *n);
|
||||||
void print_cont_stats();
|
void print_cont_stats();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
TechnologyParameter::DeviceType *deviceType;
|
TechnologyParameter::DeviceType *deviceType;
|
||||||
int wt_min, wt_max;
|
int wt_min, wt_max;
|
||||||
Wire *wire_vertical[WIRE_TYPES],
|
Wire *wire_vertical[WIRE_TYPES],
|
||||||
*wire_horizontal[WIRE_TYPES];
|
*wire_horizontal[WIRE_TYPES];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -40,251 +41,242 @@
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
|
|
||||||
// parameters which are functions of certain device technology
|
// parameters which are functions of certain device technology
|
||||||
class TechnologyParameter
|
class TechnologyParameter {
|
||||||
{
|
public:
|
||||||
public:
|
class DeviceType {
|
||||||
class DeviceType
|
public:
|
||||||
{
|
double C_g_ideal;
|
||||||
public:
|
double C_fringe;
|
||||||
double C_g_ideal;
|
double C_overlap;
|
||||||
double C_fringe;
|
double C_junc; // C_junc_area
|
||||||
double C_overlap;
|
double C_junc_sidewall;
|
||||||
double C_junc; // C_junc_area
|
double l_phy;
|
||||||
double C_junc_sidewall;
|
double l_elec;
|
||||||
double l_phy;
|
double R_nch_on;
|
||||||
double l_elec;
|
double R_pch_on;
|
||||||
double R_nch_on;
|
double Vdd;
|
||||||
double R_pch_on;
|
double Vth;
|
||||||
double Vdd;
|
double I_on_n;
|
||||||
double Vth;
|
double I_on_p;
|
||||||
double I_on_n;
|
double I_off_n;
|
||||||
double I_on_p;
|
double I_off_p;
|
||||||
double I_off_n;
|
double I_g_on_n;
|
||||||
double I_off_p;
|
double I_g_on_p;
|
||||||
double I_g_on_n;
|
double C_ox;
|
||||||
double I_g_on_p;
|
double t_ox;
|
||||||
double C_ox;
|
double n_to_p_eff_curr_drv_ratio;
|
||||||
double t_ox;
|
double long_channel_leakage_reduction;
|
||||||
double n_to_p_eff_curr_drv_ratio;
|
|
||||||
double long_channel_leakage_reduction;
|
|
||||||
|
|
||||||
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
|
DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0),
|
||||||
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
|
C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0),
|
||||||
Vdd(0), Vth(0),
|
Vdd(0), Vth(0),
|
||||||
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0),
|
I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0),
|
||||||
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { };
|
I_g_on_p(0),
|
||||||
void reset()
|
C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0),
|
||||||
{
|
long_channel_leakage_reduction(0) { };
|
||||||
C_g_ideal = 0;
|
void reset() {
|
||||||
C_fringe = 0;
|
C_g_ideal = 0;
|
||||||
C_overlap = 0;
|
C_fringe = 0;
|
||||||
C_junc = 0;
|
C_overlap = 0;
|
||||||
l_phy = 0;
|
C_junc = 0;
|
||||||
l_elec = 0;
|
l_phy = 0;
|
||||||
R_nch_on = 0;
|
l_elec = 0;
|
||||||
R_pch_on = 0;
|
R_nch_on = 0;
|
||||||
Vdd = 0;
|
R_pch_on = 0;
|
||||||
Vth = 0;
|
Vdd = 0;
|
||||||
I_on_n = 0;
|
Vth = 0;
|
||||||
I_on_p = 0;
|
I_on_n = 0;
|
||||||
I_off_n = 0;
|
I_on_p = 0;
|
||||||
I_off_p = 0;
|
I_off_n = 0;
|
||||||
I_g_on_n = 0;
|
I_off_p = 0;
|
||||||
I_g_on_p = 0;
|
I_g_on_n = 0;
|
||||||
C_ox = 0;
|
I_g_on_p = 0;
|
||||||
t_ox = 0;
|
C_ox = 0;
|
||||||
n_to_p_eff_curr_drv_ratio = 0;
|
t_ox = 0;
|
||||||
long_channel_leakage_reduction = 0;
|
n_to_p_eff_curr_drv_ratio = 0;
|
||||||
}
|
long_channel_leakage_reduction = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void display(uint32_t indent = 0);
|
||||||
|
};
|
||||||
|
class InterconnectType {
|
||||||
|
public:
|
||||||
|
double pitch;
|
||||||
|
double R_per_um;
|
||||||
|
double C_per_um;
|
||||||
|
double horiz_dielectric_constant;
|
||||||
|
double vert_dielectric_constant;
|
||||||
|
double aspect_ratio;
|
||||||
|
double miller_value;
|
||||||
|
double ild_thickness;
|
||||||
|
|
||||||
|
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
|
||||||
|
|
||||||
|
void reset() {
|
||||||
|
pitch = 0;
|
||||||
|
R_per_um = 0;
|
||||||
|
C_per_um = 0;
|
||||||
|
horiz_dielectric_constant = 0;
|
||||||
|
vert_dielectric_constant = 0;
|
||||||
|
aspect_ratio = 0;
|
||||||
|
miller_value = 0;
|
||||||
|
ild_thickness = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void display(uint32_t indent = 0);
|
||||||
|
};
|
||||||
|
class MemoryType {
|
||||||
|
public:
|
||||||
|
double b_w;
|
||||||
|
double b_h;
|
||||||
|
double cell_a_w;
|
||||||
|
double cell_pmos_w;
|
||||||
|
double cell_nmos_w;
|
||||||
|
double Vbitpre;
|
||||||
|
|
||||||
|
void reset() {
|
||||||
|
b_w = 0;
|
||||||
|
b_h = 0;
|
||||||
|
cell_a_w = 0;
|
||||||
|
cell_pmos_w = 0;
|
||||||
|
cell_nmos_w = 0;
|
||||||
|
Vbitpre = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void display(uint32_t indent = 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
class ScalingFactor {
|
||||||
|
public:
|
||||||
|
double logic_scaling_co_eff;
|
||||||
|
double core_tx_density;
|
||||||
|
double long_channel_leakage_reduction;
|
||||||
|
|
||||||
|
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
|
||||||
|
long_channel_leakage_reduction(0) { };
|
||||||
|
|
||||||
|
void reset() {
|
||||||
|
logic_scaling_co_eff = 0;
|
||||||
|
core_tx_density = 0;
|
||||||
|
long_channel_leakage_reduction = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void display(uint32_t indent = 0);
|
||||||
|
};
|
||||||
|
|
||||||
|
double ram_wl_stitching_overhead_;
|
||||||
|
double min_w_nmos_;
|
||||||
|
double max_w_nmos_;
|
||||||
|
double max_w_nmos_dec;
|
||||||
|
double unit_len_wire_del;
|
||||||
|
double FO4;
|
||||||
|
double kinv;
|
||||||
|
double vpp;
|
||||||
|
double w_sense_en;
|
||||||
|
double w_sense_n;
|
||||||
|
double w_sense_p;
|
||||||
|
double sense_delay;
|
||||||
|
double sense_dy_power;
|
||||||
|
double w_iso;
|
||||||
|
double w_poly_contact;
|
||||||
|
double spacing_poly_to_poly;
|
||||||
|
double spacing_poly_to_contact;
|
||||||
|
|
||||||
|
double w_comp_inv_p1;
|
||||||
|
double w_comp_inv_p2;
|
||||||
|
double w_comp_inv_p3;
|
||||||
|
double w_comp_inv_n1;
|
||||||
|
double w_comp_inv_n2;
|
||||||
|
double w_comp_inv_n3;
|
||||||
|
double w_eval_inv_p;
|
||||||
|
double w_eval_inv_n;
|
||||||
|
double w_comp_n;
|
||||||
|
double w_comp_p;
|
||||||
|
|
||||||
|
double dram_cell_I_on;
|
||||||
|
double dram_cell_Vdd;
|
||||||
|
double dram_cell_I_off_worst_case_len_temp;
|
||||||
|
double dram_cell_C;
|
||||||
|
double gm_sense_amp_latch;
|
||||||
|
|
||||||
|
double w_nmos_b_mux;
|
||||||
|
double w_nmos_sa_mux;
|
||||||
|
double w_pmos_bl_precharge;
|
||||||
|
double w_pmos_bl_eq;
|
||||||
|
double MIN_GAP_BET_P_AND_N_DIFFS;
|
||||||
|
double MIN_GAP_BET_SAME_TYPE_DIFFS;
|
||||||
|
double HPOWERRAIL;
|
||||||
|
double cell_h_def;
|
||||||
|
|
||||||
|
double chip_layout_overhead;
|
||||||
|
double macro_layout_overhead;
|
||||||
|
double sckt_co_eff;
|
||||||
|
|
||||||
|
double fringe_cap;
|
||||||
|
|
||||||
|
uint64_t h_dec;
|
||||||
|
|
||||||
|
DeviceType sram_cell; // SRAM cell transistor
|
||||||
|
DeviceType dram_acc; // DRAM access transistor
|
||||||
|
DeviceType dram_wl; // DRAM wordline transistor
|
||||||
|
DeviceType peri_global; // peripheral global
|
||||||
|
DeviceType cam_cell; // SRAM cell transistor
|
||||||
|
|
||||||
|
InterconnectType wire_local;
|
||||||
|
InterconnectType wire_inside_mat;
|
||||||
|
InterconnectType wire_outside_mat;
|
||||||
|
|
||||||
|
ScalingFactor scaling_factor;
|
||||||
|
|
||||||
|
MemoryType sram;
|
||||||
|
MemoryType dram;
|
||||||
|
MemoryType cam;
|
||||||
|
|
||||||
void display(uint32_t indent = 0);
|
void display(uint32_t indent = 0);
|
||||||
};
|
|
||||||
class InterconnectType
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
double pitch;
|
|
||||||
double R_per_um;
|
|
||||||
double C_per_um;
|
|
||||||
double horiz_dielectric_constant;
|
|
||||||
double vert_dielectric_constant;
|
|
||||||
double aspect_ratio;
|
|
||||||
double miller_value;
|
|
||||||
double ild_thickness;
|
|
||||||
|
|
||||||
InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { };
|
void reset() {
|
||||||
|
dram_cell_Vdd = 0;
|
||||||
|
dram_cell_I_on = 0;
|
||||||
|
dram_cell_C = 0;
|
||||||
|
vpp = 0;
|
||||||
|
|
||||||
void reset()
|
sense_delay = 0;
|
||||||
{
|
sense_dy_power = 0;
|
||||||
pitch = 0;
|
fringe_cap = 0;
|
||||||
R_per_um = 0;
|
|
||||||
C_per_um = 0;
|
|
||||||
horiz_dielectric_constant = 0;
|
|
||||||
vert_dielectric_constant = 0;
|
|
||||||
aspect_ratio = 0;
|
|
||||||
miller_value = 0;
|
|
||||||
ild_thickness = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void display(uint32_t indent = 0);
|
|
||||||
};
|
|
||||||
class MemoryType
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
double b_w;
|
|
||||||
double b_h;
|
|
||||||
double cell_a_w;
|
|
||||||
double cell_pmos_w;
|
|
||||||
double cell_nmos_w;
|
|
||||||
double Vbitpre;
|
|
||||||
|
|
||||||
void reset()
|
|
||||||
{
|
|
||||||
b_w = 0;
|
|
||||||
b_h = 0;
|
|
||||||
cell_a_w = 0;
|
|
||||||
cell_pmos_w = 0;
|
|
||||||
cell_nmos_w = 0;
|
|
||||||
Vbitpre = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void display(uint32_t indent = 0);
|
|
||||||
};
|
|
||||||
|
|
||||||
class ScalingFactor
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
double logic_scaling_co_eff;
|
|
||||||
double core_tx_density;
|
|
||||||
double long_channel_leakage_reduction;
|
|
||||||
|
|
||||||
ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0),
|
|
||||||
long_channel_leakage_reduction(0) { };
|
|
||||||
|
|
||||||
void reset()
|
|
||||||
{
|
|
||||||
logic_scaling_co_eff= 0;
|
|
||||||
core_tx_density = 0;
|
|
||||||
long_channel_leakage_reduction= 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
void display(uint32_t indent = 0);
|
|
||||||
};
|
|
||||||
|
|
||||||
double ram_wl_stitching_overhead_;
|
|
||||||
double min_w_nmos_;
|
|
||||||
double max_w_nmos_;
|
|
||||||
double max_w_nmos_dec;
|
|
||||||
double unit_len_wire_del;
|
|
||||||
double FO4;
|
|
||||||
double kinv;
|
|
||||||
double vpp;
|
|
||||||
double w_sense_en;
|
|
||||||
double w_sense_n;
|
|
||||||
double w_sense_p;
|
|
||||||
double sense_delay;
|
|
||||||
double sense_dy_power;
|
|
||||||
double w_iso;
|
|
||||||
double w_poly_contact;
|
|
||||||
double spacing_poly_to_poly;
|
|
||||||
double spacing_poly_to_contact;
|
|
||||||
|
|
||||||
double w_comp_inv_p1;
|
|
||||||
double w_comp_inv_p2;
|
|
||||||
double w_comp_inv_p3;
|
|
||||||
double w_comp_inv_n1;
|
|
||||||
double w_comp_inv_n2;
|
|
||||||
double w_comp_inv_n3;
|
|
||||||
double w_eval_inv_p;
|
|
||||||
double w_eval_inv_n;
|
|
||||||
double w_comp_n;
|
|
||||||
double w_comp_p;
|
|
||||||
|
|
||||||
double dram_cell_I_on;
|
|
||||||
double dram_cell_Vdd;
|
|
||||||
double dram_cell_I_off_worst_case_len_temp;
|
|
||||||
double dram_cell_C;
|
|
||||||
double gm_sense_amp_latch;
|
|
||||||
|
|
||||||
double w_nmos_b_mux;
|
|
||||||
double w_nmos_sa_mux;
|
|
||||||
double w_pmos_bl_precharge;
|
|
||||||
double w_pmos_bl_eq;
|
|
||||||
double MIN_GAP_BET_P_AND_N_DIFFS;
|
|
||||||
double MIN_GAP_BET_SAME_TYPE_DIFFS;
|
|
||||||
double HPOWERRAIL;
|
|
||||||
double cell_h_def;
|
|
||||||
|
|
||||||
double chip_layout_overhead;
|
|
||||||
double macro_layout_overhead;
|
|
||||||
double sckt_co_eff;
|
|
||||||
|
|
||||||
double fringe_cap;
|
|
||||||
|
|
||||||
uint64_t h_dec;
|
|
||||||
|
|
||||||
DeviceType sram_cell; // SRAM cell transistor
|
|
||||||
DeviceType dram_acc; // DRAM access transistor
|
|
||||||
DeviceType dram_wl; // DRAM wordline transistor
|
|
||||||
DeviceType peri_global; // peripheral global
|
|
||||||
DeviceType cam_cell; // SRAM cell transistor
|
|
||||||
|
|
||||||
InterconnectType wire_local;
|
|
||||||
InterconnectType wire_inside_mat;
|
|
||||||
InterconnectType wire_outside_mat;
|
|
||||||
|
|
||||||
ScalingFactor scaling_factor;
|
|
||||||
|
|
||||||
MemoryType sram;
|
|
||||||
MemoryType dram;
|
|
||||||
MemoryType cam;
|
|
||||||
|
|
||||||
void display(uint32_t indent = 0);
|
|
||||||
|
|
||||||
void reset()
|
|
||||||
{
|
|
||||||
dram_cell_Vdd = 0;
|
|
||||||
dram_cell_I_on = 0;
|
|
||||||
dram_cell_C = 0;
|
|
||||||
vpp = 0;
|
|
||||||
|
|
||||||
sense_delay = 0;
|
|
||||||
sense_dy_power = 0;
|
|
||||||
fringe_cap = 0;
|
|
||||||
// horiz_dielectric_constant = 0;
|
// horiz_dielectric_constant = 0;
|
||||||
// vert_dielectric_constant = 0;
|
// vert_dielectric_constant = 0;
|
||||||
// aspect_ratio = 0;
|
// aspect_ratio = 0;
|
||||||
// miller_value = 0;
|
// miller_value = 0;
|
||||||
// ild_thickness = 0;
|
// ild_thickness = 0;
|
||||||
|
|
||||||
dram_cell_I_off_worst_case_len_temp = 0;
|
dram_cell_I_off_worst_case_len_temp = 0;
|
||||||
|
|
||||||
sram_cell.reset();
|
sram_cell.reset();
|
||||||
dram_acc.reset();
|
dram_acc.reset();
|
||||||
dram_wl.reset();
|
dram_wl.reset();
|
||||||
peri_global.reset();
|
peri_global.reset();
|
||||||
cam_cell.reset();
|
cam_cell.reset();
|
||||||
|
|
||||||
scaling_factor.reset();
|
scaling_factor.reset();
|
||||||
|
|
||||||
wire_local.reset();
|
wire_local.reset();
|
||||||
wire_inside_mat.reset();
|
wire_inside_mat.reset();
|
||||||
wire_outside_mat.reset();
|
wire_outside_mat.reset();
|
||||||
|
|
||||||
sram.reset();
|
sram.reset();
|
||||||
dram.reset();
|
dram.reset();
|
||||||
cam.reset();
|
cam.reset();
|
||||||
|
|
||||||
chip_layout_overhead = 0;
|
chip_layout_overhead = 0;
|
||||||
macro_layout_overhead = 0;
|
macro_layout_overhead = 0;
|
||||||
sckt_co_eff = 0;
|
sckt_co_eff = 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DynamicParameter
|
class DynamicParameter {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
bool is_tag;
|
bool is_tag;
|
||||||
bool pure_ram;
|
bool pure_ram;
|
||||||
bool pure_cam;
|
bool pure_cam;
|
||||||
|
@ -313,8 +305,8 @@ class DynamicParameter
|
||||||
int num_so_b_mat;
|
int num_so_b_mat;
|
||||||
int num_si_b_subbank;
|
int num_si_b_subbank;
|
||||||
int num_so_b_subbank;
|
int num_so_b_subbank;
|
||||||
int num_si_b_bank_per_port;
|
int num_si_b_bank_per_port;
|
||||||
int num_so_b_bank_per_port;
|
int num_so_b_bank_per_port;
|
||||||
|
|
||||||
int number_way_select_signals_mat;
|
int number_way_select_signals_mat;
|
||||||
int num_act_mats_hor_dir;
|
int num_act_mats_hor_dir;
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -41,57 +42,56 @@ Router::Router(
|
||||||
double I_,
|
double I_,
|
||||||
double O_,
|
double O_,
|
||||||
double M_
|
double M_
|
||||||
):flit_size(flit_size_),
|
): flit_size(flit_size_),
|
||||||
deviceType(dt),
|
deviceType(dt),
|
||||||
I(I_),
|
I(I_),
|
||||||
O(O_),
|
O(O_),
|
||||||
M(M_)
|
M(M_) {
|
||||||
{
|
vc_buffer_size = vc_buf;
|
||||||
vc_buffer_size = vc_buf;
|
vc_count = vc_c;
|
||||||
vc_count = vc_c;
|
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_;
|
||||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_;
|
double technology = g_ip->F_sz_um;
|
||||||
double technology = g_ip->F_sz_um;
|
|
||||||
|
|
||||||
Vdd = dt->Vdd;
|
Vdd = dt->Vdd;
|
||||||
|
|
||||||
/*Crossbar parameters. Transmission gate is employed for connector*/
|
/*Crossbar parameters. Transmission gate is employed for connector*/
|
||||||
NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/
|
NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/
|
||||||
PTtr = 20*technology*1e-6/2; /* pmos tr. length*/
|
PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/
|
||||||
wt = 15*technology*1e-6/2; /*track width*/
|
wt = 15 * technology * 1e-6 / 2; /*track width*/
|
||||||
ht = 15*technology*1e-6/2; /*track height*/
|
ht = 15 * technology * 1e-6 / 2; /*track height*/
|
||||||
// I = 5; /*Number of crossbar input ports*/
|
// I = 5; /*Number of crossbar input ports*/
|
||||||
// O = 5; /*Number of crossbar output ports*/
|
// O = 5; /*Number of crossbar output ports*/
|
||||||
NTi = 12.5*technology*1e-6/2;
|
NTi = 12.5 * technology * 1e-6 / 2;
|
||||||
PTi = 25*technology*1e-6/2;
|
PTi = 25 * technology * 1e-6 / 2;
|
||||||
|
|
||||||
NTid = 60*technology*1e-6/2; //m
|
NTid = 60 * technology * 1e-6 / 2; //m
|
||||||
PTid = 120*technology*1e-6/2; // m
|
PTid = 120 * technology * 1e-6 / 2; // m
|
||||||
NTod = 60*technology*1e-6/2; // m
|
NTod = 60 * technology * 1e-6 / 2; // m
|
||||||
PTod = 120*technology*1e-6/2; // m
|
PTod = 120 * technology * 1e-6 / 2; // m
|
||||||
|
|
||||||
calc_router_parameters();
|
calc_router_parameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
Router::~Router(){}
|
Router::~Router() {}
|
||||||
|
|
||||||
|
|
||||||
double //wire cap with triple spacing
|
double //wire cap with triple spacing
|
||||||
Router::Cw3(double length) {
|
Router::Cw3(double length) {
|
||||||
Wire wc(g_ip->wt, length, 1, 3, 3);
|
Wire wc(g_ip->wt, length, 1, 3, 3);
|
||||||
return (wc.wire_cap(length));
|
return (wc.wire_cap(length));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Function to calculate the gate capacitance*/
|
/*Function to calculate the gate capacitance*/
|
||||||
double
|
double
|
||||||
Router::gate_cap(double w) {
|
Router::gate_cap(double w) {
|
||||||
return (double) gate_C (w*1e6 /*u*/, 0);
|
return (double) gate_C (w*1e6 /*u*/, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*Function to calculate the diffusion capacitance*/
|
/*Function to calculate the diffusion capacitance*/
|
||||||
double
|
double
|
||||||
Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
|
Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
|
||||||
double s /*number of stacking transistors*/) {
|
double s /*number of stacking transistors*/) {
|
||||||
return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
|
return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -100,212 +100,216 @@ Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/,
|
||||||
// Model for simple transmission gate
|
// Model for simple transmission gate
|
||||||
double
|
double
|
||||||
Router::transmission_buf_inpcap() {
|
Router::transmission_buf_inpcap() {
|
||||||
return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
|
return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Router::transmission_buf_outcap() {
|
Router::transmission_buf_outcap() {
|
||||||
return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1);
|
return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Router::transmission_buf_ctrcap() {
|
Router::transmission_buf_ctrcap() {
|
||||||
return gate_cap(NTtr)+gate_cap(PTtr);
|
return gate_cap(NTtr) + gate_cap(PTtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Router::crossbar_inpline() {
|
Router::crossbar_inpline() {
|
||||||
return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
|
return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) +
|
||||||
gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
|
gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Router::crossbar_outline() {
|
Router::crossbar_outline() {
|
||||||
return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
|
return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) +
|
||||||
gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
|
gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Router::crossbar_ctrline() {
|
Router::crossbar_ctrline() {
|
||||||
return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
|
return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() +
|
||||||
diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
|
diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) +
|
||||||
gate_cap(NTi) + gate_cap(PTi));
|
gate_cap(NTi) + gate_cap(PTi));
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
Router::tr_crossbar_power() {
|
Router::tr_crossbar_power() {
|
||||||
return (crossbar_inpline()*Vdd*Vdd*flit_size/2 +
|
return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 +
|
||||||
crossbar_outline()*Vdd*Vdd*flit_size/2)*2;
|
crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Router::buffer_stats()
|
void Router::buffer_stats() {
|
||||||
{
|
DynamicParameter dyn_p;
|
||||||
DynamicParameter dyn_p;
|
dyn_p.is_tag = false;
|
||||||
dyn_p.is_tag = false;
|
dyn_p.pure_cam = false;
|
||||||
dyn_p.pure_cam = false;
|
dyn_p.fully_assoc = false;
|
||||||
dyn_p.fully_assoc = false;
|
dyn_p.pure_ram = true;
|
||||||
dyn_p.pure_ram = true;
|
dyn_p.is_dram = false;
|
||||||
dyn_p.is_dram = false;
|
dyn_p.is_main_mem = false;
|
||||||
dyn_p.is_main_mem = false;
|
dyn_p.num_subarrays = 1;
|
||||||
dyn_p.num_subarrays = 1;
|
dyn_p.num_mats = 1;
|
||||||
dyn_p.num_mats = 1;
|
dyn_p.Ndbl = 1;
|
||||||
dyn_p.Ndbl = 1;
|
dyn_p.Ndwl = 1;
|
||||||
dyn_p.Ndwl = 1;
|
dyn_p.Nspd = 1;
|
||||||
dyn_p.Nspd = 1;
|
dyn_p.deg_bl_muxing = 1;
|
||||||
dyn_p.deg_bl_muxing = 1;
|
dyn_p.deg_senseamp_muxing_non_associativity = 1;
|
||||||
dyn_p.deg_senseamp_muxing_non_associativity = 1;
|
dyn_p.Ndsam_lev_1 = 1;
|
||||||
dyn_p.Ndsam_lev_1 = 1;
|
dyn_p.Ndsam_lev_2 = 1;
|
||||||
dyn_p.Ndsam_lev_2 = 1;
|
dyn_p.Ndcm = 1;
|
||||||
dyn_p.Ndcm = 1;
|
dyn_p.number_addr_bits_mat = 8;
|
||||||
dyn_p.number_addr_bits_mat = 8;
|
dyn_p.number_way_select_signals_mat = 1;
|
||||||
dyn_p.number_way_select_signals_mat = 1;
|
dyn_p.number_subbanks_decode = 0;
|
||||||
dyn_p.number_subbanks_decode = 0;
|
dyn_p.num_act_mats_hor_dir = 1;
|
||||||
dyn_p.num_act_mats_hor_dir = 1;
|
dyn_p.V_b_sense = Vdd; // FIXME check power calc.
|
||||||
dyn_p.V_b_sense = Vdd; // FIXME check power calc.
|
dyn_p.ram_cell_tech_type = 0;
|
||||||
dyn_p.ram_cell_tech_type = 0;
|
dyn_p.num_r_subarray = (int) vc_buffer_size;
|
||||||
dyn_p.num_r_subarray = (int) vc_buffer_size;
|
dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
|
||||||
dyn_p.num_c_subarray = (int) flit_size * (int) vc_count;
|
dyn_p.num_mats_h_dir = 1;
|
||||||
dyn_p.num_mats_h_dir = 1;
|
dyn_p.num_mats_v_dir = 1;
|
||||||
dyn_p.num_mats_v_dir = 1;
|
dyn_p.num_do_b_subbank = (int)flit_size;
|
||||||
dyn_p.num_do_b_subbank = (int)flit_size;
|
dyn_p.num_di_b_subbank = (int)flit_size;
|
||||||
dyn_p.num_di_b_subbank = (int)flit_size;
|
dyn_p.num_do_b_mat = (int) flit_size;
|
||||||
dyn_p.num_do_b_mat = (int) flit_size;
|
dyn_p.num_di_b_mat = (int) flit_size;
|
||||||
dyn_p.num_di_b_mat = (int) flit_size;
|
dyn_p.num_do_b_mat = (int) flit_size;
|
||||||
dyn_p.num_do_b_mat = (int) flit_size;
|
dyn_p.num_di_b_mat = (int) flit_size;
|
||||||
dyn_p.num_di_b_mat = (int) flit_size;
|
dyn_p.num_do_b_bank_per_port = (int) flit_size;
|
||||||
dyn_p.num_do_b_bank_per_port = (int) flit_size;
|
dyn_p.num_di_b_bank_per_port = (int) flit_size;
|
||||||
dyn_p.num_di_b_bank_per_port = (int) flit_size;
|
dyn_p.out_w = (int) flit_size;
|
||||||
dyn_p.out_w = (int) flit_size;
|
|
||||||
|
|
||||||
dyn_p.use_inp_params = 1;
|
dyn_p.use_inp_params = 1;
|
||||||
dyn_p.num_wr_ports = (unsigned int) vc_count;
|
dyn_p.num_wr_ports = (unsigned int) vc_count;
|
||||||
dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
|
dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book
|
||||||
dyn_p.num_rw_ports = 0;
|
dyn_p.num_rw_ports = 0;
|
||||||
dyn_p.num_se_rd_ports =0;
|
dyn_p.num_se_rd_ports = 0;
|
||||||
dyn_p.num_search_ports =0;
|
dyn_p.num_search_ports = 0;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
|
dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports +
|
||||||
dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
|
dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports);
|
||||||
dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
|
dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 +
|
||||||
(dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
|
(dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) +
|
||||||
dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
|
dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports;
|
||||||
|
|
||||||
Mat buff(dyn_p);
|
Mat buff(dyn_p);
|
||||||
buff.compute_delays(0);
|
buff.compute_delays(0);
|
||||||
buff.compute_power_energy();
|
buff.compute_power_energy();
|
||||||
buffer.power.readOp = buff.power.readOp;
|
buffer.power.readOp = buff.power.readOp;
|
||||||
buffer.power.writeOp = buffer.power.readOp; //FIXME
|
buffer.power.writeOp = buffer.power.readOp; //FIXME
|
||||||
buffer.area = buff.area;
|
buffer.area = buff.area;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
Router::cb_stats ()
|
Router::cb_stats () {
|
||||||
{
|
if (1) {
|
||||||
if (1) {
|
Crossbar c_b(I, O, flit_size);
|
||||||
Crossbar c_b(I, O, flit_size);
|
c_b.compute_power();
|
||||||
c_b.compute_power();
|
crossbar.delay = c_b.delay;
|
||||||
crossbar.delay = c_b.delay;
|
crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
|
||||||
crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic;
|
crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
|
||||||
crossbar.power.readOp.leakage = c_b.power.readOp.leakage;
|
crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
|
||||||
crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage;
|
crossbar.area = c_b.area;
|
||||||
crossbar.area = c_b.area;
|
|
||||||
// c_b.print_crossbar();
|
// c_b.print_crossbar();
|
||||||
}
|
} else {
|
||||||
else {
|
crossbar.power.readOp.dynamic = tr_crossbar_power();
|
||||||
crossbar.power.readOp.dynamic = tr_crossbar_power();
|
crossbar.power.readOp.leakage = flit_size * I * O *
|
||||||
crossbar.power.readOp.leakage = flit_size * I * O *
|
cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
|
||||||
cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
|
crossbar.power.readOp.gate_leakage = flit_size * I * O *
|
||||||
crossbar.power.readOp.gate_leakage = flit_size * I * O *
|
cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg);
|
||||||
cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg);
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Router::get_router_power()
|
Router::get_router_power() {
|
||||||
{
|
/* calculate buffer stats */
|
||||||
/* calculate buffer stats */
|
buffer_stats();
|
||||||
buffer_stats();
|
|
||||||
|
|
||||||
/* calculate cross-bar stats */
|
/* calculate cross-bar stats */
|
||||||
cb_stats();
|
cb_stats();
|
||||||
|
|
||||||
/* calculate arbiter stats */
|
/* calculate arbiter stats */
|
||||||
Arbiter vcarb(vc_count, flit_size, buffer.area.w);
|
Arbiter vcarb(vc_count, flit_size, buffer.area.w);
|
||||||
Arbiter cbarb(I, flit_size, crossbar.area.w);
|
Arbiter cbarb(I, flit_size, crossbar.area.w);
|
||||||
vcarb.compute_power();
|
vcarb.compute_power();
|
||||||
cbarb.compute_power();
|
cbarb.compute_power();
|
||||||
arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
|
arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I +
|
||||||
cbarb.power.readOp.dynamic * O;
|
cbarb.power.readOp.dynamic * O;
|
||||||
arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
|
arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I +
|
||||||
cbarb.power.readOp.leakage * O;
|
cbarb.power.readOp.leakage * O;
|
||||||
arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
|
arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I +
|
||||||
cbarb.power.readOp.gate_leakage * O;
|
cbarb.power.readOp.gate_leakage * O;
|
||||||
|
|
||||||
// arb_stats();
|
// arb_stats();
|
||||||
power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) +
|
power.readOp.dynamic = ((buffer.power.readOp.dynamic +
|
||||||
crossbar.power.readOp.dynamic +
|
buffer.power.writeOp.dynamic) +
|
||||||
arbiter.power.readOp.dynamic)*MIN(I, O)*M;
|
crossbar.power.readOp.dynamic +
|
||||||
double pppm_t[4] = {1,I,I,1};
|
arbiter.power.readOp.dynamic) * MIN(I, O) * M;
|
||||||
power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg;
|
double pppm_t[4] = {1, I, I, 1};
|
||||||
|
power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) *
|
||||||
|
pppm_lkg;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Router::get_router_delay ()
|
Router::get_router_delay () {
|
||||||
{
|
FREQUENCY = 5; // move this to config file --TODO
|
||||||
FREQUENCY=5; // move this to config file --TODO
|
cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps
|
||||||
cycle_time = (1/(double)FREQUENCY)*1e3; //ps
|
delay = 4;
|
||||||
delay = 4;
|
max_cyc = 17 * g_tp.FO4; //s
|
||||||
max_cyc = 17 * g_tp.FO4; //s
|
max_cyc *= 1e12; //ps
|
||||||
max_cyc *= 1e12; //ps
|
if (cycle_time < max_cyc) {
|
||||||
if (cycle_time < max_cyc) {
|
FREQUENCY = (1 / max_cyc) * 1e3; //GHz
|
||||||
FREQUENCY = (1/max_cyc)*1e3; //GHz
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Router::get_router_area()
|
Router::get_router_area() {
|
||||||
{
|
area.h = I * buffer.area.h;
|
||||||
area.h = I*buffer.area.h;
|
area.w = buffer.area.w + crossbar.area.w;
|
||||||
area.w = buffer.area.w+crossbar.area.w;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Router::calc_router_parameters()
|
Router::calc_router_parameters() {
|
||||||
{
|
/* calculate router frequency and pipeline cycles */
|
||||||
/* calculate router frequency and pipeline cycles */
|
get_router_delay();
|
||||||
get_router_delay();
|
|
||||||
|
|
||||||
/* router power stats */
|
/* router power stats */
|
||||||
get_router_power();
|
get_router_power();
|
||||||
|
|
||||||
/* area stats */
|
/* area stats */
|
||||||
get_router_area();
|
get_router_area();
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
Router::print_router()
|
Router::print_router() {
|
||||||
{
|
cout << "\n\nRouter stats:\n";
|
||||||
cout << "\n\nRouter stats:\n";
|
cout << "\tRouter Area - " << area.get_area()*1e-6 << "(mm^2)\n";
|
||||||
cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n";
|
cout << "\tMaximum possible network frequency - " << (1 / max_cyc)*1e3
|
||||||
cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n";
|
<< "GHz\n";
|
||||||
cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n";
|
cout << "\tNetwork frequency - " << FREQUENCY << " GHz\n";
|
||||||
cout << "\tNo. of Virtual channels - " << vc_count << "\n";
|
cout << "\tNo. of Virtual channels - " << vc_count << "\n";
|
||||||
cout << "\tNo. of pipeline stages - " << delay << endl;
|
cout << "\tNo. of pipeline stages - " << delay << endl;
|
||||||
cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
|
cout << "\tLink bandwidth - " << flit_size << " (bits)\n";
|
||||||
cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n";
|
cout << "\tNo. of buffer entries per virtual channel - "
|
||||||
cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n";
|
<< vc_buffer_size << "\n";
|
||||||
cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n";
|
cout << "\tSimple buffer Area - " << buffer.area.get_area()*1e-6
|
||||||
cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n";
|
<< "(mm^2)\n";
|
||||||
cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n";
|
cout << "\tSimple buffer access (Read) - "
|
||||||
cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n";
|
<< buffer.power.readOp.dynamic * 1e9 << " (nJ)\n";
|
||||||
cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n";
|
cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3
|
||||||
cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<<arbiter.power.readOp.dynamic * 1e9 <<" (nJ)\n";
|
<< " (mW)\n";
|
||||||
cout << "\tArbiter leakage (VC arb + Crossbar arb) - "<<arbiter.power.readOp.leakage * 1e3 <<" (mW)\n";
|
cout << "\tCrossbar Area - " << crossbar.area.get_area()*1e-6
|
||||||
|
<< "(mm^2)\n";
|
||||||
|
cout << "\tCross bar access energy - "
|
||||||
|
<< crossbar.power.readOp.dynamic * 1e9 << " (nJ)\n";
|
||||||
|
cout << "\tCross bar leakage power - "
|
||||||
|
<< crossbar.power.readOp.leakage * 1e3 << " (mW)\n";
|
||||||
|
cout << "\tArbiter access energy (VC arb + Crossbar arb) - "
|
||||||
|
<< arbiter.power.readOp.dynamic * 1e9 << " (nJ)\n";
|
||||||
|
cout << "\tArbiter leakage (VC arb + Crossbar arb) - "
|
||||||
|
<< arbiter.power.readOp.leakage * 1e3 << " (mW)\n";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -47,9 +48,8 @@
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "wire.h"
|
#include "wire.h"
|
||||||
|
|
||||||
class Router : public Component
|
class Router : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Router(
|
Router(
|
||||||
double flit_size_,
|
double flit_size_,
|
||||||
double vc_buf, /* vc size = vc_buffer_size * flit_size */
|
double vc_buf, /* vc size = vc_buffer_size * flit_size */
|
||||||
|
@ -70,9 +70,9 @@ class Router : public Component
|
||||||
double vc_count;
|
double vc_count;
|
||||||
double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
|
double vc_buffer_size; /* vc size = vc_buffer_size * flit_size */
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TechnologyParameter::DeviceType *deviceType;
|
TechnologyParameter::DeviceType *deviceType;
|
||||||
double FREQUENCY; // move this to config file --TODO
|
double FREQUENCY; // move this to config file --TODO
|
||||||
double Cw3(double len);
|
double Cw3(double len);
|
||||||
double gate_cap(double w);
|
double gate_cap(double w);
|
||||||
double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
|
double diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, double stack);
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -39,158 +40,152 @@
|
||||||
#include "subarray.h"
|
#include "subarray.h"
|
||||||
|
|
||||||
/*
 * Build a subarray from the dynamic parameters.
 *
 * Adds ECC columns when g_ip->add_ecc_b_ is set, computes the subarray
 * height and width (including wordline stitching overhead) and then derives
 * the wordline / bitline parasitics through compute_C().
 *
 * dp_     dynamic parameters describing the array organization
 * is_fa_  true when the subarray belongs to a fully associative cache
 */
Subarray::Subarray(const DynamicParameter & dp_, bool is_fa_):
        dp(dp_), num_rows(dp.num_r_subarray), num_cols(dp.num_c_subarray),
        num_cols_fa_cam(dp.tag_num_c_subarray), num_cols_fa_ram(dp.data_num_c_subarray),
        cell(dp.cell), cam_cell(dp.cam_cell), is_fa(is_fa_) {
    const bool has_cam = is_fa || dp.pure_cam;

    if (has_cam) {
        // CAM-based organization: fully associative cache or pure CAM.
        // No dummy row is added to num_rows here because the dummy row does
        // not need a decoder.
        if (g_ip->add_ecc_b_) {
            num_cols_fa_cam += (int)ceil(num_cols_fa_cam / num_bits_per_ecc_b_);
        }

        if (is_fa) {
            // A fully associative cache also carries a RAM (data) portion.
            if (g_ip->add_ecc_b_) {
                num_cols_fa_ram += (int)ceil(num_cols_fa_ram / num_bits_per_ecc_b_);
            }
        } else {
            // A pure CAM has no RAM columns.
            num_cols_fa_ram = 0;
        }
        num_cols = num_cols_fa_cam + num_cols_fa_ram;

        // The subarray height is decided by the CAM array; blank space in
        // the SRAM array is filled with dummy cells.
        area.h = cam_cell.h * (num_rows + 1);

        // Width: CAM columns + RAM columns + wordline stitching overhead,
        // plus the NAND gate connecting the two halves (16 pitches) and the
        // matchline-to-RAM-wordline drivers (128 pitches).
        // NOTE(review): if the column counts are integer typed, the quotient
        // is truncated before ceil() sees it -- confirm this is intended.
        area.w = cam_cell.w * num_cols_fa_cam + cell.w * num_cols_fa_ram
                 + ceil((num_cols_fa_cam + num_cols_fa_ram) /
                        sram_num_cells_wl_stitching_) *
                 g_tp.ram_wl_stitching_overhead_
                 + 16 * g_tp.wire_local.pitch
                 + 128 * g_tp.wire_local.pitch;
    } else {
        // Plain RAM organization.
        if (g_ip->add_ecc_b_) {
            // ECC overhead
            num_cols += (int)ceil(num_cols / num_bits_per_ecc_b_);
        }

        // Number of cells between wordline stitches depends on the cell
        // technology.
        uint32_t ram_num_cells_wl_stitching;
        if (dp.ram_cell_tech_type == lp_dram) {
            ram_num_cells_wl_stitching = dram_num_cells_wl_stitching_;
        } else if (dp.ram_cell_tech_type == comm_dram) {
            ram_num_cells_wl_stitching = comm_dram_num_cells_wl_stitching_;
        } else {
            ram_num_cells_wl_stitching = sram_num_cells_wl_stitching_;
        }

        area.h = cell.h * num_rows;

        // Cell columns plus stitching overhead.
        // NOTE(review): if num_cols is integer typed, the division below
        // truncates before ceil() is applied -- confirm this is intended.
        area.w = cell.w * num_cols +
                 ceil(num_cols / ram_num_cells_wl_stitching) * g_tp.ram_wl_stitching_overhead_;
    }

    assert(area.h > 0);
    assert(area.w > 0);
    compute_C();
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Nothing is released explicitly in the destructor body.
Subarray::~Subarray() {}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double Subarray::get_total_cell_area()
|
double Subarray::get_total_cell_area() {
|
||||||
{
|
|
||||||
// return (is_fa==false? cell.get_area() * num_rows * num_cols
|
// return (is_fa==false? cell.get_area() * num_rows * num_cols
|
||||||
// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
|
// //: cam_cell.h*(num_rows+1)*(num_cols_fa_cam + sram_cell.get_area()*num_cols_fa_ram));
|
||||||
// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
|
// : cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
|
||||||
// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
|
// //: cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam + sram_cell.get_area()*(num_rows+1)*num_cols_fa_ram);//for FA, this area does not include the dummy cells in SRAM arrays.
|
||||||
|
|
||||||
if (!(is_fa || dp.pure_cam))
|
if (!(is_fa || dp.pure_cam))
|
||||||
return (cell.get_area() * num_rows * num_cols);
|
return (cell.get_area() * num_rows * num_cols);
|
||||||
else if (is_fa)
|
else if (is_fa) {
|
||||||
{ //for FA, this area includes the dummy cells in SRAM arrays.
|
//for FA, this area includes the dummy cells in SRAM arrays.
|
||||||
//return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
|
//return (cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram));
|
||||||
//cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
|
//cout<<"diff" <<cam_cell.get_area()*(num_rows+1)*(num_cols_fa_cam + num_cols_fa_ram)- cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram)<<endl;
|
||||||
return (cam_cell.h*(num_rows+1)*(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
|
return (cam_cell.h * (num_rows + 1) *
|
||||||
|
(cam_cell.w*num_cols_fa_cam + cell.w*num_cols_fa_ram));
|
||||||
|
} else {
|
||||||
|
return (cam_cell.get_area() * (num_rows + 1) * num_cols_fa_cam );
|
||||||
}
|
}
|
||||||
else
|
|
||||||
return (cam_cell.get_area()*(num_rows+1)*num_cols_fa_cam );
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void Subarray::compute_C()
|
void Subarray::compute_C() {
|
||||||
{
|
double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
|
||||||
double c_w_metal = cell.w * g_tp.wire_local.C_per_um;
|
double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
|
||||||
double r_w_metal = cell.w * g_tp.wire_local.R_per_um;
|
double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
|
||||||
double C_b_metal = cell.h * g_tp.wire_local.C_per_um;
|
double C_b_row_drain_C;
|
||||||
double C_b_row_drain_C;
|
|
||||||
|
|
||||||
if (dp.is_dram)
|
if (dp.is_dram) {
|
||||||
{
|
C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
|
||||||
C_wl = (gate_C_pass(g_tp.dram.cell_a_w, g_tp.dram.b_w, true, true) + c_w_metal) * num_cols;
|
|
||||||
|
|
||||||
if (dp.ram_cell_tech_type == comm_dram)
|
if (dp.ram_cell_tech_type == comm_dram) {
|
||||||
{
|
C_bl = num_rows * C_b_metal;
|
||||||
C_bl = num_rows * C_b_metal;
|
} else {
|
||||||
|
C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
|
||||||
|
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!(is_fa || dp.pure_cam)) {
|
||||||
|
C_wl = (gate_C_pass(g_tp.sram.cell_a_w,
|
||||||
|
(g_tp.sram.b_w - 2 * g_tp.sram.cell_a_w) / 2.0,
|
||||||
|
false, true) * 2 +
|
||||||
|
c_w_metal) * num_cols;
|
||||||
|
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
|
||||||
|
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
|
||||||
|
} else {
|
||||||
|
//Following is wordline not matchline
|
||||||
|
//CAM portion
|
||||||
|
c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
|
||||||
|
r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
|
||||||
|
C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w,
|
||||||
|
(g_tp.cam.b_w - 2 * g_tp.cam.cell_a_w) /
|
||||||
|
2.0, false, true) * 2 +
|
||||||
|
c_w_metal) * num_cols_fa_cam;
|
||||||
|
R_wl_cam = (r_w_metal) * num_cols_fa_cam;
|
||||||
|
|
||||||
|
if (!dp.pure_cam) {
|
||||||
|
//RAM portion
|
||||||
|
c_w_metal = cell.w * g_tp.wire_local.C_per_um;
|
||||||
|
r_w_metal = cell.w * g_tp.wire_local.R_per_um;
|
||||||
|
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w,
|
||||||
|
(g_tp.sram.b_w - 2 *
|
||||||
|
g_tp.sram.cell_a_w) / 2.0, false,
|
||||||
|
true) * 2 +
|
||||||
|
c_w_metal) * num_cols_fa_ram;
|
||||||
|
R_wl_ram = (r_w_metal) * num_cols_fa_ram;
|
||||||
|
} else {
|
||||||
|
C_wl_ram = R_wl_ram = 0;
|
||||||
|
}
|
||||||
|
C_wl = C_wl_cam + C_wl_ram;
|
||||||
|
C_wl += (16 + 128) * g_tp.wire_local.pitch *
|
||||||
|
g_tp.wire_local.C_per_um;
|
||||||
|
|
||||||
|
R_wl = R_wl_cam + R_wl_ram;
|
||||||
|
R_wl += (16 + 128) * g_tp.wire_local.pitch *
|
||||||
|
g_tp.wire_local.R_per_um;
|
||||||
|
|
||||||
|
//there are two ways to write to a FA,
|
||||||
|
//1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
|
||||||
|
//2) using separate wordline for read/write and search in RAM.
|
||||||
|
//We are using the second approach.
|
||||||
|
|
||||||
|
//Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
|
||||||
|
C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
|
||||||
|
C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
|
||||||
|
C_bl_cam = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
|
||||||
|
//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
|
||||||
|
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
|
||||||
|
C_bl = (num_rows + 1) * (C_b_row_drain_C + C_b_metal);
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
C_b_row_drain_C = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; // due to shared contact
|
|
||||||
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (!(is_fa ||dp.pure_cam))
|
|
||||||
{
|
|
||||||
C_wl = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
|
|
||||||
c_w_metal) * num_cols;
|
|
||||||
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
|
|
||||||
C_bl = num_rows * (C_b_row_drain_C + C_b_metal);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//Following is wordline not matchline
|
|
||||||
//CAM portion
|
|
||||||
c_w_metal = cam_cell.w * g_tp.wire_local.C_per_um;
|
|
||||||
r_w_metal = cam_cell.w * g_tp.wire_local.R_per_um;
|
|
||||||
C_wl_cam = (gate_C_pass(g_tp.cam.cell_a_w, (g_tp.cam.b_w-2*g_tp.cam.cell_a_w)/2.0, false, true)*2 +
|
|
||||||
c_w_metal) * num_cols_fa_cam;
|
|
||||||
R_wl_cam = (r_w_metal) * num_cols_fa_cam;
|
|
||||||
|
|
||||||
if (!dp.pure_cam)
|
|
||||||
{
|
|
||||||
//RAM portion
|
|
||||||
c_w_metal = cell.w * g_tp.wire_local.C_per_um;
|
|
||||||
r_w_metal = cell.w * g_tp.wire_local.R_per_um;
|
|
||||||
C_wl_ram = (gate_C_pass(g_tp.sram.cell_a_w, (g_tp.sram.b_w-2*g_tp.sram.cell_a_w)/2.0, false, true)*2 +
|
|
||||||
c_w_metal) * num_cols_fa_ram;
|
|
||||||
R_wl_ram = (r_w_metal) * num_cols_fa_ram;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
C_wl_ram = R_wl_ram =0;
|
|
||||||
}
|
|
||||||
C_wl = C_wl_cam + C_wl_ram;
|
|
||||||
C_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.C_per_um;
|
|
||||||
|
|
||||||
R_wl = R_wl_cam + R_wl_ram;
|
|
||||||
R_wl += (16+128)*g_tp.wire_local.pitch*g_tp.wire_local.R_per_um;
|
|
||||||
|
|
||||||
//there are two ways to write to a FA,
|
|
||||||
//1) Write to CAM array then force a match on match line to active the corresponding wordline in RAM;
|
|
||||||
//2) using separate wordline for read/write and search in RAM.
|
|
||||||
//We are using the second approach.
|
|
||||||
|
|
||||||
//Bitline CAM portion This is bitline not searchline. We assume no sharing between bitline and searchline according to SUN's implementations.
|
|
||||||
C_b_metal = cam_cell.h * g_tp.wire_local.C_per_um;
|
|
||||||
C_b_row_drain_C = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0; // due to shared contact
|
|
||||||
C_bl_cam = (num_rows+1) * (C_b_row_drain_C + C_b_metal);
|
|
||||||
//height of subarray is decided by CAM array. blank space in sram array are filled with dummy cells
|
|
||||||
C_b_row_drain_C = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; // due to shared contact
|
|
||||||
C_bl = (num_rows +1) * (C_b_row_drain_C + C_b_metal);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -41,9 +42,8 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
class Subarray : public Component
|
class Subarray : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Subarray(const DynamicParameter & dp, bool is_fa_);
|
Subarray(const DynamicParameter & dp, bool is_fa_);
|
||||||
~Subarray();
|
~Subarray();
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ class Subarray : public Component
|
||||||
double C_wl, C_wl_cam, C_wl_ram;
|
double C_wl, C_wl_cam, C_wl_ram;
|
||||||
double R_wl, R_wl_cam, R_wl_ram;
|
double R_wl, R_wl_cam, R_wl_ram;
|
||||||
double C_bl, C_bl_cam;
|
double C_bl, C_bl_cam;
|
||||||
private:
|
private:
|
||||||
|
|
||||||
void compute_C(); // compute bitline and wordline capacitance
|
void compute_C(); // compute bitline and wordline capacitance
|
||||||
};
|
};
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -37,390 +38,390 @@
|
||||||
#include "uca.h"
|
#include "uca.h"
|
||||||
|
|
||||||
/*
 * Build a uniform cache access (UCA) array: one bank model replicated
 * nbanks times and connected by H-trees.
 *
 * Chooses the bank grid, derives the per-bank signal counts from the port
 * counts, creates the address / data-in / data-out H-trees (plus the search
 * H-trees for fully associative and pure-CAM arrays), and finally runs the
 * delay and power/energy calculations.
 *
 * dyn_p  dynamic parameters describing the array organization
 */
UCA::UCA(const DynamicParameter & dyn_p)
        : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) {
    // Split log2(nbanks) between the two directions; the vertical direction
    // gets the smaller share when the bank is taller than it is wide.
    int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)
                                  / 2 : (_log2(nbanks) - _log2(nbanks) / 2));
    int num_banks_hor_dir = nbanks / num_banks_ver_dir;

    // Port counts come either from the dynamic parameters or from the
    // global input parameters.
    if (dp.use_inp_params) {
        RWP = dp.num_rw_ports;
        ERP = dp.num_rd_ports;
        EWP = dp.num_wr_ports;
        SCHP = dp.num_search_ports;
    } else {
        RWP = g_ip->num_rw_ports;
        ERP = g_ip->num_rd_ports;
        EWP = g_ip->num_wr_ports;
        SCHP = g_ip->num_search_ports;
    }

    // Per-bank signal counts.
    num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) *
                      (RWP + ERP + EWP);
    num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
    num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
    num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
    num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;

    if (!dp.fully_assoc && !dp.pure_cam) {
        // In fast-access mode the data array outputs data_assoc times as
        // many data-out bits.
        if (g_ip->fast_access && dp.is_tag == false) {
            num_do_b_bank *= g_ip->data_assoc;
        }

        htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                  num_addr_b_bank, num_di_b_bank, 0,
                                  num_do_b_bank, 0, num_banks_ver_dir * 2,
                                  num_banks_hor_dir * 2, Add_htree, true);
        htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                   num_addr_b_bank, num_di_b_bank, 0,
                                   num_do_b_bank, 0, num_banks_ver_dir * 2,
                                   num_banks_hor_dir * 2, Data_in_htree, true);
        htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                    num_addr_b_bank, num_di_b_bank, 0,
                                    num_do_b_bank, 0, num_banks_ver_dir * 2,
                                    num_banks_hor_dir * 2, Data_out_htree, true);

        // No search network for plain RAM arrays; keep the pointers in a
        // defined (null) state instead of leaving them uninitialized.
        htree_in_search = 0;
        htree_out_search = 0;
    } else {
        // Fully associative or pure CAM: search-in / search-out bits are
        // routed as well, and two extra H-trees carry the search traffic.
        htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                  num_addr_b_bank, num_di_b_bank,
                                  num_si_b_bank, num_do_b_bank, num_so_b_bank,
                                  num_banks_ver_dir * 2, num_banks_hor_dir * 2,
                                  Add_htree, true);
        htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                   num_addr_b_bank, num_di_b_bank,
                                   num_si_b_bank, num_do_b_bank, num_so_b_bank,
                                   num_banks_ver_dir * 2, num_banks_hor_dir * 2,
                                   Data_in_htree, true);
        htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                    num_addr_b_bank, num_di_b_bank,
                                    num_si_b_bank, num_do_b_bank,
                                    num_so_b_bank, num_banks_ver_dir * 2,
                                    num_banks_hor_dir * 2, Data_out_htree, true);
        htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                     num_addr_b_bank, num_di_b_bank,
                                     num_si_b_bank, num_do_b_bank,
                                     num_so_b_bank, num_banks_ver_dir * 2,
                                     num_banks_hor_dir * 2, Data_in_htree, true);
        htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
                                      num_addr_b_bank, num_di_b_bank,
                                      num_si_b_bank, num_do_b_bank,
                                      num_so_b_bank, num_banks_ver_dir * 2,
                                      num_banks_hor_dir * 2, Data_out_htree,
                                      true);
    }

    // The array footprint is taken from the data-in H-tree.
    area.w = htree_in_data->area.w;
    area.h = htree_in_data->area.h;

    area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;

    // delay calculation
    double inrisetime = 0.0;
    compute_delays(inrisetime);
    compute_power_energy();
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
UCA::~UCA()
|
UCA::~UCA() {
|
||||||
{
|
delete htree_in_add;
|
||||||
delete htree_in_add;
|
delete htree_in_data;
|
||||||
delete htree_in_data;
|
delete htree_out_data;
|
||||||
delete htree_out_data;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double UCA::compute_delays(double inrisetime)
|
double UCA::compute_delays(double inrisetime) {
|
||||||
{
|
double outrisetime = bank.compute_delays(inrisetime);
|
||||||
double outrisetime = bank.compute_delays(inrisetime);
|
|
||||||
|
|
||||||
double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
|
double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
|
||||||
double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
|
double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
|
||||||
delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
|
delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
|
||||||
bank.mat.sa_mux_lev_1_predec->delay +
|
bank.mat.sa_mux_lev_1_predec->delay +
|
||||||
bank.mat.sa_mux_lev_1_dec->delay;
|
bank.mat.sa_mux_lev_1_dec->delay;
|
||||||
delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
|
delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
|
||||||
bank.mat.sa_mux_lev_2_predec->delay +
|
bank.mat.sa_mux_lev_2_predec->delay +
|
||||||
bank.mat.sa_mux_lev_2_dec->delay;
|
bank.mat.sa_mux_lev_2_dec->delay;
|
||||||
double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
|
double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
|
||||||
|
|
||||||
delay_before_subarray_output_driver =
|
delay_before_subarray_output_driver =
|
||||||
MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
|
MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
|
||||||
delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
|
delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
|
||||||
MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
|
MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
|
||||||
delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
|
delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
|
||||||
delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
|
delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
|
||||||
bank.htree_out_data->delay + htree_out_data->delay;
|
bank.htree_out_data->delay + htree_out_data->delay;
|
||||||
access_time = bank.mat.delay_comparator;
|
access_time = bank.mat.delay_comparator;
|
||||||
|
|
||||||
double ram_delay_inside_mat;
|
double ram_delay_inside_mat;
|
||||||
if (dp.fully_assoc)
|
if (dp.fully_assoc) {
|
||||||
{
|
//delay of FA contains both CAM tag and RAM data
|
||||||
//delay of FA contains both CAM tag and RAM data
|
{ //delay of CAM
|
||||||
{ //delay of CAM
|
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
|
||||||
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
|
access_time = htree_in_add->delay + bank.htree_in_add->delay;
|
||||||
access_time = htree_in_add->delay + bank.htree_in_add->delay;
|
//delay of fully-associative data array
|
||||||
//delay of fully-associative data array
|
access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
|
||||||
access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
|
}
|
||||||
}
|
} else {
|
||||||
}
|
access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
|
||||||
else
|
|
||||||
{
|
|
||||||
access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dp.is_main_mem)
|
|
||||||
{
|
|
||||||
double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
|
|
||||||
double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
|
|
||||||
delay_from_subarray_out_drv_to_out;
|
|
||||||
access_time = t_rcd + cas_latency;
|
|
||||||
}
|
|
||||||
|
|
||||||
double temp;
|
|
||||||
|
|
||||||
if (!dp.fully_assoc)
|
|
||||||
{
|
|
||||||
temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
|
|
||||||
if (dp.is_dram)
|
|
||||||
{
|
|
||||||
temp += bank.mat.delay_writeback; // temp stores random cycle time
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dp.is_main_mem) {
|
||||||
|
double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
|
||||||
|
double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
|
||||||
|
delay_from_subarray_out_drv_to_out;
|
||||||
|
access_time = t_rcd + cas_latency;
|
||||||
|
}
|
||||||
|
|
||||||
temp = MAX(temp, bank.mat.r_predec->delay);
|
double temp;
|
||||||
temp = MAX(temp, bank.mat.b_mux_predec->delay);
|
|
||||||
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
|
|
||||||
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
|
|
||||||
temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
|
|
||||||
+ bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
|
|
||||||
|
|
||||||
temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
|
if (!dp.fully_assoc) {
|
||||||
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
|
temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
|
||||||
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
|
if (dp.is_dram) {
|
||||||
}
|
temp += bank.mat.delay_writeback; // temp stores random cycle time
|
||||||
|
}
|
||||||
|
|
||||||
// The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
|
|
||||||
if (g_ip->rpters_in_htree == false)
|
|
||||||
{
|
|
||||||
temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
|
|
||||||
}
|
|
||||||
cycle_time = temp;
|
|
||||||
|
|
||||||
double delay_req_network = max_delay_before_row_decoder;
|
temp = MAX(temp, bank.mat.r_predec->delay);
|
||||||
double delay_rep_network = delay_from_subarray_out_drv_to_out;
|
temp = MAX(temp, bank.mat.b_mux_predec->delay);
|
||||||
multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
|
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
|
||||||
|
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
|
||||||
|
} else {
|
||||||
|
ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
|
||||||
|
temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
|
||||||
|
+ bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
|
||||||
|
|
||||||
if (dp.is_main_mem)
|
temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
|
||||||
{
|
temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
|
||||||
multisubbank_interleave_cycle_time = htree_in_add->delay;
|
temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
|
||||||
precharge_delay = htree_in_add->delay +
|
}
|
||||||
bank.htree_in_add->delay + bank.mat.delay_writeback +
|
|
||||||
bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
|
|
||||||
cycle_time = access_time + precharge_delay;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
precharge_delay = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
double dram_array_availability = 0;
|
// The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
|
||||||
if (dp.is_dram)
|
if (g_ip->rpters_in_htree == false) {
|
||||||
{
|
temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
|
||||||
dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
|
}
|
||||||
}
|
cycle_time = temp;
|
||||||
|
|
||||||
return outrisetime;
|
double delay_req_network = max_delay_before_row_decoder;
|
||||||
|
double delay_rep_network = delay_from_subarray_out_drv_to_out;
|
||||||
|
multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
|
||||||
|
|
||||||
|
if (dp.is_main_mem) {
|
||||||
|
multisubbank_interleave_cycle_time = htree_in_add->delay;
|
||||||
|
precharge_delay = htree_in_add->delay +
|
||||||
|
bank.htree_in_add->delay + bank.mat.delay_writeback +
|
||||||
|
bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
|
||||||
|
cycle_time = access_time + precharge_delay;
|
||||||
|
} else {
|
||||||
|
precharge_delay = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double dram_array_availability = 0;
|
||||||
|
if (dp.is_dram) {
|
||||||
|
dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
|
||||||
|
}
|
||||||
|
|
||||||
|
return outrisetime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// note: currently, power numbers are for a bank of an array
|
// note: currently, power numbers are for a bank of an array
|
||||||
void UCA::compute_power_energy()
|
void UCA::compute_power_energy() {
|
||||||
{
|
bank.compute_power_energy();
|
||||||
bank.compute_power_energy();
|
power = bank.power;
|
||||||
power = bank.power;
|
|
||||||
|
|
||||||
power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
|
power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
|
||||||
power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
|
power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
|
||||||
if (dp.fully_assoc || dp.pure_cam)
|
if (dp.fully_assoc || dp.pure_cam)
|
||||||
power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic;
|
power_routing_to_bank.searchOp.dynamic =
|
||||||
|
htree_in_search->power.searchOp.dynamic +
|
||||||
|
htree_out_search->power.searchOp.dynamic;
|
||||||
|
|
||||||
power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage +
|
power_routing_to_bank.readOp.leakage +=
|
||||||
htree_in_data->power.readOp.leakage +
|
htree_in_add->power.readOp.leakage +
|
||||||
htree_out_data->power.readOp.leakage;
|
htree_in_data->power.readOp.leakage +
|
||||||
|
htree_out_data->power.readOp.leakage;
|
||||||
|
|
||||||
power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage +
|
power_routing_to_bank.readOp.gate_leakage +=
|
||||||
htree_in_data->power.readOp.gate_leakage +
|
htree_in_add->power.readOp.gate_leakage +
|
||||||
htree_out_data->power.readOp.gate_leakage;
|
htree_in_data->power.readOp.gate_leakage +
|
||||||
if (dp.fully_assoc || dp.pure_cam)
|
htree_out_data->power.readOp.gate_leakage;
|
||||||
{
|
if (dp.fully_assoc || dp.pure_cam) {
|
||||||
power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
|
power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
|
||||||
power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
|
power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
|
||||||
}
|
}
|
||||||
|
|
||||||
power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
|
power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
|
||||||
power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
|
power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
|
||||||
power.readOp.leakage += power_routing_to_bank.readOp.leakage;
|
power.readOp.leakage += power_routing_to_bank.readOp.leakage;
|
||||||
power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
|
power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
|
||||||
|
|
||||||
// calculate total write energy per access
|
// calculate total write energy per access
|
||||||
power.writeOp.dynamic = power.readOp.dynamic
|
power.writeOp.dynamic = power.readOp.dynamic
|
||||||
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
|
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
|
||||||
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
|
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
|
||||||
- power_routing_to_bank.readOp.dynamic
|
- power_routing_to_bank.readOp.dynamic
|
||||||
+ power_routing_to_bank.writeOp.dynamic
|
+ power_routing_to_bank.writeOp.dynamic
|
||||||
+ bank.htree_in_data->power.readOp.dynamic
|
+ bank.htree_in_data->power.readOp.dynamic
|
||||||
- bank.htree_out_data->power.readOp.dynamic;
|
- bank.htree_out_data->power.readOp.dynamic;
|
||||||
|
|
||||||
if (dp.is_dram == false)
|
if (dp.is_dram == false) {
|
||||||
{
|
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||||
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
dyn_read_energy_from_closed_page = power.readOp.dynamic;
|
dyn_read_energy_from_closed_page = power.readOp.dynamic;
|
||||||
dyn_read_energy_from_open_page = power.readOp.dynamic -
|
dyn_read_energy_from_open_page = power.readOp.dynamic -
|
||||||
(bank.mat.r_predec->power.readOp.dynamic +
|
(bank.mat.r_predec->power.readOp.dynamic +
|
||||||
bank.mat.power_row_decoders.readOp.dynamic +
|
bank.mat.power_row_decoders.readOp.dynamic +
|
||||||
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
|
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
|
||||||
bank.mat.power_sa.readOp.dynamic +
|
bank.mat.power_sa.readOp.dynamic +
|
||||||
bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
|
bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
|
||||||
|
|
||||||
dyn_read_energy_remaining_words_in_burst =
|
dyn_read_energy_remaining_words_in_burst =
|
||||||
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
|
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
|
||||||
((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
|
((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
|
||||||
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
|
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
|
||||||
bank.htree_out_data->power.readOp.dynamic +
|
bank.htree_out_data->power.readOp.dynamic +
|
||||||
power_routing_to_bank.readOp.dynamic);
|
power_routing_to_bank.readOp.dynamic);
|
||||||
dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
|
dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
|
||||||
dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
|
dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
|
||||||
|
|
||||||
activate_energy = htree_in_add->power.readOp.dynamic +
|
activate_energy = htree_in_add->power.readOp.dynamic +
|
||||||
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
|
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
|
||||||
(bank.mat.r_predec->power.readOp.dynamic +
|
(bank.mat.r_predec->power.readOp.dynamic +
|
||||||
bank.mat.power_row_decoders.readOp.dynamic +
|
bank.mat.power_row_decoders.readOp.dynamic +
|
||||||
bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
|
bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
|
||||||
read_energy = (htree_in_add->power.readOp.dynamic +
|
read_energy = (htree_in_add->power.readOp.dynamic +
|
||||||
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
|
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
|
||||||
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
|
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
|
||||||
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
|
bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
|
||||||
bank.htree_out_data->power.readOp.dynamic +
|
bank.htree_out_data->power.readOp.dynamic +
|
||||||
htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
|
htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
|
||||||
write_energy = (htree_in_add->power.readOp.dynamic +
|
write_energy = (htree_in_add->power.readOp.dynamic +
|
||||||
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
|
bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
|
||||||
htree_in_data->power.readOp.dynamic +
|
htree_in_data->power.readOp.dynamic +
|
||||||
bank.htree_in_data->power.readOp.dynamic +
|
bank.htree_in_data->power.readOp.dynamic +
|
||||||
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
|
(bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
|
||||||
precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
|
precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
|
||||||
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
|
bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
|
||||||
|
|
||||||
leak_power_subbank_closed_page =
|
leak_power_subbank_closed_page =
|
||||||
(bank.mat.r_predec->power.readOp.leakage +
|
(bank.mat.r_predec->power.readOp.leakage +
|
||||||
bank.mat.b_mux_predec->power.readOp.leakage +
|
bank.mat.b_mux_predec->power.readOp.leakage +
|
||||||
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
|
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
|
||||||
bank.mat.power_row_decoders.readOp.leakage +
|
bank.mat.power_row_decoders.readOp.leakage +
|
||||||
bank.mat.power_bit_mux_decoders.readOp.leakage +
|
bank.mat.power_bit_mux_decoders.readOp.leakage +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
|
||||||
bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
|
bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
|
||||||
|
|
||||||
leak_power_subbank_closed_page +=
|
leak_power_subbank_closed_page +=
|
||||||
(bank.mat.r_predec->power.readOp.gate_leakage +
|
(bank.mat.r_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.b_mux_predec->power.readOp.gate_leakage +
|
bank.mat.b_mux_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
|
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.power_row_decoders.readOp.gate_leakage +
|
bank.mat.power_row_decoders.readOp.gate_leakage +
|
||||||
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
|
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
|
||||||
//bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
|
//bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
|
||||||
|
|
||||||
leak_power_subbank_open_page =
|
leak_power_subbank_open_page =
|
||||||
(bank.mat.r_predec->power.readOp.leakage +
|
(bank.mat.r_predec->power.readOp.leakage +
|
||||||
bank.mat.b_mux_predec->power.readOp.leakage +
|
bank.mat.b_mux_predec->power.readOp.leakage +
|
||||||
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
|
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
|
||||||
bank.mat.power_row_decoders.readOp.leakage +
|
bank.mat.power_row_decoders.readOp.leakage +
|
||||||
bank.mat.power_bit_mux_decoders.readOp.leakage +
|
bank.mat.power_bit_mux_decoders.readOp.leakage +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
|
||||||
bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
|
bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
|
||||||
|
|
||||||
leak_power_subbank_open_page +=
|
leak_power_subbank_open_page +=
|
||||||
(bank.mat.r_predec->power.readOp.gate_leakage +
|
(bank.mat.r_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.b_mux_predec->power.readOp.gate_leakage +
|
bank.mat.b_mux_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
|
bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
|
bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
|
||||||
bank.mat.power_row_decoders.readOp.gate_leakage +
|
bank.mat.power_row_decoders.readOp.gate_leakage +
|
||||||
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
|
bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
|
||||||
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
|
bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
|
||||||
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
|
bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
|
||||||
//bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
|
//bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
|
||||||
|
|
||||||
leak_power_request_and_reply_networks =
|
leak_power_request_and_reply_networks =
|
||||||
power_routing_to_bank.readOp.leakage +
|
power_routing_to_bank.readOp.leakage +
|
||||||
bank.htree_in_add->power.readOp.leakage +
|
bank.htree_in_add->power.readOp.leakage +
|
||||||
bank.htree_in_data->power.readOp.leakage +
|
bank.htree_in_data->power.readOp.leakage +
|
||||||
bank.htree_out_data->power.readOp.leakage;
|
bank.htree_out_data->power.readOp.leakage;
|
||||||
|
|
||||||
leak_power_request_and_reply_networks +=
|
leak_power_request_and_reply_networks +=
|
||||||
power_routing_to_bank.readOp.gate_leakage +
|
power_routing_to_bank.readOp.gate_leakage +
|
||||||
bank.htree_in_add->power.readOp.gate_leakage +
|
bank.htree_in_add->power.readOp.gate_leakage +
|
||||||
bank.htree_in_data->power.readOp.gate_leakage +
|
bank.htree_in_data->power.readOp.gate_leakage +
|
||||||
bank.htree_out_data->power.readOp.gate_leakage;
|
bank.htree_out_data->power.readOp.gate_leakage;
|
||||||
|
|
||||||
if (dp.fully_assoc || dp.pure_cam)
|
if (dp.fully_assoc || dp.pure_cam) {
|
||||||
{
|
|
||||||
leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
|
leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
|
||||||
leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
|
leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (dp.is_dram)
|
|
||||||
{ // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power
|
|
||||||
refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
|
|
||||||
bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
|
|
||||||
refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
|
|
||||||
refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
|
|
||||||
refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
|
||||||
refresh_power /= dp.dram_refresh_period;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (dp.is_tag == false)
|
|
||||||
{
|
|
||||||
power.readOp.dynamic = dyn_read_energy_from_closed_page;
|
|
||||||
power.writeOp.dynamic = dyn_read_energy_from_closed_page
|
|
||||||
- dyn_read_energy_remaining_words_in_burst
|
|
||||||
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
|
|
||||||
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
|
|
||||||
+ (power_routing_to_bank.writeOp.dynamic -
|
|
||||||
power_routing_to_bank.readOp.dynamic -
|
|
||||||
bank.htree_out_data->power.readOp.dynamic +
|
|
||||||
bank.htree_in_data->power.readOp.dynamic) *
|
|
||||||
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
|
|
||||||
|
|
||||||
if (dp.is_dram == false)
|
|
||||||
{
|
|
||||||
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// if DRAM, add refresh power to total leakage
|
|
||||||
if (dp.is_dram)
|
|
||||||
{
|
|
||||||
power.readOp.leakage += refresh_power;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: below should be avoided.
|
// if DRAM, add contribution of power spent in row predecoder drivers,
|
||||||
/*if (dp.is_main_mem)
|
// blocks and decoders to refresh power
|
||||||
{
|
if (dp.is_dram) {
|
||||||
power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
|
refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
|
||||||
}*/
|
bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
|
||||||
|
refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
|
||||||
|
refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||||
|
refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||||
|
refresh_power /= dp.dram_refresh_period;
|
||||||
|
}
|
||||||
|
|
||||||
assert(power.readOp.dynamic > 0);
|
|
||||||
assert(power.writeOp.dynamic > 0);
|
if (dp.is_tag == false) {
|
||||||
assert(power.readOp.leakage > 0);
|
power.readOp.dynamic = dyn_read_energy_from_closed_page;
|
||||||
|
power.writeOp.dynamic = dyn_read_energy_from_closed_page
|
||||||
|
- dyn_read_energy_remaining_words_in_burst
|
||||||
|
- bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
|
||||||
|
+ bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
|
||||||
|
+ (power_routing_to_bank.writeOp.dynamic -
|
||||||
|
power_routing_to_bank.readOp.dynamic -
|
||||||
|
bank.htree_out_data->power.readOp.dynamic +
|
||||||
|
bank.htree_in_data->power.readOp.dynamic) *
|
||||||
|
(MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
|
||||||
|
|
||||||
|
if (dp.is_dram == false) {
|
||||||
|
power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if DRAM, add refresh power to total leakage
|
||||||
|
if (dp.is_dram) {
|
||||||
|
power.readOp.leakage += refresh_power;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: below should be avoided.
|
||||||
|
/*if (dp.is_main_mem)
|
||||||
|
{
|
||||||
|
power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
assert(power.readOp.dynamic > 0);
|
||||||
|
assert(power.writeOp.dynamic > 0);
|
||||||
|
assert(power.readOp.leakage > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -40,9 +41,8 @@
|
||||||
#include "htree2.h"
|
#include "htree2.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
|
|
||||||
class UCA : public Component
|
class UCA : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
UCA(const DynamicParameter & dyn_p);
|
UCA(const DynamicParameter & dyn_p);
|
||||||
~UCA();
|
~UCA();
|
||||||
double compute_delays(double inrisetime); // returns outrisetime
|
double compute_delays(double inrisetime); // returns outrisetime
|
||||||
|
@ -66,7 +66,10 @@ class UCA : public Component
|
||||||
int num_do_b_bank;
|
int num_do_b_bank;
|
||||||
int num_si_b_bank;
|
int num_si_b_bank;
|
||||||
int num_so_b_bank;
|
int num_so_b_bank;
|
||||||
int RWP, ERP, EWP,SCHP;
|
int RWP;
|
||||||
|
int ERP;
|
||||||
|
int EWP;
|
||||||
|
int SCHP;
|
||||||
double area_all_dataramcells;
|
double area_all_dataramcells;
|
||||||
|
|
||||||
double dyn_read_energy_from_closed_page;
|
double dyn_read_energy_from_closed_page;
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT/CACTI
|
* McPAT/CACTI
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -43,9 +44,8 @@
|
||||||
#include "component.h"
|
#include "component.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
|
|
||||||
class Wire : public Component
|
class Wire : public Component {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
Wire(enum Wire_type wire_model, double len /* in u*/,
|
Wire(enum Wire_type wire_model, double len /* in u*/,
|
||||||
int nsense = 1/* no. of sense amps connected to the low-swing wire */,
|
int nsense = 1/* no. of sense amps connected to the low-swing wire */,
|
||||||
double width_scaling = 1,
|
double width_scaling = 1,
|
||||||
|
@ -56,16 +56,16 @@ class Wire : public Component
|
||||||
~Wire();
|
~Wire();
|
||||||
|
|
||||||
Wire( double width_scaling = 1,
|
Wire( double width_scaling = 1,
|
||||||
double spacing_scaling = 1,
|
double spacing_scaling = 1,
|
||||||
enum Wire_placement wire_placement = outside_mat,
|
enum Wire_placement wire_placement = outside_mat,
|
||||||
double resistivity = CU_RESISTIVITY,
|
double resistivity = CU_RESISTIVITY,
|
||||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
|
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
|
||||||
); // should be used only once for initializing static members
|
); // should be used only once for initializing static members
|
||||||
void init_wire();
|
void init_wire();
|
||||||
|
|
||||||
void calculate_wire_stats();
|
void calculate_wire_stats();
|
||||||
void delay_optimal_wire();
|
void delay_optimal_wire();
|
||||||
double wire_cap(double len, bool call_from_outside=false);
|
double wire_cap(double len, bool call_from_outside = false);
|
||||||
double wire_res(double len);
|
double wire_res(double len);
|
||||||
void low_swing_model();
|
void low_swing_model();
|
||||||
double signal_fall_time();
|
double signal_fall_time();
|
||||||
|
@ -81,9 +81,8 @@ class Wire : public Component
|
||||||
double wire_length;
|
double wire_length;
|
||||||
double in_rise_time, out_rise_time;
|
double in_rise_time, out_rise_time;
|
||||||
|
|
||||||
void set_in_rise_time(double rt)
|
void set_in_rise_time(double rt) {
|
||||||
{
|
in_rise_time = rt;
|
||||||
in_rise_time = rt;
|
|
||||||
}
|
}
|
||||||
static Component global;
|
static Component global;
|
||||||
static Component global_5;
|
static Component global_5;
|
||||||
|
@ -95,10 +94,10 @@ class Wire : public Component
|
||||||
static double wire_spacing_init;
|
static double wire_spacing_init;
|
||||||
void print_wire();
|
void print_wire();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
int nsense; // no. of sense amps connected to a low-swing wire if it
|
int nsense; // no. of sense amps connected to a low-swing wire if it
|
||||||
// is broadcasting data to multiple destinations
|
// is broadcasting data to multiple destinations
|
||||||
// width and spacing scaling factor can be used
|
// width and spacing scaling factor can be used
|
||||||
// to model low level wires or special
|
// to model low level wires or special
|
||||||
// fat wires
|
// fat wires
|
||||||
|
|
65
ext/mcpat/common.h
Normal file
65
ext/mcpat/common.h
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Author: Yasuko Eckert
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __COMMON_H__
|
||||||
|
#define __COMMON_H__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "xmlParser.h"
|
||||||
|
|
||||||
|
// Macro definitions to do string comparison against a specific parameter/stat.
|
||||||
|
// Note: These macros assume node_name and value variables of type XMLCSTR
|
||||||
|
// to exist already.
|
||||||
|
#define STRCMP(var, str) else if (strcmp(var, str) == 0)
|
||||||
|
|
||||||
|
#define ASSIGN_INT_IF(str, lhs) STRCMP(node_name, str) \
|
||||||
|
lhs = atoi(value)
|
||||||
|
|
||||||
|
#define ASSIGN_FP_IF(str, lhs) STRCMP(node_name, str) \
|
||||||
|
lhs = atof(value)
|
||||||
|
|
||||||
|
#define ASSIGN_STR_IF(str, lhs) STRCMP(node_name, str) \
|
||||||
|
lhs = string(value)
|
||||||
|
|
||||||
|
#define ASSIGN_ENUM_IF(str, lhs, etype) STRCMP(node_name, str) \
|
||||||
|
lhs = (etype)atoi(value)
|
||||||
|
|
||||||
|
|
||||||
|
// Constants shared across many system components
|
||||||
|
#define BITS_PER_BYTE 8.0
|
||||||
|
#define MIN_BUFFER_SIZE 64
|
||||||
|
// CAM structures do not have any associativity
|
||||||
|
#define CAM_ASSOC 0
|
||||||
|
|
||||||
|
#endif // __COMMON_H__
|
7604
ext/mcpat/core.cc
7604
ext/mcpat/core.cc
File diff suppressed because it is too large
Load diff
478
ext/mcpat/core.h
478
ext/mcpat/core.h
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -33,230 +34,305 @@
|
||||||
#ifndef CORE_H_
|
#ifndef CORE_H_
|
||||||
#define CORE_H_
|
#define CORE_H_
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "array.h"
|
#include "array.h"
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
#include "cacheunit.h"
|
||||||
#include "interconnect.h"
|
#include "interconnect.h"
|
||||||
#include "logic.h"
|
#include "logic.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "sharedcache.h"
|
|
||||||
|
|
||||||
class BranchPredictor :public Component {
|
// Macros used in the various core-related classes
|
||||||
public:
|
#define NUM_SOURCE_OPERANDS 2
|
||||||
|
#define NUM_INT_INST_SOURCE_OPERANDS 2
|
||||||
|
|
||||||
ParseXML *XML;
|
class BranchPredictorParameters {
|
||||||
int ithCore;
|
public:
|
||||||
InputParameter interface_ip;
|
int assoc;
|
||||||
CoreDynParam coredynp;
|
int nbanks;
|
||||||
double clockRate,executionTime;
|
int local_l1_predictor_size;
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
int local_l2_predictor_size;
|
||||||
ArrayST * globalBPT;
|
int local_predictor_entries;
|
||||||
ArrayST * localBPT;
|
int global_predictor_bits;
|
||||||
ArrayST * L1_localBPT;
|
int global_predictor_entries;
|
||||||
ArrayST * L2_localBPT;
|
int chooser_predictor_bits;
|
||||||
ArrayST * chooser;
|
int chooser_predictor_entries;
|
||||||
ArrayST * RAS;
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~BranchPredictor();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class BranchPredictor : public McPATComponent {
|
||||||
|
public:
|
||||||
|
ArrayST* globalBPT;
|
||||||
|
ArrayST* localBPT;
|
||||||
|
ArrayST* L1_localBPT;
|
||||||
|
ArrayST* L2_localBPT;
|
||||||
|
ArrayST* chooser;
|
||||||
|
ArrayST* RAS;
|
||||||
|
|
||||||
class InstFetchU :public Component {
|
InputParameter interface_ip;
|
||||||
public:
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
ParseXML *XML;
|
BranchPredictorParameters branch_pred_params;
|
||||||
int ithCore;
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
InputParameter interface_ip;
|
|
||||||
CoreDynParam coredynp;
|
|
||||||
double clockRate,executionTime;
|
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
|
||||||
enum Cache_policy cache_p;
|
|
||||||
InstCache icache;
|
|
||||||
ArrayST * IB;
|
|
||||||
ArrayST * BTB;
|
|
||||||
BranchPredictor * BPT;
|
|
||||||
inst_decoder * ID_inst;
|
|
||||||
inst_decoder * ID_operand;
|
|
||||||
inst_decoder * ID_misc;
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~InstFetchU();
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
class SchedulerU :public Component {
|
|
||||||
public:
|
|
||||||
|
|
||||||
ParseXML *XML;
|
|
||||||
int ithCore;
|
|
||||||
InputParameter interface_ip;
|
|
||||||
CoreDynParam coredynp;
|
|
||||||
double clockRate,executionTime;
|
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
|
||||||
double Iw_height, fp_Iw_height,ROB_height;
|
|
||||||
ArrayST * int_inst_window;
|
|
||||||
ArrayST * fp_inst_window;
|
|
||||||
ArrayST * ROB;
|
|
||||||
selection_logic * instruction_selection;
|
|
||||||
bool exist;
|
bool exist;
|
||||||
|
|
||||||
SchedulerU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
void computeEnergy(bool is_tdp=true);
|
const CoreParameters & _core_params,
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
const CoreStatistics & _core_stats,
|
||||||
~SchedulerU();
|
bool exsit = true);
|
||||||
|
void set_params_stats();
|
||||||
|
void computeEnergy();
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
|
~BranchPredictor();
|
||||||
};
|
};
|
||||||
|
|
||||||
class RENAMINGU :public Component {
|
class InstFetchParameters {
|
||||||
public:
|
public:
|
||||||
|
int btb_size;
|
||||||
ParseXML *XML;
|
int btb_block_size;
|
||||||
int ithCore;
|
int btb_assoc;
|
||||||
InputParameter interface_ip;
|
int btb_num_banks;
|
||||||
double clockRate,executionTime;
|
int btb_latency;
|
||||||
CoreDynParam coredynp;
|
int btb_throughput;
|
||||||
ArrayST * iFRAT;
|
int btb_rw_ports;
|
||||||
ArrayST * fFRAT;
|
|
||||||
ArrayST * iRRAT;
|
|
||||||
ArrayST * fRRAT;
|
|
||||||
ArrayST * ifreeL;
|
|
||||||
ArrayST * ffreeL;
|
|
||||||
dep_resource_conflict_check * idcl;
|
|
||||||
dep_resource_conflict_check * fdcl;
|
|
||||||
ArrayST * RAHT;//register alias history table Used to store GC
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
|
|
||||||
RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~RENAMINGU();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class LoadStoreU :public Component {
|
class InstFetchStatistics {
|
||||||
public:
|
public:
|
||||||
|
double btb_read_accesses;
|
||||||
ParseXML *XML;
|
double btb_write_accesses;
|
||||||
int ithCore;
|
|
||||||
InputParameter interface_ip;
|
|
||||||
CoreDynParam coredynp;
|
|
||||||
enum Cache_policy cache_p;
|
|
||||||
double clockRate,executionTime;
|
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
|
||||||
double lsq_height;
|
|
||||||
DataCache dcache;
|
|
||||||
ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
|
|
||||||
ArrayST * LoadQ;
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~LoadStoreU();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class MemManU :public Component {
|
class InstFetchU : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
|
CacheUnit* icache;
|
||||||
|
ArrayST* IB;
|
||||||
|
ArrayST* BTB;
|
||||||
|
BranchPredictor* BPT;
|
||||||
|
InstructionDecoder* ID_inst;
|
||||||
|
InstructionDecoder* ID_operand;
|
||||||
|
InstructionDecoder* ID_misc;
|
||||||
|
|
||||||
ParseXML *XML;
|
InputParameter interface_ip;
|
||||||
int ithCore;
|
CoreParameters core_params;
|
||||||
InputParameter interface_ip;
|
CoreStatistics core_stats;
|
||||||
CoreDynParam coredynp;
|
InstFetchParameters inst_fetch_params;
|
||||||
double clockRate,executionTime;
|
InstFetchStatistics inst_fetch_stats;
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
ArrayST * itlb;
|
enum Cache_policy cache_p;
|
||||||
ArrayST * dtlb;
|
bool exist;
|
||||||
bool exist;
|
|
||||||
|
|
||||||
MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
void computeEnergy(bool is_tdp=true);
|
const CoreParameters & _core_params,
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
const CoreStatistics & _core_stats,
|
||||||
~MemManU();
|
bool exsit = true);
|
||||||
};
|
void set_params_stats();
|
||||||
|
void computeEnergy();
|
||||||
class RegFU :public Component {
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
public:
|
~InstFetchU();
|
||||||
|
|
||||||
ParseXML *XML;
|
|
||||||
int ithCore;
|
|
||||||
InputParameter interface_ip;
|
|
||||||
CoreDynParam coredynp;
|
|
||||||
double clockRate,executionTime;
|
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
|
||||||
double int_regfile_height, fp_regfile_height;
|
|
||||||
ArrayST * IRF;
|
|
||||||
ArrayST * FRF;
|
|
||||||
ArrayST * RFWIN;
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~RegFU();
|
|
||||||
};
|
|
||||||
|
|
||||||
class EXECU :public Component {
|
|
||||||
public:
|
|
||||||
|
|
||||||
ParseXML *XML;
|
|
||||||
int ithCore;
|
|
||||||
InputParameter interface_ip;
|
|
||||||
double clockRate,executionTime;
|
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
|
||||||
double lsq_height;
|
|
||||||
CoreDynParam coredynp;
|
|
||||||
RegFU * rfu;
|
|
||||||
SchedulerU * scheu;
|
|
||||||
FunctionalUnit * fp_u;
|
|
||||||
FunctionalUnit * exeu;
|
|
||||||
FunctionalUnit * mul;
|
|
||||||
interconnect * int_bypass;
|
|
||||||
interconnect * intTagBypass;
|
|
||||||
interconnect * int_mul_bypass;
|
|
||||||
interconnect * intTag_mul_Bypass;
|
|
||||||
interconnect * fp_bypass;
|
|
||||||
interconnect * fpTagBypass;
|
|
||||||
|
|
||||||
Component bypass;
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true);
|
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~EXECU();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class Core :public Component {
|
class SchedulerU : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
|
static int ROB_STATUS_BITS;
|
||||||
|
|
||||||
ParseXML *XML;
|
ArrayST* int_inst_window;
|
||||||
int ithCore;
|
ArrayST* fp_inst_window;
|
||||||
InputParameter interface_ip;
|
ArrayST* ROB;
|
||||||
double clockRate,executionTime;
|
selection_logic* int_instruction_selection;
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
selection_logic* fp_instruction_selection;
|
||||||
InstFetchU * ifu;
|
|
||||||
LoadStoreU * lsu;
|
InputParameter interface_ip;
|
||||||
MemManU * mmu;
|
CoreParameters core_params;
|
||||||
EXECU * exu;
|
CoreStatistics core_stats;
|
||||||
RENAMINGU * rnu;
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
Pipeline * corepipe;
|
double Iw_height, fp_Iw_height, ROB_height;
|
||||||
UndiffCore * undiffCore;
|
bool exist;
|
||||||
SharedCache * l2cache;
|
|
||||||
CoreDynParam coredynp;
|
SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
//full_decoder inst_decoder;
|
const CoreParameters & _core_params,
|
||||||
//clock_network clockNetwork;
|
const CoreStatistics & _core_stats,
|
||||||
Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_);
|
bool exist_ = true);
|
||||||
void set_core_param();
|
void computeEnergy();
|
||||||
void computeEnergy(bool is_tdp=true);
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
~SchedulerU();
|
||||||
~Core();
|
};
|
||||||
|
|
||||||
|
class RENAMINGU : public McPATComponent {
|
||||||
|
public:
|
||||||
|
ArrayST* iFRAT;
|
||||||
|
ArrayST* fFRAT;
|
||||||
|
ArrayST* iRRAT;
|
||||||
|
ArrayST* fRRAT;
|
||||||
|
ArrayST* ifreeL;
|
||||||
|
ArrayST* ffreeL;
|
||||||
|
dep_resource_conflict_check* idcl;
|
||||||
|
dep_resource_conflict_check* fdcl;
|
||||||
|
ArrayST* RAHT;
|
||||||
|
|
||||||
|
InputParameter interface_ip;
|
||||||
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
|
bool exist;
|
||||||
|
|
||||||
|
RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
|
const CoreParameters & _core_params,
|
||||||
|
const CoreStatistics & _core_stats,
|
||||||
|
bool exist_ = true);
|
||||||
|
void computeEnergy();
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
|
~RENAMINGU();
|
||||||
|
};
|
||||||
|
|
||||||
|
class LoadStoreU : public McPATComponent {
|
||||||
|
public:
|
||||||
|
CacheUnit* dcache;
|
||||||
|
ArrayST* LSQ;
|
||||||
|
ArrayST* LoadQ;
|
||||||
|
|
||||||
|
InputParameter interface_ip;
|
||||||
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
|
enum Cache_policy cache_p;
|
||||||
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
|
double lsq_height;
|
||||||
|
bool exist;
|
||||||
|
|
||||||
|
LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
|
const CoreParameters & _core_params,
|
||||||
|
const CoreStatistics & _core_stats,
|
||||||
|
bool exist_ = true);
|
||||||
|
void computeEnergy();
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
|
~LoadStoreU();
|
||||||
|
};
|
||||||
|
|
||||||
|
class MemoryManagementParams {
|
||||||
|
public:
|
||||||
|
int itlb_number_entries;
|
||||||
|
double itlb_latency;
|
||||||
|
double itlb_throughput;
|
||||||
|
int itlb_assoc;
|
||||||
|
int itlb_nbanks;
|
||||||
|
int dtlb_number_entries;
|
||||||
|
double dtlb_latency;
|
||||||
|
double dtlb_throughput;
|
||||||
|
int dtlb_assoc;
|
||||||
|
int dtlb_nbanks;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MemoryManagementStats {
|
||||||
|
public:
|
||||||
|
double itlb_total_accesses;
|
||||||
|
double itlb_total_misses;
|
||||||
|
double itlb_conflicts;
|
||||||
|
double dtlb_read_accesses;
|
||||||
|
double dtlb_read_misses;
|
||||||
|
double dtlb_write_accesses;
|
||||||
|
double dtlb_write_misses;
|
||||||
|
double dtlb_conflicts;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MemManU : public McPATComponent {
|
||||||
|
public:
|
||||||
|
ArrayST* itlb;
|
||||||
|
ArrayST* dtlb;
|
||||||
|
|
||||||
|
InputParameter interface_ip;
|
||||||
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
|
MemoryManagementParams mem_man_params;
|
||||||
|
MemoryManagementStats mem_man_stats;
|
||||||
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
|
bool exist;
|
||||||
|
|
||||||
|
MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
|
const CoreParameters & _core_params,
|
||||||
|
const CoreStatistics & _core_stats, bool exist_ = true);
|
||||||
|
void set_params_stats();
|
||||||
|
void computeEnergy();
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
|
~MemManU();
|
||||||
|
};
|
||||||
|
|
||||||
|
class RegFU : public McPATComponent {
|
||||||
|
public:
|
||||||
|
static int RFWIN_ACCESS_MULTIPLIER;
|
||||||
|
|
||||||
|
ArrayST* IRF;
|
||||||
|
ArrayST* FRF;
|
||||||
|
ArrayST* RFWIN;
|
||||||
|
|
||||||
|
InputParameter interface_ip;
|
||||||
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
|
double int_regfile_height, fp_regfile_height;
|
||||||
|
bool exist;
|
||||||
|
|
||||||
|
RegFU(XMLNode* _xml_data,
|
||||||
|
InputParameter* interface_ip_, const CoreParameters & _core_params,
|
||||||
|
const CoreStatistics & _core_stats,
|
||||||
|
bool exist_ = true);
|
||||||
|
void computeEnergy();
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
|
~RegFU();
|
||||||
|
};
|
||||||
|
|
||||||
|
class EXECU : public McPATComponent {
|
||||||
|
public:
|
||||||
|
RegFU* rfu;
|
||||||
|
SchedulerU* scheu;
|
||||||
|
FunctionalUnit* fp_u;
|
||||||
|
FunctionalUnit* exeu;
|
||||||
|
FunctionalUnit* mul;
|
||||||
|
Interconnect* int_bypass;
|
||||||
|
Interconnect* intTagBypass;
|
||||||
|
Interconnect* int_mul_bypass;
|
||||||
|
Interconnect* intTag_mul_Bypass;
|
||||||
|
Interconnect* fp_bypass;
|
||||||
|
Interconnect* fpTagBypass;
|
||||||
|
|
||||||
|
InputParameter interface_ip;
|
||||||
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
|
double lsq_height;
|
||||||
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
|
bool exist;
|
||||||
|
|
||||||
|
EXECU(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
|
double lsq_height_, const CoreParameters & _core_params,
|
||||||
|
const CoreStatistics & _core_stats, bool exist_ = true);
|
||||||
|
void computeEnergy();
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
|
~EXECU();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class Core : public McPATComponent {
|
||||||
|
public:
|
||||||
|
InstFetchU* ifu;
|
||||||
|
LoadStoreU* lsu;
|
||||||
|
MemManU* mmu;
|
||||||
|
EXECU* exu;
|
||||||
|
RENAMINGU* rnu;
|
||||||
|
Pipeline* corepipe;
|
||||||
|
UndiffCore* undiffCore;
|
||||||
|
CacheUnit* l2cache;
|
||||||
|
|
||||||
|
int ithCore;
|
||||||
|
InputParameter interface_ip;
|
||||||
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
|
CoreParameters core_params;
|
||||||
|
CoreStatistics core_stats;
|
||||||
|
|
||||||
|
// TODO: Migrate component ID handling into the XML data to remove this
|
||||||
|
// ithCore variable
|
||||||
|
Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_);
|
||||||
|
void initialize_params();
|
||||||
|
void initialize_stats();
|
||||||
|
void set_core_param();
|
||||||
|
void computeEnergy();
|
||||||
|
~Core();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* CORE_H_ */
|
#endif /* CORE_H_ */
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -33,130 +34,190 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "globalvar.h"
|
#include "basic_components.h"
|
||||||
#include "interconnect.h"
|
#include "interconnect.h"
|
||||||
#include "wire.h"
|
#include "wire.h"
|
||||||
|
|
||||||
interconnect::interconnect(
|
double Interconnect::width_scaling_threshold = 3.0;
|
||||||
string name_,
|
|
||||||
enum Device_ty device_ty_,
|
|
||||||
double base_w, double base_h,
|
|
||||||
int data_w, double len,const InputParameter *configure_interface,
|
|
||||||
int start_wiring_level_,
|
|
||||||
bool pipelinable_ ,
|
|
||||||
double route_over_perc_ ,
|
|
||||||
bool opt_local_,
|
|
||||||
enum Core_type core_ty_,
|
|
||||||
enum Wire_type wire_model,
|
|
||||||
double width_s, double space_s,
|
|
||||||
TechnologyParameter::DeviceType *dt
|
|
||||||
)
|
|
||||||
:name(name_),
|
|
||||||
device_ty(device_ty_),
|
|
||||||
in_rise_time(0),
|
|
||||||
out_rise_time(0),
|
|
||||||
base_width(base_w),
|
|
||||||
base_height(base_h),
|
|
||||||
data_width(data_w),
|
|
||||||
wt(wire_model),
|
|
||||||
width_scaling(width_s),
|
|
||||||
space_scaling(space_s),
|
|
||||||
start_wiring_level(start_wiring_level_),
|
|
||||||
length(len),
|
|
||||||
//interconnect_latency(1e-12),
|
|
||||||
//interconnect_throughput(1e-12),
|
|
||||||
opt_local(opt_local_),
|
|
||||||
core_ty(core_ty_),
|
|
||||||
pipelinable(pipelinable_),
|
|
||||||
route_over_perc(route_over_perc_),
|
|
||||||
deviceType(dt)
|
|
||||||
{
|
|
||||||
|
|
||||||
wt = Global;
|
Interconnect::Interconnect(XMLNode* _xml_data, string name_,
|
||||||
l_ip=*configure_interface;
|
enum Device_ty device_ty_, double base_w,
|
||||||
local_result = init_interface(&l_ip);
|
double base_h, int data_w,
|
||||||
|
double len,
|
||||||
|
const InputParameter *configure_interface,
|
||||||
|
int start_wiring_level_, double _clockRate,
|
||||||
|
bool pipelinable_, double route_over_perc_,
|
||||||
|
bool opt_local_, enum Core_type core_ty_,
|
||||||
|
enum Wire_type wire_model,
|
||||||
|
double width_s, double space_s,
|
||||||
|
TechnologyParameter::DeviceType *dt)
|
||||||
|
: McPATComponent(_xml_data), device_ty(device_ty_), in_rise_time(0),
|
||||||
|
out_rise_time(0), base_width(base_w), base_height(base_h),
|
||||||
|
data_width(data_w), wt(wire_model), width_scaling(width_s),
|
||||||
|
space_scaling(space_s), start_wiring_level(start_wiring_level_),
|
||||||
|
length(len), opt_local(opt_local_), core_ty(core_ty_),
|
||||||
|
pipelinable(pipelinable_), route_over_perc(route_over_perc_),
|
||||||
|
deviceType(dt) {
|
||||||
|
name = name_;
|
||||||
|
clockRate = _clockRate;
|
||||||
|
l_ip = *configure_interface;
|
||||||
|
local_result = init_interface(&l_ip, name);
|
||||||
|
|
||||||
|
max_unpipelined_link_delay = 0;
|
||||||
max_unpipelined_link_delay = 0; //TODO
|
min_w_nmos = g_tp.min_w_nmos_;
|
||||||
min_w_nmos = g_tp.min_w_nmos_;
|
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
|
||||||
min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
latency = l_ip.latency;
|
latency = l_ip.latency;
|
||||||
throughput = l_ip.throughput;
|
throughput = l_ip.throughput;
|
||||||
latency_overflow=false;
|
latency_overflow = false;
|
||||||
throughput_overflow=false;
|
throughput_overflow = false;
|
||||||
|
|
||||||
/*
|
if (pipelinable == false) {
|
||||||
* TODO: Add wiring option from semi-global to global automatically
|
//Non-pipelinable wires, such as bypass logic, care about latency
|
||||||
* And directly jump to global if semi-global cannot satisfy timing
|
calcWireData();
|
||||||
* Fat wires only available for global wires, thus
|
if (opt_for_clk && opt_local) {
|
||||||
* if signal wiring layer starts from semi-global,
|
while (delay > latency &&
|
||||||
* the next layer up will be global, i.e., semi-global does
|
width_scaling < width_scaling_threshold) {
|
||||||
* not have fat wires.
|
width_scaling *= 2;
|
||||||
*/
|
space_scaling *= 2;
|
||||||
if (pipelinable == false)
|
Wire winit(width_scaling, space_scaling);
|
||||||
//Non-pipelinable wires, such as bypass logic, care latency
|
calcWireData();
|
||||||
{
|
}
|
||||||
compute();
|
if (delay > latency) {
|
||||||
if (opt_for_clk && opt_local)
|
latency_overflow = true;
|
||||||
{
|
}
|
||||||
while (delay > latency && width_scaling<3.0)
|
}
|
||||||
{
|
} else {
|
||||||
width_scaling *= 2;
|
//Pipelinable wires, such as buses, care about throughput rather than latency
|
||||||
space_scaling *= 2;
|
calcWireData();
|
||||||
Wire winit(width_scaling, space_scaling);
|
if (opt_for_clk && opt_local) {
|
||||||
compute();
|
while (delay > throughput &&
|
||||||
}
|
width_scaling < width_scaling_threshold) {
|
||||||
if (delay > latency)
|
width_scaling *= 2;
|
||||||
{
|
space_scaling *= 2;
|
||||||
latency_overflow=true;
|
Wire winit(width_scaling, space_scaling);
|
||||||
}
|
calcWireData();
|
||||||
}
|
}
|
||||||
}
|
if (delay > throughput) {
|
||||||
else //Pipelinable wires, such as bus, does not care latency but throughput
|
// insert pipeline stages
|
||||||
{
|
num_pipe_stages = (int)ceil(delay / throughput);
|
||||||
/*
|
assert(num_pipe_stages > 0);
|
||||||
* TODO: Add pipe regs power, area, and timing;
|
delay = delay / num_pipe_stages + num_pipe_stages * 0.05 * delay;
|
||||||
* Pipelinable wires optimize latency first.
|
}
|
||||||
*/
|
}
|
||||||
compute();
|
}
|
||||||
if (opt_for_clk && opt_local)
|
|
||||||
{
|
power_bit = power;
|
||||||
while (delay > throughput && width_scaling<3.0)
|
power.readOp.dynamic *= data_width;
|
||||||
{
|
power.readOp.leakage *= data_width;
|
||||||
width_scaling *= 2;
|
power.readOp.gate_leakage *= data_width;
|
||||||
space_scaling *= 2;
|
area.set_area(area.get_area()*data_width);
|
||||||
Wire winit(width_scaling, space_scaling);
|
no_device_under_wire_area.h *= data_width;
|
||||||
compute();
|
|
||||||
}
|
if (latency_overflow == true) {
|
||||||
if (delay > throughput)
|
cout << "Warning: " << name
|
||||||
// insert pipeline stages
|
<< " wire structure cannot satisfy latency constraint." << endl;
|
||||||
{
|
}
|
||||||
num_pipe_stages = (int)ceil(delay/throughput);
|
|
||||||
assert(num_pipe_stages>0);
|
assert(power.readOp.dynamic > 0);
|
||||||
delay = delay/num_pipe_stages + num_pipe_stages*0.05*delay;
|
assert(power.readOp.leakage > 0);
|
||||||
}
|
assert(power.readOp.gate_leakage > 0);
|
||||||
}
|
|
||||||
}
|
double long_channel_device_reduction =
|
||||||
|
longer_channel_device_reduction(device_ty, core_ty);
|
||||||
|
|
||||||
|
double sckRation = g_tp.sckt_co_eff;
|
||||||
|
power.readOp.dynamic *= sckRation;
|
||||||
|
power.writeOp.dynamic *= sckRation;
|
||||||
|
power.searchOp.dynamic *= sckRation;
|
||||||
|
|
||||||
|
power.readOp.longer_channel_leakage =
|
||||||
|
power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
|
||||||
|
//Only global wires have the option of choosing whether to route over or not
|
||||||
|
if (pipelinable)
|
||||||
|
area.set_area(area.get_area() * route_over_perc +
|
||||||
|
no_device_under_wire_area.get_area() *
|
||||||
|
(1 - route_over_perc));
|
||||||
|
|
||||||
|
Wire wreset();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
Interconnect::calcWireData() {
|
||||||
|
|
||||||
|
Wire *wtemp1 = 0;
|
||||||
|
wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
|
||||||
|
delay = wtemp1->delay;
|
||||||
|
power.readOp.dynamic = wtemp1->power.readOp.dynamic;
|
||||||
|
power.readOp.leakage = wtemp1->power.readOp.leakage;
|
||||||
|
power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage;
|
||||||
|
|
||||||
|
area.set_area(wtemp1->area.get_area());
|
||||||
|
no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing);
|
||||||
|
no_device_under_wire_area.w = length;
|
||||||
|
|
||||||
|
if (wtemp1)
|
||||||
|
delete wtemp1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Interconnect::computeEnergy() {
|
||||||
|
double pppm_t[4] = {1, 1, 1, 1};
|
||||||
|
|
||||||
|
// Compute TDP
|
||||||
|
power_t.reset();
|
||||||
|
set_pppm(pppm_t, int_params.active_ports * int_stats.duty_cycle,
|
||||||
|
int_params.active_ports, int_params.active_ports,
|
||||||
|
int_params.active_ports * int_stats.duty_cycle);
|
||||||
|
power_t = power * pppm_t;
|
||||||
|
|
||||||
|
rt_power.reset();
|
||||||
|
set_pppm(pppm_t, int_stats.accesses, int_params.active_ports,
|
||||||
|
int_params.active_ports, int_stats.accesses);
|
||||||
|
rt_power = power * pppm_t;
|
||||||
|
|
||||||
|
output_data.peak_dynamic_power = power_t.readOp.dynamic * clockRate;
|
||||||
|
output_data.subthreshold_leakage_power = power_t.readOp.leakage;
|
||||||
|
output_data.gate_leakage_power = power_t.readOp.gate_leakage;
|
||||||
|
output_data.runtime_dynamic_energy = rt_power.readOp.dynamic;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Interconnect::computeArea() {
|
||||||
|
output_data.area = area.get_area() / 1e6;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
Interconnect::set_params_stats(double active_ports,
|
||||||
|
double duty_cycle, double accesses) {
|
||||||
|
int_params.active_ports = active_ports;
|
||||||
|
int_stats.duty_cycle = duty_cycle;
|
||||||
|
int_stats.accesses = accesses;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Interconnect::leakage_feedback(double temperature) {
|
||||||
|
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
|
||||||
|
uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy
|
||||||
|
|
||||||
|
calcWireData();
|
||||||
|
|
||||||
power_bit = power;
|
power_bit = power;
|
||||||
power.readOp.dynamic *= data_width;
|
power.readOp.dynamic *= data_width;
|
||||||
power.readOp.leakage *= data_width;
|
power.readOp.leakage *= data_width;
|
||||||
power.readOp.gate_leakage *= data_width;
|
power.readOp.gate_leakage *= data_width;
|
||||||
area.set_area(area.get_area()*data_width);
|
|
||||||
no_device_under_wire_area.h *= data_width;
|
|
||||||
|
|
||||||
if (latency_overflow==true)
|
|
||||||
cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint." << endl;
|
|
||||||
|
|
||||||
|
|
||||||
assert(power.readOp.dynamic > 0);
|
assert(power.readOp.dynamic > 0);
|
||||||
assert(power.readOp.leakage > 0);
|
assert(power.readOp.leakage > 0);
|
||||||
assert(power.readOp.gate_leakage > 0);
|
assert(power.readOp.gate_leakage > 0);
|
||||||
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
double long_channel_device_reduction =
|
||||||
|
longer_channel_device_reduction(device_ty,core_ty);
|
||||||
|
|
||||||
double sckRation = g_tp.sckt_co_eff;
|
double sckRation = g_tp.sckt_co_eff;
|
||||||
power.readOp.dynamic *= sckRation;
|
power.readOp.dynamic *= sckRation;
|
||||||
|
@ -164,59 +225,6 @@ interconnect::interconnect(
|
||||||
power.searchOp.dynamic *= sckRation;
|
power.searchOp.dynamic *= sckRation;
|
||||||
|
|
||||||
power.readOp.longer_channel_leakage =
|
power.readOp.longer_channel_leakage =
|
||||||
power.readOp.leakage*long_channel_device_reduction;
|
power.readOp.leakage*long_channel_device_reduction;
|
||||||
|
|
||||||
if (pipelinable)//Only global wires has the option to choose whether routing over or not
|
|
||||||
area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc));
|
|
||||||
|
|
||||||
Wire wreset();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
interconnect::compute()
|
|
||||||
{
|
|
||||||
|
|
||||||
Wire *wtemp1 = 0;
|
|
||||||
wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling);
|
|
||||||
delay = wtemp1->delay;
|
|
||||||
power.readOp.dynamic = wtemp1->power.readOp.dynamic;
|
|
||||||
power.readOp.leakage = wtemp1->power.readOp.leakage;
|
|
||||||
power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage;
|
|
||||||
|
|
||||||
area.set_area(wtemp1->area.get_area());
|
|
||||||
no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing);
|
|
||||||
no_device_under_wire_area.w = length;
|
|
||||||
|
|
||||||
if (wtemp1)
|
|
||||||
delete wtemp1;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void interconnect::leakage_feedback(double temperature)
|
|
||||||
{
|
|
||||||
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
|
|
||||||
uca_org_t init_result = init_interface(&l_ip); // init_result is dummy
|
|
||||||
|
|
||||||
compute();
|
|
||||||
|
|
||||||
power_bit = power;
|
|
||||||
power.readOp.dynamic *= data_width;
|
|
||||||
power.readOp.leakage *= data_width;
|
|
||||||
power.readOp.gate_leakage *= data_width;
|
|
||||||
|
|
||||||
assert(power.readOp.dynamic > 0);
|
|
||||||
assert(power.readOp.leakage > 0);
|
|
||||||
assert(power.readOp.gate_leakage > 0);
|
|
||||||
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
|
||||||
|
|
||||||
double sckRation = g_tp.sckt_co_eff;
|
|
||||||
power.readOp.dynamic *= sckRation;
|
|
||||||
power.writeOp.dynamic *= sckRation;
|
|
||||||
power.searchOp.dynamic *= sckRation;
|
|
||||||
|
|
||||||
power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -42,46 +43,31 @@
|
||||||
#include "subarray.h"
|
#include "subarray.h"
|
||||||
#include "wire.h"
|
#include "wire.h"
|
||||||
|
|
||||||
// leakge power includes entire htree in a bank (when uca_tree == false)
|
class InterconnectParameters {
|
||||||
// leakge power includes only part to one bank when uca_tree == true
|
public:
|
||||||
|
double active_ports;
|
||||||
|
};
|
||||||
|
|
||||||
class interconnect : public Component
|
class InterconnectStatistics {
|
||||||
{
|
public:
|
||||||
public:
|
double duty_cycle;
|
||||||
interconnect(
|
double accesses;
|
||||||
string name_,
|
};
|
||||||
enum Device_ty device_ty_,
|
|
||||||
double base_w, double base_h, int data_w, double len,
|
|
||||||
const InputParameter *configure_interface, int start_wiring_level_,
|
|
||||||
bool pipelinable_ = false,
|
|
||||||
double route_over_perc_ =0.5,
|
|
||||||
bool opt_local_=true,
|
|
||||||
enum Core_type core_ty_=Inorder,
|
|
||||||
enum Wire_type wire_model=Global,
|
|
||||||
double width_s=1.0, double space_s=1.0,
|
|
||||||
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)
|
|
||||||
);
|
|
||||||
|
|
||||||
~interconnect() {};
|
class Interconnect : public McPATComponent {
|
||||||
|
public:
|
||||||
|
static double width_scaling_threshold;
|
||||||
|
|
||||||
void compute();
|
enum Device_ty device_ty;
|
||||||
string name;
|
|
||||||
enum Device_ty device_ty;
|
|
||||||
double in_rise_time, out_rise_time;
|
double in_rise_time, out_rise_time;
|
||||||
InputParameter l_ip;
|
InputParameter l_ip;
|
||||||
uca_org_t local_result;
|
uca_org_t local_result;
|
||||||
Area no_device_under_wire_area;
|
Area no_device_under_wire_area;
|
||||||
void set_in_rise_time(double rt)
|
|
||||||
{
|
|
||||||
in_rise_time = rt;
|
|
||||||
}
|
|
||||||
|
|
||||||
void leakage_feedback(double temperature);
|
|
||||||
double max_unpipelined_link_delay;
|
double max_unpipelined_link_delay;
|
||||||
powerDef power_bit;
|
powerDef power_bit;
|
||||||
|
|
||||||
double wire_bw;
|
double wire_bw;
|
||||||
double init_wire_bw; // bus width at root
|
double init_wire_bw;
|
||||||
double base_width;
|
double base_width;
|
||||||
double base_height;
|
double base_height;
|
||||||
int data_width;
|
int data_width;
|
||||||
|
@ -92,19 +78,39 @@ class interconnect : public Component
|
||||||
double min_w_nmos;
|
double min_w_nmos;
|
||||||
double min_w_pmos;
|
double min_w_pmos;
|
||||||
double latency, throughput;
|
double latency, throughput;
|
||||||
bool latency_overflow;
|
bool latency_overflow;
|
||||||
bool throughput_overflow;
|
bool throughput_overflow;
|
||||||
double interconnect_latency;
|
double interconnect_latency;
|
||||||
double interconnect_throughput;
|
double interconnect_throughput;
|
||||||
bool opt_local;
|
bool opt_local;
|
||||||
enum Core_type core_ty;
|
enum Core_type core_ty;
|
||||||
bool pipelinable;
|
bool pipelinable;
|
||||||
double route_over_perc;
|
double route_over_perc;
|
||||||
int num_pipe_stages;
|
int num_pipe_stages;
|
||||||
|
TechnologyParameter::DeviceType* deviceType;
|
||||||
private:
|
InterconnectParameters int_params;
|
||||||
TechnologyParameter::DeviceType *deviceType;
|
InterconnectStatistics int_stats;
|
||||||
|
|
||||||
|
Interconnect(XMLNode* _xml_data, string name_,
|
||||||
|
enum Device_ty device_ty_, double base_w,
|
||||||
|
double base_h, int data_w, double len,
|
||||||
|
const InputParameter *configure_interface,
|
||||||
|
int start_wiring_level_,
|
||||||
|
double _clockRate = 0.0f,
|
||||||
|
bool pipelinable_ = false, double route_over_perc_ = 0.5,
|
||||||
|
bool opt_local_ = true, enum Core_type core_ty_ = Inorder,
|
||||||
|
enum Wire_type wire_model = Global, double width_s = 1.0,
|
||||||
|
double space_s = 1.0,
|
||||||
|
TechnologyParameter::DeviceType *dt = &(g_tp.peri_global));
|
||||||
|
private:
|
||||||
|
void calcWireData();
|
||||||
|
public:
|
||||||
|
void computeArea();
|
||||||
|
void computeEnergy();
|
||||||
|
void set_params_stats(double active_ports,
|
||||||
|
double duty_cycle, double accesses);
|
||||||
|
void leakage_feedback(double temperature);
|
||||||
|
~Interconnect() {};
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -34,14 +35,12 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "basic_circuit.h"
|
#include "basic_circuit.h"
|
||||||
#include "basic_components.h"
|
#include "common.h"
|
||||||
#include "const.h"
|
#include "const.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "iocontrollers.h"
|
#include "iocontrollers.h"
|
||||||
#include "logic.h"
|
#include "logic.h"
|
||||||
#include "parameter.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
SUN Niagara 2 I/O power analysis:
|
SUN Niagara 2 I/O power analysis:
|
||||||
|
@ -69,378 +68,473 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_)
|
NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_)
|
||||||
:XML(XML_interface),
|
: McPATComponent(_xml_data, interface_ip_) {
|
||||||
interface_ip(*interface_ip_)
|
name = "NIU";
|
||||||
{
|
set_niu_param();
|
||||||
local_result = init_interface(&interface_ip);
|
}
|
||||||
|
|
||||||
double frontend_area, phy_area, mac_area, SerDer_area;
|
void NIUController::computeArea() {
|
||||||
double frontend_dyn, mac_dyn, SerDer_dyn;
|
double mac_area;
|
||||||
double frontend_gates, mac_gates, SerDer_gates;
|
double frontend_area;
|
||||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
double SerDer_area;
|
||||||
double NMOS_sizing, PMOS_sizing;
|
|
||||||
|
|
||||||
set_niu_param();
|
if (niup.type == 0) { //high performance NIU
|
||||||
|
//Area estimation based on average of die photo from Niagara 2 and
|
||||||
|
//Cadence ChipEstimate using 65nm.
|
||||||
|
mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
|
//Area estimation based on average of die photo from Niagara 2, ISSCC
|
||||||
|
//"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
|
||||||
|
//and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface
|
||||||
|
//With Robust VCO Tuning Technique" Frontend is PCS
|
||||||
|
frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 *
|
||||||
|
(interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065);
|
||||||
|
//Area estimation based on average of die photo from Niagara 2 and
|
||||||
|
//Cadence ChipEstimate hard IP @65nm.
|
||||||
|
//SerDer is very hard to scale
|
||||||
|
SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um /
|
||||||
|
0.065);//* (interface_ip.F_sz_um/0.065);
|
||||||
|
} else {
|
||||||
|
//Low power implementations are mostly from Cadence ChipEstimator;
|
||||||
|
//Ignore the multiple IP effect
|
||||||
|
// ---When there are multiple IP (same kind or not) selected, Cadence
|
||||||
|
//ChipEstimator results are not a simple summation of all IPs.
|
||||||
|
//Ignore this effect
|
||||||
|
mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
|
frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer
|
||||||
|
SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um/0.065);
|
||||||
|
//Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet
|
||||||
|
//Transceiver and XAUI Interface With Robust VCO Tuning Technique"
|
||||||
|
//and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can
|
||||||
|
//scale perfectly with the technology
|
||||||
|
}
|
||||||
|
|
||||||
if (niup.type == 0) //high performance NIU
|
//total area
|
||||||
{
|
output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6;
|
||||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm.
|
|
||||||
mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
|
||||||
//Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS"
|
|
||||||
//and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS
|
|
||||||
frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
|
||||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
|
|
||||||
//SerDer is very hard to scale
|
|
||||||
SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
|
|
||||||
phy_area = frontend_area + SerDer_area;
|
|
||||||
//total area
|
|
||||||
area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
|
|
||||||
//Power
|
|
||||||
//Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
|
||||||
mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
|
||||||
//Cadence ChipEstimate using 65nm soft IP;
|
|
||||||
frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
|
|
||||||
//according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
|
|
||||||
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
|
||||||
SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
|
|
||||||
SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
|
|
||||||
|
|
||||||
//Cadence ChipEstimate using 65nm
|
|
||||||
mac_gates = 111700;
|
|
||||||
frontend_gates = 320000;
|
|
||||||
SerDer_gates = 200000;
|
|
||||||
NMOS_sizing = 5*g_tp.min_w_nmos_;
|
|
||||||
PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect
|
|
||||||
// ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not
|
|
||||||
// a simple summation of all IPs. Ignore this effect
|
|
||||||
mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
|
||||||
frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer
|
|
||||||
SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
|
||||||
//Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique"
|
|
||||||
//and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology
|
|
||||||
//total area
|
|
||||||
area.set_area((mac_area + frontend_area + SerDer_area)*1e6);
|
|
||||||
//Power
|
|
||||||
//Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
|
||||||
mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
|
||||||
//Cadence ChipEstimate using 65nm soft IP;
|
|
||||||
frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate;
|
|
||||||
//SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
|
|
||||||
SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
|
|
||||||
SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU
|
|
||||||
|
|
||||||
mac_gates = 111700;
|
|
||||||
frontend_gates = 52000;
|
|
||||||
SerDer_gates = 199260;
|
|
||||||
|
|
||||||
NMOS_sizing = g_tp.min_w_nmos_;
|
|
||||||
PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
|
|
||||||
power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
|
||||||
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
|
|
||||||
power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void NIUController::computeEnergy(bool is_tdp)
|
void NIUController::computeEnergy() {
|
||||||
{
|
double mac_dyn;
|
||||||
if (is_tdp)
|
double frontend_dyn;
|
||||||
{
|
double SerDer_dyn;
|
||||||
|
double frontend_gates;
|
||||||
|
double mac_gates;
|
||||||
|
double SerDer_gates;
|
||||||
|
double NMOS_sizing;
|
||||||
|
double PMOS_sizing;
|
||||||
|
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||||
|
|
||||||
|
if (niup.type == 0) { //high performance NIU
|
||||||
|
//Power
|
||||||
|
//Cadence ChipEstimate using 65nm (mac, front_end are all energy.
|
||||||
|
//E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
||||||
|
//2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
||||||
|
mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
|
||||||
|
1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
|
||||||
|
//Cadence ChipEstimate using 65nm soft IP;
|
||||||
|
frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 *
|
||||||
|
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||||
|
//according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006
|
||||||
|
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
||||||
|
SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) *
|
||||||
|
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
|
||||||
|
|
||||||
power = power_t;
|
//Cadence ChipEstimate using 65nm
|
||||||
power.readOp.dynamic *= niup.duty_cycle;
|
mac_gates = 111700;
|
||||||
|
frontend_gates = 320000;
|
||||||
|
SerDer_gates = 200000;
|
||||||
|
NMOS_sizing = 5 * g_tp.min_w_nmos_;
|
||||||
|
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||||
|
} else {
|
||||||
|
//Power
|
||||||
|
//Cadence ChipEstimate using 65nm (mac, front_end are all energy.
|
||||||
|
///E=P*T = P/F = 1.37/1Ghz = 1.37e-9);
|
||||||
|
//2.19W@1GHz fully active according to Cadence ChipEstimate @65nm
|
||||||
|
mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd
|
||||||
|
/ 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate;
|
||||||
|
//Cadence ChipEstimate using 65nm soft IP;
|
||||||
|
frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 *
|
||||||
|
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||||
|
//SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm
|
||||||
|
SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) *
|
||||||
|
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
|
||||||
|
|
||||||
|
mac_gates = 111700;
|
||||||
|
frontend_gates = 52000;
|
||||||
|
SerDer_gates = 199260;
|
||||||
|
NMOS_sizing = g_tp.min_w_nmos_;
|
||||||
|
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
//convert to energy per clock cycle of whole NIU
|
||||||
rt_power = power_t;
|
SerDer_dyn /= niup.clockRate;
|
||||||
rt_power.readOp.dynamic *= niup.perc_load;
|
|
||||||
|
power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn;
|
||||||
|
power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) *
|
||||||
|
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||||
|
g_tp.peri_global.Vdd;//unit W
|
||||||
|
double long_channel_device_reduction =
|
||||||
|
longer_channel_device_reduction(Uncore_device);
|
||||||
|
power.readOp.longer_channel_leakage =
|
||||||
|
power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) *
|
||||||
|
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||||
|
g_tp.peri_global.Vdd;//unit W
|
||||||
|
|
||||||
|
// Output power
|
||||||
|
output_data.subthreshold_leakage_power =
|
||||||
|
longer_channel_device ? power.readOp.longer_channel_leakage :
|
||||||
|
power.readOp.leakage;
|
||||||
|
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||||
|
output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle;
|
||||||
|
output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load;
|
||||||
|
}
|
||||||
|
|
||||||
|
void NIUController::set_niu_param() {
|
||||||
|
int num_children = xml_data->nChildNode("param");
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("niu_clockRate", niup.clockRate);
|
||||||
|
ASSIGN_INT_IF("num_units", niup.num_units);
|
||||||
|
ASSIGN_INT_IF("type", niup.type);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change from MHz to Hz
|
||||||
|
niup.clockRate *= 1e6;
|
||||||
|
|
||||||
|
num_children = xml_data->nChildNode("stat");
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("duty_cycle", nius.duty_cycle);
|
||||||
|
ASSIGN_FP_IF("perc_load", nius.perc_load);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedStat(node_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
PCIeController::PCIeController(XMLNode* _xml_data,
|
||||||
{
|
InputParameter* interface_ip_)
|
||||||
string indent_str(indent, ' ');
|
: McPATComponent(_xml_data, interface_ip_) {
|
||||||
string indent_str_next(indent+2, ' ');
|
name = "PCIe";
|
||||||
bool long_channel = XML->sys.longer_channel_device;
|
set_pcie_param();
|
||||||
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
cout << "NIU:" << endl;
|
|
||||||
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl;
|
|
||||||
cout << indent_str<< "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl;
|
|
||||||
cout<<endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void NIUController::set_niu_param()
|
void PCIeController::computeArea() {
|
||||||
{
|
double ctrl_area;
|
||||||
niup.clockRate = XML->sys.niu.clockrate;
|
double SerDer_area;
|
||||||
niup.clockRate *= 1e6;
|
|
||||||
niup.num_units = XML->sys.niu.number_units;
|
/* Assuming PCIe is bit-slice based architecture
|
||||||
niup.duty_cycle = XML->sys.niu.duty_cycle;
|
* This is the reason for /8 in both area and power calculation
|
||||||
niup.perc_load = XML->sys.niu.total_load_perc;
|
* to get per lane numbers
|
||||||
niup.type = XML->sys.niu.type;
|
*/
|
||||||
// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
|
||||||
|
if (pciep.type == 0) { //high performance PCIe
|
||||||
|
//Area estimation based on average of die photo from Niagara 2 and
|
||||||
|
//Cadence ChipEstimate @ 65nm.
|
||||||
|
ctrl_area = (5.2 + 0.5) / 2 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
|
//Area estimation based on average of die photo from Niagara 2 and
|
||||||
|
//Cadence ChipEstimate hard IP @65nm.
|
||||||
|
//SerDer is very hard to scale
|
||||||
|
SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um /
|
||||||
|
0.065);//* (interface_ip.F_sz_um/0.065);
|
||||||
|
} else {
|
||||||
|
ctrl_area = 0.412 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
|
//Area estimation based on average of die photo from Niagara 2, and
|
||||||
|
//Cadence ChipEstimate @ 65nm.
|
||||||
|
SerDer_area = 0.36 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Total area
|
||||||
|
output_data.area = ((ctrl_area + (pciep.withPHY ? SerDer_area : 0)) / 8 *
|
||||||
|
pciep.num_channels) * 1e6;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_)
|
void PCIeController::computeEnergy() {
|
||||||
:XML(XML_interface),
|
double ctrl_dyn;
|
||||||
interface_ip(*interface_ip_)
|
double SerDer_dyn;
|
||||||
{
|
double ctrl_gates;
|
||||||
local_result = init_interface(&interface_ip);
|
double SerDer_gates = 0;
|
||||||
double frontend_area, phy_area, ctrl_area, SerDer_area;
|
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||||
double ctrl_dyn, frontend_dyn, SerDer_dyn;
|
double NMOS_sizing;
|
||||||
double ctrl_gates,frontend_gates, SerDer_gates;
|
double PMOS_sizing;
|
||||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
|
||||||
double NMOS_sizing, PMOS_sizing;
|
|
||||||
|
|
||||||
/* Assuming PCIe is bit-slice based architecture
|
/* Assuming PCIe is bit-slice based architecture
|
||||||
* This is the reason for /8 in both area and power calculation
|
* This is the reason for /8 in both area and power calculation
|
||||||
* to get per lane numbers
|
* to get per lane numbers
|
||||||
*/
|
*/
|
||||||
|
|
||||||
set_pcie_param();
|
if (pciep.type == 0) { //high performance PCIe
|
||||||
if (pciep.type == 0) //high performance NIU
|
//Power
|
||||||
{
|
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
|
||||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm.
|
ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
|
||||||
ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||||
//Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
|
// //Cadence ChipEstimate using 65nm soft IP;
|
||||||
frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||||
//Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm.
|
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
||||||
//SerDer is very hard to scale
|
//PCIe 2.0 max per lane speed is 4Gb/s
|
||||||
SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065);
|
SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) *
|
||||||
phy_area = frontend_area + SerDer_area;
|
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
|
||||||
//total area
|
|
||||||
//Power
|
|
||||||
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
|
|
||||||
ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
|
||||||
// //Cadence ChipEstimate using 65nm soft IP;
|
|
||||||
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
|
||||||
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
|
||||||
SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
|
|
||||||
SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
|
|
||||||
|
|
||||||
//power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels;
|
//Cadence ChipEstimate using 65nm
|
||||||
//Cadence ChipEstimate using 65nm
|
ctrl_gates = 900000 / 8 * pciep.num_channels;
|
||||||
ctrl_gates = 900000/8*pciep.num_channels;
|
// frontend_gates = 120000/8;
|
||||||
// frontend_gates = 120000/8;
|
// SerDer_gates = 200000/8;
|
||||||
// SerDer_gates = 200000/8;
|
NMOS_sizing = 5 * g_tp.min_w_nmos_;
|
||||||
NMOS_sizing = 5*g_tp.min_w_nmos_;
|
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||||
PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
|
} else {
|
||||||
}
|
//Power
|
||||||
else
|
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
|
||||||
{
|
ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 *
|
||||||
ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||||
//Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm.
|
// //Cadence ChipEstimate using 65nm soft IP;
|
||||||
SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
||||||
//total area
|
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
||||||
//Power
|
//PCIe 2.0 max per lane speed is 4Gb/s
|
||||||
//Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer
|
SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) *
|
||||||
ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2;
|
||||||
// //Cadence ChipEstimate using 65nm soft IP;
|
|
||||||
// frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
|
||||||
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
|
||||||
SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s
|
|
||||||
SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle
|
|
||||||
|
|
||||||
//Cadence ChipEstimate using 65nm
|
//Cadence ChipEstimate using 65nm
|
||||||
ctrl_gates = 200000/8*pciep.num_channels;
|
ctrl_gates = 200000 / 8 * pciep.num_channels;
|
||||||
// frontend_gates = 120000/8;
|
// frontend_gates = 120000/8;
|
||||||
SerDer_gates = 200000/8*pciep.num_channels;
|
SerDer_gates = 200000 / 8 * pciep.num_channels;
|
||||||
NMOS_sizing = g_tp.min_w_nmos_;
|
NMOS_sizing = g_tp.min_w_nmos_;
|
||||||
PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
|
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||||
|
|
||||||
}
|
|
||||||
area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6);
|
|
||||||
power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels;
|
|
||||||
power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
|
||||||
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
|
|
||||||
power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
|
||||||
}
|
|
||||||
|
|
||||||
void PCIeController::computeEnergy(bool is_tdp)
|
|
||||||
{
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
|
|
||||||
|
|
||||||
power = power_t;
|
|
||||||
power.readOp.dynamic *= pciep.duty_cycle;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
//covert to energy per clock cycle
|
||||||
rt_power = power_t;
|
SerDer_dyn /= pciep.clockRate;
|
||||||
rt_power.readOp.dynamic *= pciep.perc_load;
|
|
||||||
|
power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) *
|
||||||
|
pciep.num_channels;
|
||||||
|
power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) *
|
||||||
|
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||||
|
g_tp.peri_global.Vdd;//unit W
|
||||||
|
double long_channel_device_reduction =
|
||||||
|
longer_channel_device_reduction(Uncore_device);
|
||||||
|
power.readOp.longer_channel_leakage =
|
||||||
|
power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
power.readOp.gate_leakage = (ctrl_gates +
|
||||||
|
(pciep.withPHY ? SerDer_gates : 0)) *
|
||||||
|
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||||
|
g_tp.peri_global.Vdd;//unit W
|
||||||
|
|
||||||
|
// Output power
|
||||||
|
output_data.subthreshold_leakage_power =
|
||||||
|
longer_channel_device ? power.readOp.longer_channel_leakage :
|
||||||
|
power.readOp.leakage;
|
||||||
|
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||||
|
output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle;
|
||||||
|
output_data.runtime_dynamic_energy =
|
||||||
|
power.readOp.dynamic * pcies.perc_load;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PCIeController::set_pcie_param() {
|
||||||
|
int num_children = xml_data->nChildNode("param");
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate);
|
||||||
|
ASSIGN_INT_IF("num_units", pciep.num_units);
|
||||||
|
ASSIGN_INT_IF("num_channels", pciep.num_channels);
|
||||||
|
ASSIGN_INT_IF("type", pciep.type);
|
||||||
|
ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change from MHz to Hz
|
||||||
|
pciep.clockRate *= 1e6;
|
||||||
|
|
||||||
|
num_children = xml_data->nChildNode("stat");
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle);
|
||||||
|
ASSIGN_FP_IF("perc_load", pcies.perc_load);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedStat(node_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
FlashController::FlashController(XMLNode* _xml_data,
|
||||||
{
|
InputParameter* interface_ip_)
|
||||||
string indent_str(indent, ' ');
|
: McPATComponent(_xml_data, interface_ip_) {
|
||||||
string indent_str_next(indent+2, ' ');
|
name = "Flash Controller";
|
||||||
bool long_channel = XML->sys.longer_channel_device;
|
set_fc_param();
|
||||||
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
cout << "PCIe:" << endl;
|
|
||||||
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl;
|
|
||||||
cout << indent_str<< "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl;
|
|
||||||
cout<<endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PCIeController::set_pcie_param()
|
void FlashController::computeArea() {
|
||||||
{
|
double ctrl_area;
|
||||||
pciep.clockRate = XML->sys.pcie.clockrate;
|
double SerDer_area;
|
||||||
pciep.clockRate *= 1e6;
|
|
||||||
pciep.num_units = XML->sys.pcie.number_units;
|
|
||||||
pciep.num_channels = XML->sys.pcie.num_channels;
|
|
||||||
pciep.duty_cycle = XML->sys.pcie.duty_cycle;
|
|
||||||
pciep.perc_load = XML->sys.pcie.total_load_perc;
|
|
||||||
pciep.type = XML->sys.pcie.type;
|
|
||||||
pciep.withPHY = XML->sys.pcie.withPHY;
|
|
||||||
// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
|
||||||
|
|
||||||
}
|
/* Assuming Flash is bit-slice based architecture
|
||||||
|
* This is the reason for /8 in both area and power calculation
|
||||||
FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_)
|
* to get per lane numbers
|
||||||
:XML(XML_interface),
|
*/
|
||||||
interface_ip(*interface_ip_)
|
|
||||||
{
|
|
||||||
local_result = init_interface(&interface_ip);
|
|
||||||
double frontend_area, phy_area, ctrl_area, SerDer_area;
|
|
||||||
double ctrl_dyn, frontend_dyn, SerDer_dyn;
|
|
||||||
double ctrl_gates,frontend_gates, SerDer_gates;
|
|
||||||
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
|
||||||
double NMOS_sizing, PMOS_sizing;
|
|
||||||
|
|
||||||
/* Assuming PCIe is bit-slice based architecture
|
|
||||||
* This is the reason for /8 in both area and power calculation
|
|
||||||
* to get per lane numbers
|
|
||||||
*/
|
|
||||||
|
|
||||||
set_fc_param();
|
|
||||||
if (fcp.type == 0) //high performance NIU
|
|
||||||
{
|
|
||||||
cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<<endl;
|
|
||||||
exit(0);
|
|
||||||
NMOS_sizing = 5*g_tp.min_w_nmos_;
|
|
||||||
PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ctrl_area = 0.243 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
|
||||||
//Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL from CAST
|
|
||||||
SerDer_area = 0.36/8 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);
|
|
||||||
//based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it support 8x lanes with each lane
|
|
||||||
//speed up to 250MB/s (PCIe1.1x) This is already saturate the 200MB/s of the flash controller core above.
|
|
||||||
ctrl_gates = 129267;
|
|
||||||
SerDer_gates = 200000/8;
|
|
||||||
NMOS_sizing = g_tp.min_w_nmos_;
|
|
||||||
PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r;
|
|
||||||
|
|
||||||
//Power
|
|
||||||
//Cadence ChipEstimate using 65nm the controller 125mW for every 200MB/s This is power not energy!
|
|
||||||
ctrl_dyn = 0.125*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);
|
|
||||||
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
|
||||||
SerDer_dyn = 0.01*1.6*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;
|
|
||||||
//max Per controller speed is 1.6Gb/s (200MB/s)
|
|
||||||
}
|
|
||||||
double number_channel = 1+(fcp.num_channels-1)*0.2;
|
|
||||||
area.set_area((ctrl_area + (fcp.withPHY? SerDer_area:0))*1e6*number_channel);
|
|
||||||
power_t.readOp.dynamic = (ctrl_dyn + (fcp.withPHY? SerDer_dyn:0))*number_channel;
|
|
||||||
power_t.readOp.leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
|
||||||
power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction;
|
|
||||||
power_t.readOp.gate_leakage = ((ctrl_gates + (fcp.withPHY? SerDer_gates:0))*number_channel)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W
|
|
||||||
}
|
|
||||||
|
|
||||||
void FlashController::computeEnergy(bool is_tdp)
|
|
||||||
{
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
|
|
||||||
|
|
||||||
power = power_t;
|
|
||||||
power.readOp.dynamic *= fcp.duty_cycle;
|
|
||||||
|
|
||||||
|
if (fcp.type == 0) { //high performance flash controller
|
||||||
|
cout << "Current McPAT does not support high performance flash "
|
||||||
|
<< "controller since even low power designs are enough for "
|
||||||
|
<< "maintain throughput" <<endl;
|
||||||
|
exit(0);
|
||||||
|
} else {
|
||||||
|
ctrl_area = 0.243 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
|
//Area estimation based on Cadence ChipEstimate @ 65nm: NANDFLASH-CTRL
|
||||||
|
//from CAST
|
||||||
|
SerDer_area = 0.36 / 8 * (interface_ip.F_sz_um / 0.065) *
|
||||||
|
(interface_ip.F_sz_um / 0.065);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
|
||||||
rt_power = power_t;
|
output_data.area = (ctrl_area + (fcp.withPHY ? SerDer_area : 0)) *
|
||||||
rt_power.readOp.dynamic *= fcp.perc_load;
|
1e6 * number_channel;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
void FlashController::computeEnergy() {
|
||||||
{
|
double ctrl_dyn;
|
||||||
string indent_str(indent, ' ');
|
double SerDer_dyn;
|
||||||
string indent_str_next(indent+2, ' ');
|
double ctrl_gates;
|
||||||
bool long_channel = XML->sys.longer_channel_device;
|
double SerDer_gates;
|
||||||
|
double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio();
|
||||||
|
double NMOS_sizing;
|
||||||
|
double PMOS_sizing;
|
||||||
|
|
||||||
if (is_tdp)
|
/* Assuming Flash is bit-slice based architecture
|
||||||
{
|
* This is the reason for /8 in both area and power calculation
|
||||||
cout << "Flash Controller:" << endl;
|
* to get per lane numbers
|
||||||
cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
*/
|
||||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already
|
|
||||||
cout << indent_str<< "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout<<endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
if (fcp.type == 0) { //high performance flash controller
|
||||||
|
cout << "Current McPAT does not support high performance flash "
|
||||||
|
<< "controller since even low power designs are enough for "
|
||||||
|
<< "maintain throughput" <<endl;
|
||||||
|
exit(0);
|
||||||
|
NMOS_sizing = 5 * g_tp.min_w_nmos_;
|
||||||
|
PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||||
|
} else {
|
||||||
|
//based On PCIe PHY TSMC65GP from Cadence ChipEstimate @ 65nm, it
|
||||||
|
//support 8x lanes with each lane speed up to 250MB/s (PCIe1.1x).
|
||||||
|
//This is already saturate the 200MB/s of the flash controller core
|
||||||
|
//above.
|
||||||
|
ctrl_gates = 129267;
|
||||||
|
SerDer_gates = 200000 / 8;
|
||||||
|
NMOS_sizing = g_tp.min_w_nmos_;
|
||||||
|
PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r;
|
||||||
|
|
||||||
|
//Power
|
||||||
|
//Cadence ChipEstimate using 65nm the controller 125mW for every
|
||||||
|
//200MB/s This is power not energy!
|
||||||
|
ctrl_dyn = 0.125 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd /
|
||||||
|
1.1 * (interface_ip.F_sz_nm / 65.0);
|
||||||
|
//SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm
|
||||||
|
SerDer_dyn = 0.01 * 1.6 * (interface_ip.F_sz_um / 0.09) *
|
||||||
|
g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2;
|
||||||
|
//max Per controller speed is 1.6Gb/s (200MB/s)
|
||||||
|
}
|
||||||
|
|
||||||
|
double number_channel = 1 + (fcp.num_channels - 1) * 0.2;
|
||||||
|
power.readOp.dynamic = (ctrl_dyn + (fcp.withPHY ? SerDer_dyn : 0)) *
|
||||||
|
number_channel;
|
||||||
|
power.readOp.leakage = ((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) *
|
||||||
|
number_channel) *
|
||||||
|
cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||||
|
g_tp.peri_global.Vdd;//unit W
|
||||||
|
double long_channel_device_reduction =
|
||||||
|
longer_channel_device_reduction(Uncore_device);
|
||||||
|
power.readOp.longer_channel_leakage =
|
||||||
|
power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
power.readOp.gate_leakage =
|
||||||
|
((ctrl_gates + (fcp.withPHY ? SerDer_gates : 0)) * number_channel) *
|
||||||
|
cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) *
|
||||||
|
g_tp.peri_global.Vdd;//unit W
|
||||||
|
|
||||||
|
// Output power
|
||||||
|
output_data.subthreshold_leakage_power =
|
||||||
|
longer_channel_device ? power.readOp.longer_channel_leakage :
|
||||||
|
power.readOp.leakage;
|
||||||
|
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
||||||
|
output_data.peak_dynamic_power = power.readOp.dynamic * fcs.duty_cycle;
|
||||||
|
output_data.runtime_dynamic_energy = power.readOp.dynamic * fcs.perc_load;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FlashController::set_fc_param()
|
void FlashController::set_fc_param()
|
||||||
{
|
{
|
||||||
// fcp.clockRate = XML->sys.flashc.mc_clock;
|
int num_children = xml_data->nChildNode("param");
|
||||||
// fcp.clockRate *= 1e6;
|
int i;
|
||||||
fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate;
|
for (i = 0; i < num_children; i++) {
|
||||||
fcp.num_channels = ceil(fcp.peakDataTransferRate/200);
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
fcp.num_mcs = XML->sys.flashc.number_mcs;
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
fcp.duty_cycle = XML->sys.flashc.duty_cycle;
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
fcp.perc_load = XML->sys.flashc.total_load_perc;
|
|
||||||
fcp.type = XML->sys.flashc.type;
|
|
||||||
fcp.withPHY = XML->sys.flashc.withPHY;
|
|
||||||
// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_INT_IF("num_channels", fcp.num_channels);
|
||||||
|
ASSIGN_INT_IF("type", fcp.type);
|
||||||
|
ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
num_children = xml_data->nChildNode("stat");
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle);
|
||||||
|
ASSIGN_FP_IF("perc_load", fcs.perc_load);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedStat(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,63 +26,52 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
#ifndef IOCONTROLLERS_H_
|
#ifndef IOCONTROLLERS_H_
|
||||||
#define IOCONTROLLERS_H_
|
#define IOCONTROLLERS_H_
|
||||||
|
|
||||||
|
|
||||||
#endif /* IOCONTROLLERS_H_ */
|
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "parameter.h"
|
|
||||||
//#include "io.h"
|
|
||||||
#include "array.h"
|
|
||||||
//#include "Undifferentiated_Core_Area.h"
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "array.h"
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
class NIUController : public Component {
|
class NIUController : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
ParseXML *XML;
|
NIUParameters niup;
|
||||||
InputParameter interface_ip;
|
NIUStatistics nius;
|
||||||
NIUParam niup;
|
|
||||||
powerDef power_t;
|
NIUController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||||
uca_org_t local_result;
|
|
||||||
NIUController(ParseXML *XML_interface,InputParameter* interface_ip_);
|
|
||||||
void set_niu_param();
|
void set_niu_param();
|
||||||
void computeEnergy(bool is_tdp=true);
|
void computeArea();
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
void computeEnergy();
|
||||||
~NIUController(){};
|
~NIUController(){};
|
||||||
};
|
};
|
||||||
|
|
||||||
class PCIeController : public Component {
|
class PCIeController : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
ParseXML *XML;
|
PCIeParameters pciep;
|
||||||
InputParameter interface_ip;
|
PCIeStatistics pcies;
|
||||||
PCIeParam pciep;
|
|
||||||
powerDef power_t;
|
PCIeController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||||
uca_org_t local_result;
|
|
||||||
PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_);
|
|
||||||
void set_pcie_param();
|
void set_pcie_param();
|
||||||
void computeEnergy(bool is_tdp=true);
|
void computeArea();
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
void computeEnergy();
|
||||||
~PCIeController(){};
|
~PCIeController(){};
|
||||||
};
|
};
|
||||||
|
|
||||||
class FlashController : public Component {
|
class FlashController : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
ParseXML *XML;
|
MCParameters fcp;
|
||||||
InputParameter interface_ip;
|
MCStatistics fcs;
|
||||||
MCParam fcp;
|
|
||||||
powerDef power_t;
|
FlashController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||||
uca_org_t local_result;
|
|
||||||
FlashController(ParseXML *XML_interface,InputParameter* interface_ip_);
|
|
||||||
void set_fc_param();
|
void set_fc_param();
|
||||||
void computeEnergy(bool is_tdp=true);
|
void computeArea();
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
void computeEnergy();
|
||||||
~FlashController(){};
|
~FlashController(){};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#endif /* IOCONTROLLERS_H_ */
|
||||||
|
|
1500
ext/mcpat/logic.cc
1500
ext/mcpat/logic.cc
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,18 +26,16 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
#ifndef LOGIC_H_
|
#ifndef LOGIC_H_
|
||||||
#define LOGIC_H_
|
#define LOGIC_H_
|
||||||
|
|
||||||
#include <cassert>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "arch_const.h"
|
#include "arch_const.h"
|
||||||
#include "basic_circuit.h"
|
#include "basic_circuit.h"
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
@ -49,185 +48,190 @@
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class selection_logic : public Component{
|
class selection_logic : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
selection_logic(bool _is_default, int win_entries_,
|
bool is_default;
|
||||||
int issue_width_, const InputParameter *configure_interface,
|
InputParameter l_ip;
|
||||||
enum Device_ty device_ty_=Core_device,
|
uca_org_t local_result;
|
||||||
enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface);
|
int win_entries;
|
||||||
bool is_default;
|
int issue_width;
|
||||||
InputParameter l_ip;
|
double accesses;
|
||||||
uca_org_t local_result;
|
int num_threads;
|
||||||
const ParseXML *XML_interface;
|
enum Device_ty device_ty;
|
||||||
int win_entries;
|
enum Core_type core_ty;
|
||||||
int issue_width;
|
|
||||||
int num_threads;
|
|
||||||
enum Device_ty device_ty;
|
|
||||||
enum Core_type core_ty;
|
|
||||||
|
|
||||||
void selection_power();
|
selection_logic(XMLNode* _xml_data, bool _is_default, int _win_entries,
|
||||||
|
int issue_width_, const InputParameter* configure_interface,
|
||||||
|
string _name, double _accesses,
|
||||||
|
double clockRate_ = 0.0f,
|
||||||
|
enum Device_ty device_ty_ = Core_device,
|
||||||
|
enum Core_type core_ty_ = Inorder);
|
||||||
|
void computeArea();
|
||||||
|
void computeEnergy();
|
||||||
void leakage_feedback(double temperature); // TODO
|
void leakage_feedback(double temperature); // TODO
|
||||||
|
// TODO: Add a destructor
|
||||||
};
|
};
|
||||||
|
|
||||||
class dep_resource_conflict_check : public Component{
|
class dep_resource_conflict_check : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true);
|
InputParameter l_ip;
|
||||||
InputParameter l_ip;
|
uca_org_t local_result;
|
||||||
uca_org_t local_result;
|
double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
|
||||||
double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ;
|
CoreParameters coredynp;
|
||||||
CoreDynParam coredynp;
|
int compare_bits;
|
||||||
int compare_bits;
|
bool is_default;
|
||||||
bool is_default;
|
statsDef stats_t;
|
||||||
statsDef tdp_stats;
|
|
||||||
statsDef rtp_stats;
|
|
||||||
statsDef stats_t;
|
|
||||||
powerDef power_t;
|
|
||||||
|
|
||||||
void conflict_check_power();
|
dep_resource_conflict_check(XMLNode* _xml_data, const string _name,
|
||||||
double compare_cap();
|
const InputParameter *configure_interface,
|
||||||
~dep_resource_conflict_check(){
|
const CoreParameters & dyn_p_, int compare_bits_,
|
||||||
local_result.cleanup();
|
double clockRate_ = 0.0f,
|
||||||
}
|
bool _is_default = true);
|
||||||
|
void conflict_check_power();
|
||||||
|
double compare_cap();
|
||||||
|
void computeEnergy() {};
|
||||||
|
~dep_resource_conflict_check() {
|
||||||
|
local_result.cleanup();
|
||||||
|
}
|
||||||
|
|
||||||
void leakage_feedback(double temperature);
|
void leakage_feedback(double temperature);
|
||||||
};
|
};
|
||||||
|
|
||||||
class inst_decoder: public Component{
|
class InstructionDecoder: public McPATComponent {
|
||||||
public:
|
public:
|
||||||
inst_decoder(bool _is_default, const InputParameter *configure_interface,
|
Decoder* final_dec;
|
||||||
int opcode_length_,
|
Predec* pre_dec;
|
||||||
int num_decoders_,
|
|
||||||
bool x86_,
|
|
||||||
enum Device_ty device_ty_=Core_device,
|
|
||||||
enum Core_type core_ty_=Inorder);
|
|
||||||
inst_decoder();
|
|
||||||
bool is_default;
|
|
||||||
int opcode_length;
|
|
||||||
int num_decoders;
|
|
||||||
bool x86;
|
|
||||||
int num_decoder_segments;
|
|
||||||
int num_decoded_signals;
|
|
||||||
InputParameter l_ip;
|
|
||||||
uca_org_t local_result;
|
|
||||||
enum Device_ty device_ty;
|
|
||||||
enum Core_type core_ty;
|
|
||||||
|
|
||||||
Decoder * final_dec;
|
bool is_default;
|
||||||
Predec * pre_dec;
|
int opcode_length;
|
||||||
|
int num_decoders;
|
||||||
|
bool x86;
|
||||||
|
int num_decoder_segments;
|
||||||
|
int num_decoded_signals;
|
||||||
|
InputParameter l_ip;
|
||||||
|
uca_org_t local_result;
|
||||||
|
enum Device_ty device_ty;
|
||||||
|
enum Core_type core_ty;
|
||||||
|
statsDef stats_t;
|
||||||
|
|
||||||
statsDef tdp_stats;
|
InstructionDecoder(XMLNode* _xml_data, const string _name, bool _is_default,
|
||||||
statsDef rtp_stats;
|
const InputParameter *configure_interface,
|
||||||
statsDef stats_t;
|
int opcode_length_, int num_decoders_, bool x86_,
|
||||||
powerDef power_t;
|
double clockRate_ = 0.0f,
|
||||||
void inst_decoder_delay_power();
|
enum Device_ty device_ty_ = Core_device,
|
||||||
~inst_decoder();
|
enum Core_type core_ty_ = Inorder);
|
||||||
|
InstructionDecoder();
|
||||||
|
void computeEnergy() {};
|
||||||
|
void inst_decoder_delay_power();
|
||||||
|
~InstructionDecoder();
|
||||||
void leakage_feedback(double temperature);
|
void leakage_feedback(double temperature);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// TODO: This should be defined elsewhere? This isn't a true McPATComponent
|
||||||
class DFFCell : public Component {
|
class DFFCell : public Component {
|
||||||
public:
|
public:
|
||||||
DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load,
|
InputParameter l_ip;
|
||||||
const InputParameter *configure_interface);
|
bool is_dram;
|
||||||
InputParameter l_ip;
|
double cell_load;
|
||||||
bool is_dram;
|
double WdecNANDn;
|
||||||
double cell_load;
|
double WdecNANDp;
|
||||||
double WdecNANDn;
|
double clock_cap;
|
||||||
double WdecNANDp;
|
int model;
|
||||||
double clock_cap;
|
int n_switch;
|
||||||
int model;
|
int n_keep_1;
|
||||||
int n_switch;
|
int n_keep_0;
|
||||||
int n_keep_1;
|
int n_clock;
|
||||||
int n_keep_0;
|
powerDef e_switch;
|
||||||
int n_clock;
|
powerDef e_keep_1;
|
||||||
powerDef e_switch;
|
powerDef e_keep_0;
|
||||||
powerDef e_keep_1;
|
powerDef e_clock;
|
||||||
powerDef e_keep_0;
|
|
||||||
powerDef e_clock;
|
|
||||||
|
|
||||||
double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
|
DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, double _cell_load,
|
||||||
void compute_DFF_cell(void);
|
const InputParameter *configure_interface);
|
||||||
};
|
double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out);
|
||||||
|
void compute_DFF_cell(void);
|
||||||
|
~DFFCell() {};
|
||||||
|
};
|
||||||
|
|
||||||
class Pipeline : public Component{
|
// TODO: This is a very ambiguous component. Try to refactor it.
|
||||||
|
class Pipeline : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true);
|
InputParameter l_ip;
|
||||||
InputParameter l_ip;
|
uca_org_t local_result;
|
||||||
uca_org_t local_result;
|
CoreParameters coredynp;
|
||||||
CoreDynParam coredynp;
|
enum Device_ty device_ty;
|
||||||
enum Device_ty device_ty;
|
bool is_core_pipeline, is_default;
|
||||||
bool is_core_pipeline, is_default;
|
double num_piperegs;
|
||||||
double num_piperegs;
|
bool process_ind;
|
||||||
// int pipeline_stages;
|
double WNANDn;
|
||||||
// int tot_stage_vector, per_stage_vector;
|
double WNANDp;
|
||||||
bool process_ind;
|
double load_per_pipeline_stage;
|
||||||
double WNANDn ;
|
|
||||||
double WNANDp;
|
Pipeline(XMLNode* _xml_data, const InputParameter *configure_interface,
|
||||||
double load_per_pipeline_stage;
|
const CoreParameters & dyn_p_,
|
||||||
// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
|
enum Device_ty device_ty_ = Core_device,
|
||||||
// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
|
bool _is_core_pipeline = true, bool _is_default = true);
|
||||||
// bool thread_clock_gated;
|
void compute_stage_vector();
|
||||||
// bool in_order, multithreaded;
|
/**
|
||||||
void compute_stage_vector();
|
* TODO: compute() completes work that should be completed in computeArea()
|
||||||
void compute();
|
* and computeEnergy() recursively. Consider shifting these calculations
|
||||||
~Pipeline(){
|
* around to be consistent with the rest of the hierarchy
|
||||||
local_result.cleanup();
|
*/
|
||||||
};
|
void compute();
|
||||||
|
void computeArea() {};
|
||||||
|
// TODO: Move energy computation to this function to unify hierarchy
|
||||||
|
void computeEnergy() {};
|
||||||
|
~Pipeline() {
|
||||||
|
local_result.cleanup();
|
||||||
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//class core_pipeline :public pipeline{
|
class FunctionalUnit : public McPATComponent {
|
||||||
//public:
|
|
||||||
// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length;
|
|
||||||
// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width;
|
|
||||||
// bool thread_clock_gated;
|
|
||||||
// bool in_order, multithreaded;
|
|
||||||
// core_pipeline(bool _is_default, const InputParameter *configure_interface);
|
|
||||||
// virtual void compute_stage_vector();
|
|
||||||
//
|
|
||||||
//};
|
|
||||||
|
|
||||||
class FunctionalUnit :public Component{
|
|
||||||
public:
|
public:
|
||||||
ParseXML *XML;
|
InputParameter interface_ip;
|
||||||
int ithCore;
|
CoreParameters core_params;
|
||||||
InputParameter interface_ip;
|
CoreStatistics core_stats;
|
||||||
CoreDynParam coredynp;
|
double FU_height;
|
||||||
double FU_height;
|
double num_fu;
|
||||||
double clockRate,executionTime;
|
double energy;
|
||||||
double num_fu;
|
double base_energy;
|
||||||
double energy, base_energy,per_access_energy, leakage, gate_leakage;
|
double per_access_energy;
|
||||||
bool is_default;
|
bool is_default;
|
||||||
enum FU_type fu_type;
|
enum FU_type fu_type;
|
||||||
statsDef tdp_stats;
|
statsDef stats_t;
|
||||||
statsDef rtp_stats;
|
|
||||||
statsDef stats_t;
|
|
||||||
powerDef power_t;
|
|
||||||
|
|
||||||
FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type);
|
FunctionalUnit(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
void computeEnergy(bool is_tdp=true);
|
const CoreParameters & _core_params,
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
const CoreStatistics & _core_stats, enum FU_type fu_type);
|
||||||
|
void computeEnergy();
|
||||||
void leakage_feedback(double temperature);
|
void leakage_feedback(double temperature);
|
||||||
|
~FunctionalUnit() {};
|
||||||
};
|
};
|
||||||
|
|
||||||
class UndiffCore :public Component{
|
// TODO: This is a very ambiguous component. Try to refactor it.
|
||||||
|
class UndiffCore : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false);
|
InputParameter interface_ip;
|
||||||
ParseXML *XML;
|
CoreParameters coredynp;
|
||||||
int ithCore;
|
double scktRatio;
|
||||||
InputParameter interface_ip;
|
double chip_PR_overhead;
|
||||||
CoreDynParam coredynp;
|
double macro_PR_overhead;
|
||||||
double clockRate,executionTime;
|
enum Core_type core_ty;
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
bool opt_performance;
|
||||||
enum Core_type core_ty;
|
bool embedded;
|
||||||
bool opt_performance, embedded;
|
double pipeline_stage;
|
||||||
double pipeline_stage,num_hthreads,issue_width;
|
double num_hthreads;
|
||||||
bool is_default;
|
double issue_width;
|
||||||
|
bool is_default;
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
bool exist;
|
||||||
~UndiffCore(){};
|
|
||||||
bool exist;
|
|
||||||
|
|
||||||
|
|
||||||
|
UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
|
const CoreParameters & dyn_p_,
|
||||||
|
bool exist_ = true);
|
||||||
|
void computeArea() {};
|
||||||
|
// TODO: Move energy computation to this function to unify hierarchy
|
||||||
|
void computeEnergy() {};
|
||||||
|
~UndiffCore() {};
|
||||||
};
|
};
|
||||||
#endif /* LOGIC_H_ */
|
#endif /* LOGIC_H_ */
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,15 +26,17 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
#include <sys/stat.h>
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
#include "basic_components.h"
|
||||||
#include "globalvar.h"
|
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "processor.h"
|
#include "system.h"
|
||||||
#include "version.h"
|
#include "version.h"
|
||||||
#include "xmlParser.h"
|
#include "xmlParser.h"
|
||||||
|
|
||||||
|
@ -41,61 +44,68 @@ using namespace std;
|
||||||
|
|
||||||
void print_usage(char * argv0);
|
void print_usage(char * argv0);
|
||||||
|
|
||||||
int main(int argc,char *argv[])
|
int main(int argc, char *argv[]) {
|
||||||
{
|
char* xml_file = NULL;
|
||||||
char * fb ;
|
int plevel = 2;
|
||||||
bool infile_specified = false;
|
|
||||||
int plevel = 2;
|
for (int32_t i = 0; i < argc; i++) {
|
||||||
opt_for_clk =true;
|
if (argv[i] == string("-infile")) {
|
||||||
//cout.precision(10);
|
xml_file = argv[++i];
|
||||||
if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help"))
|
|
||||||
{
|
} else if (argv[i] == string("-print_level")) {
|
||||||
print_usage(argv[0]);
|
plevel = atoi(argv[++i]);
|
||||||
|
|
||||||
|
} else if (argv[i] == string("-opt_for_clk")) {
|
||||||
|
McPATComponent::opt_for_clk = (bool)atoi(argv[++i]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (int32_t i = 0; i < argc; i++)
|
// Ensure that the XML file was specified
|
||||||
{
|
if (xml_file == NULL) {
|
||||||
if (argv[i] == string("-infile"))
|
cerr << "ERROR: Please specify infile\n\n";
|
||||||
{
|
print_usage(argv[0]);
|
||||||
infile_specified = true;
|
}
|
||||||
i++;
|
|
||||||
fb = argv[ i];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (argv[i] == string("-print_level"))
|
// Ensure that the XML file exists
|
||||||
{
|
struct stat file_info;
|
||||||
i++;
|
if (stat(xml_file, &file_info)) {
|
||||||
plevel = atoi(argv[i]);
|
cerr << "ERROR: File not found: " << xml_file << endl << endl;
|
||||||
}
|
print_usage(argv[0]);
|
||||||
|
}
|
||||||
|
|
||||||
if (argv[i] == string("-opt_for_clk"))
|
cout << "McPAT (version " << VER_MAJOR << "." << VER_MINOR
|
||||||
{
|
<< " of " << VER_UPDATE << ") is computing the target processor...\n "
|
||||||
i++;
|
<< endl;
|
||||||
opt_for_clk = (bool)atoi(argv[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (infile_specified == false)
|
|
||||||
{
|
|
||||||
print_usage(argv[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Parse the XML input file
|
||||||
|
XMLNode xml_data = XMLNode::openFileHelper(xml_file, "component");
|
||||||
|
unsigned int num_children = xml_data.nChildNode("component");
|
||||||
|
assert(num_children == 1);
|
||||||
|
XMLNode system_xml = xml_data.getChildNode("component");
|
||||||
|
assert(strcmp(system_xml.getAttribute("type"), "System") == 0);
|
||||||
|
|
||||||
cout<<"McPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
|
// Recursively instantiate the system hierarchy
|
||||||
<< " of " << VER_UPDATE << ") is computing the target processor...\n "<<endl;
|
System* system = new System(&system_xml);
|
||||||
|
|
||||||
|
// Recursively compute chip area
|
||||||
|
system->computeArea();
|
||||||
|
|
||||||
|
// Recursively compute the power consumed
|
||||||
|
system->computeEnergy();
|
||||||
|
|
||||||
|
// Recursively output the computed values
|
||||||
|
system->displayData(2, plevel);
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
delete system;
|
||||||
|
return 0;
|
||||||
|
|
||||||
//parse XML-based interface
|
|
||||||
ParseXML *p1= new ParseXML();
|
|
||||||
p1->parse(fb);
|
|
||||||
Processor proc(p1);
|
|
||||||
proc.displayEnergy(2, plevel);
|
|
||||||
delete p1;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_usage(char * argv0)
|
void print_usage(char * argv0) {
|
||||||
{
|
|
||||||
cerr << "How to use McPAT:" << endl;
|
cerr << "How to use McPAT:" << endl;
|
||||||
cerr << " mcpat -infile <input file name> -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl;
|
cerr << " mcpat -infile <input file name> -print_level < "
|
||||||
//cerr << " Note:default print level is at processor level, please increase it to see the details" << endl;
|
<< "level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P "
|
||||||
|
<< "only)/1 (optimzed for target clock rate)>" << endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,13 +29,16 @@ VPATH = cacti
|
||||||
|
|
||||||
SRCS = \
|
SRCS = \
|
||||||
Ucache.cc \
|
Ucache.cc \
|
||||||
XML_Parse.cc \
|
|
||||||
arbiter.cc \
|
arbiter.cc \
|
||||||
area.cc \
|
area.cc \
|
||||||
array.cc \
|
array.cc \
|
||||||
bank.cc \
|
bank.cc \
|
||||||
basic_circuit.cc \
|
basic_circuit.cc \
|
||||||
basic_components.cc \
|
basic_components.cc \
|
||||||
|
bus_interconnect.cc \
|
||||||
|
cachearray.cc \
|
||||||
|
cachecontroller.cc \
|
||||||
|
cacheunit.cc \
|
||||||
cacti_interface.cc \
|
cacti_interface.cc \
|
||||||
component.cc \
|
component.cc \
|
||||||
core.cc \
|
core.cc \
|
||||||
|
@ -52,10 +55,9 @@ SRCS = \
|
||||||
noc.cc \
|
noc.cc \
|
||||||
nuca.cc \
|
nuca.cc \
|
||||||
parameter.cc \
|
parameter.cc \
|
||||||
processor.cc \
|
|
||||||
router.cc \
|
router.cc \
|
||||||
sharedcache.cc \
|
|
||||||
subarray.cc \
|
subarray.cc \
|
||||||
|
system.cc \
|
||||||
technology.cc \
|
technology.cc \
|
||||||
uca.cc \
|
uca.cc \
|
||||||
wire.cc \
|
wire.cc \
|
||||||
|
|
|
@ -1,81 +0,0 @@
|
||||||
TARGET = mcpatXeonCore
|
|
||||||
SHELL = /bin/sh
|
|
||||||
.PHONY: all depend clean
|
|
||||||
.SUFFIXES: .cc .o
|
|
||||||
|
|
||||||
ifndef NTHREADS
|
|
||||||
NTHREADS = 4
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
LIBS =
|
|
||||||
INCS = -lm
|
|
||||||
|
|
||||||
ifeq ($(TAG),dbg)
|
|
||||||
DBG = -Wall
|
|
||||||
OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti
|
|
||||||
else
|
|
||||||
DBG =
|
|
||||||
OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti
|
|
||||||
#OPT = -O0 -DNTHREADS=$(NTHREADS)
|
|
||||||
endif
|
|
||||||
|
|
||||||
#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT)
|
|
||||||
CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT)
|
|
||||||
CXX = g++ -m32
|
|
||||||
CC = gcc -m32
|
|
||||||
|
|
||||||
VPATH = cacti
|
|
||||||
|
|
||||||
SRCS = \
|
|
||||||
Ucache.cc \
|
|
||||||
XML_Parse.cc \
|
|
||||||
arbiter.cc \
|
|
||||||
area.cc \
|
|
||||||
array.cc \
|
|
||||||
bank.cc \
|
|
||||||
basic_circuit.cc \
|
|
||||||
basic_components.cc \
|
|
||||||
cacti_interface.cc \
|
|
||||||
component.cc \
|
|
||||||
core.cc \
|
|
||||||
crossbar.cc \
|
|
||||||
decoder.cc \
|
|
||||||
htree2.cc \
|
|
||||||
interconnect.cc \
|
|
||||||
io.cc \
|
|
||||||
iocontrollers.cc \
|
|
||||||
logic.cc \
|
|
||||||
main.cc \
|
|
||||||
mat.cc \
|
|
||||||
memoryctrl.cc \
|
|
||||||
noc.cc \
|
|
||||||
nuca.cc \
|
|
||||||
parameter.cc \
|
|
||||||
processor.cc \
|
|
||||||
router.cc \
|
|
||||||
sharedcache.cc \
|
|
||||||
subarray.cc \
|
|
||||||
technology_xeon_core.cc \
|
|
||||||
uca.cc \
|
|
||||||
wire.cc \
|
|
||||||
xmlParser.cc
|
|
||||||
|
|
||||||
OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS))
|
|
||||||
|
|
||||||
all: obj_$(TAG)/$(TARGET)
|
|
||||||
cp -f obj_$(TAG)/$(TARGET) $(TARGET)
|
|
||||||
|
|
||||||
obj_$(TAG)/$(TARGET) : $(OBJS)
|
|
||||||
$(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread
|
|
||||||
|
|
||||||
#obj_$(TAG)/%.o : %.cc
|
|
||||||
# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $<
|
|
||||||
|
|
||||||
obj_$(TAG)/%.o : %.cc
|
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
|
||||||
|
|
||||||
clean:
|
|
||||||
-rm -f *.o $(TARGET)
|
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,89 +26,75 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
#ifndef MEMORYCTRL_H_
|
#ifndef MEMORYCTRL_H_
|
||||||
#define MEMORYCTRL_H_
|
#define MEMORYCTRL_H_
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "parameter.h"
|
|
||||||
//#include "io.h"
|
|
||||||
#include "array.h"
|
#include "array.h"
|
||||||
//#include "Undifferentiated_Core_Area.h"
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
#include "cachearray.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
|
||||||
class MCBackend : public Component {
|
class MCBackend : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
InputParameter l_ip;
|
InputParameter l_ip;
|
||||||
uca_org_t local_result;
|
uca_org_t local_result;
|
||||||
enum MemoryCtrl_type mc_type;
|
MCParameters mcp;
|
||||||
MCParam mcp;
|
MCStatistics mcs;
|
||||||
statsDef tdp_stats;
|
|
||||||
statsDef rtp_stats;
|
|
||||||
statsDef stats_t;
|
statsDef stats_t;
|
||||||
powerDef power_t;
|
|
||||||
MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
|
MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
void compute();
|
const MCParameters & mcp_, const MCStatistics & mcs_);
|
||||||
void computeEnergy(bool is_tdp=true);
|
void computeArea();
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
void computeEnergy();
|
||||||
~MCBackend(){};
|
~MCBackend() {};
|
||||||
};
|
};
|
||||||
|
|
||||||
class MCPHY : public Component {
|
class MCPHY : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
InputParameter l_ip;
|
InputParameter l_ip;
|
||||||
uca_org_t local_result;
|
uca_org_t local_result;
|
||||||
enum MemoryCtrl_type mc_type;
|
MCParameters mcp;
|
||||||
MCParam mcp;
|
MCStatistics mcs;
|
||||||
statsDef tdp_stats;
|
statsDef stats_t;
|
||||||
statsDef rtp_stats;
|
|
||||||
statsDef stats_t;
|
MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_,
|
||||||
powerDef power_t;
|
const MCParameters & mcp_, const MCStatistics & mcs_);
|
||||||
MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
|
void computeArea();
|
||||||
void compute();
|
void computeEnergy();
|
||||||
void computeEnergy(bool is_tdp=true);
|
~MCPHY() {};
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~MCPHY(){};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class MCFrontEnd : public Component {
|
class MCFrontEnd : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
ParseXML *XML;
|
CacheArray* frontendBuffer;
|
||||||
InputParameter interface_ip;
|
CacheArray* readBuffer;
|
||||||
enum MemoryCtrl_type mc_type;
|
CacheArray* writeBuffer;
|
||||||
MCParam mcp;
|
selection_logic* MC_arb;
|
||||||
selection_logic * MC_arb;
|
|
||||||
ArrayST * frontendBuffer;
|
|
||||||
ArrayST * readBuffer;
|
|
||||||
ArrayST * writeBuffer;
|
|
||||||
|
|
||||||
MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_);
|
InputParameter interface_ip;
|
||||||
void computeEnergy(bool is_tdp=true);
|
MCParameters mcp;
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
MCStatistics mcs;
|
||||||
|
|
||||||
|
MCFrontEnd(XMLNode* _xml_data,
|
||||||
|
InputParameter* interface_ip_, const MCParameters & mcp_,
|
||||||
|
const MCStatistics & mcs_);
|
||||||
~MCFrontEnd();
|
~MCFrontEnd();
|
||||||
};
|
};
|
||||||
|
|
||||||
class MemoryController : public Component {
|
class MemoryController : public McPATComponent {
|
||||||
public:
|
public:
|
||||||
ParseXML *XML;
|
InputParameter interface_ip;
|
||||||
InputParameter interface_ip;
|
MCParameters mcp;
|
||||||
enum MemoryCtrl_type mc_type;
|
MCStatistics mcs;
|
||||||
MCParam mcp;
|
|
||||||
MCFrontEnd * frontend;
|
|
||||||
MCBackend * transecEngine;
|
|
||||||
MCPHY * PHY;
|
|
||||||
Pipeline * pipeLogic;
|
|
||||||
|
|
||||||
//clock_network clockNetwork;
|
MemoryController(XMLNode* _xml_data, InputParameter* interface_ip_);
|
||||||
MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_);
|
void initialize_params();
|
||||||
void set_mc_param();
|
void set_mc_param();
|
||||||
void computeEnergy(bool is_tdp=true);
|
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
|
||||||
~MemoryController();
|
~MemoryController();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* MEMORYCTRL_H_ */
|
#endif /* MEMORYCTRL_H_ */
|
||||||
|
|
512
ext/mcpat/noc.cc
512
ext/mcpat/noc.cc
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,7 +26,7 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
|
@ -35,321 +36,236 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "basic_circuit.h"
|
#include "basic_circuit.h"
|
||||||
|
#include "common.h"
|
||||||
#include "const.h"
|
#include "const.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "noc.h"
|
#include "noc.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
|
|
||||||
NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_)
|
OnChipNetwork::OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
|
||||||
:XML(XML_interface),
|
InputParameter* interface_ip_)
|
||||||
ithNoC(ithNoC_),
|
: McPATComponent(_xml_data), router(NULL), link_bus(NULL), ithNoC(ithNoC_),
|
||||||
interface_ip(*interface_ip_),
|
interface_ip(*interface_ip_), link_bus_exist(false),
|
||||||
router(0),
|
router_exist(false) {
|
||||||
link_bus(0),
|
name = "On-Chip Network";
|
||||||
link_bus_exist(false),
|
set_param_stats();
|
||||||
router_exist(false),
|
local_result = init_interface(&interface_ip, name);
|
||||||
M_traffic_pattern(M_traffic_pattern_)
|
scktRatio = g_tp.sckt_co_eff;
|
||||||
{
|
|
||||||
/*
|
|
||||||
* initialize, compute and optimize individual components.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (XML->sys.Embedded)
|
// TODO: Routers and links should be children of the NOC component
|
||||||
{
|
if (noc_params.type) {
|
||||||
interface_ip.wt =Global_30;
|
init_router();
|
||||||
interface_ip.wire_is_mat_type = 0;
|
} else {
|
||||||
interface_ip.wire_os_mat_type = 1;
|
init_link_bus();
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
{
|
|
||||||
interface_ip.wt =Global;
|
|
||||||
interface_ip.wire_is_mat_type = 2;
|
|
||||||
interface_ip.wire_os_mat_type = 2;
|
|
||||||
}
|
|
||||||
set_noc_param();
|
|
||||||
local_result=init_interface(&interface_ip);
|
|
||||||
scktRatio = g_tp.sckt_co_eff;
|
|
||||||
|
|
||||||
if (nocdynp.type)
|
void OnChipNetwork::init_router() {
|
||||||
{/*
|
router = new Router(noc_params.flit_size,
|
||||||
* if NOC compute router, router links must be computed separately
|
noc_params.virtual_channel_per_port *
|
||||||
* and called from external
|
noc_params.input_buffer_entries_per_vc,
|
||||||
* since total chip area must be known first
|
noc_params.virtual_channel_per_port,
|
||||||
*/
|
&(g_tp.peri_global),
|
||||||
init_router();
|
noc_params.input_ports, noc_params.output_ports,
|
||||||
|
noc_params.M_traffic_pattern);
|
||||||
|
// TODO: Make a router class within McPAT that descends from McPATComponent
|
||||||
|
// children.push_back(router);
|
||||||
|
area.set_area(area.get_area() + router->area.get_area() *
|
||||||
|
noc_params.total_nodes);
|
||||||
|
|
||||||
|
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
||||||
|
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
|
||||||
|
router_exist = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void OnChipNetwork::init_link_bus() {
|
||||||
|
if (noc_params.type) {
|
||||||
|
link_name = "Links";
|
||||||
|
} else {
|
||||||
|
link_name = "Bus";
|
||||||
|
}
|
||||||
|
|
||||||
|
interface_ip.throughput = noc_params.link_throughput /
|
||||||
|
noc_params.clockRate;
|
||||||
|
interface_ip.latency = noc_params.link_latency / noc_params.clockRate;
|
||||||
|
|
||||||
|
link_len /= (noc_params.horizontal_nodes + noc_params.vertical_nodes) / 2;
|
||||||
|
|
||||||
|
if (noc_params.total_nodes > 1) {
|
||||||
|
//All links are shared by neighbors
|
||||||
|
link_len /= 2;
|
||||||
|
}
|
||||||
|
link_bus = new Interconnect(xml_data, "Link", Uncore_device,
|
||||||
|
noc_params.link_base_width,
|
||||||
|
noc_params.link_base_height,
|
||||||
|
noc_params.flit_size, link_len, &interface_ip,
|
||||||
|
noc_params.link_start_wiring_level,
|
||||||
|
noc_params.clockRate, true/*pipelinable*/,
|
||||||
|
noc_params.route_over_perc);
|
||||||
|
children.push_back(link_bus);
|
||||||
|
|
||||||
|
link_bus_exist = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: This should use the McPATComponent::computeEnergy function to
|
||||||
|
// recursively calculate energy of routers and links and then add
|
||||||
|
void OnChipNetwork::computeEnergy() {
|
||||||
|
double pppm_t[4] = {1, 1, 1, 1};
|
||||||
|
|
||||||
|
// Initialize stats for TDP
|
||||||
|
tdp_stats.reset();
|
||||||
|
tdp_stats.readAc.access = noc_stats.duty_cycle;
|
||||||
|
if (router_exist) {
|
||||||
|
// TODO: Define a regression to exercise routers
|
||||||
|
// TODO: Clean this up: it is too invasive and breaks abstraction
|
||||||
|
set_pppm(pppm_t, 1 * tdp_stats.readAc.access, 1, 1, 1);
|
||||||
|
router->power = router->power * pppm_t;
|
||||||
|
set_pppm(pppm_t, noc_params.total_nodes,
|
||||||
|
noc_params.total_nodes,
|
||||||
|
noc_params.total_nodes,
|
||||||
|
noc_params.total_nodes);
|
||||||
|
}
|
||||||
|
if (link_bus_exist) {
|
||||||
|
if (noc_params.type) {
|
||||||
|
link_bus->int_params.active_ports = noc_params.min_ports - 1;
|
||||||
|
} else {
|
||||||
|
link_bus->int_params.active_ports = noc_params.min_ports;
|
||||||
}
|
}
|
||||||
else
|
link_bus->int_stats.duty_cycle =
|
||||||
{
|
noc_params.M_traffic_pattern * noc_stats.duty_cycle;
|
||||||
init_link_bus(link_len_); //if bus compute bus
|
|
||||||
|
// TODO: Decide how to roll multiple routers into a single top-level
|
||||||
|
// NOC module. I would prefer not to, but it might be a nice feature
|
||||||
|
set_pppm(pppm_t, noc_params.total_nodes,
|
||||||
|
noc_params.total_nodes,
|
||||||
|
noc_params.total_nodes,
|
||||||
|
noc_params.total_nodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize stats for runtime energy and power
|
||||||
|
rtp_stats.reset();
|
||||||
|
rtp_stats.readAc.access = noc_stats.total_access;
|
||||||
|
set_pppm(pppm_t, 1, 0 , 0, 0);
|
||||||
|
if (router_exist) {
|
||||||
|
// TODO: Move this to a McPATComponent parent class of Router
|
||||||
|
router->buffer.rt_power.readOp.dynamic =
|
||||||
|
(router->buffer.power.readOp.dynamic +
|
||||||
|
router->buffer.power.writeOp.dynamic) * rtp_stats.readAc.access;
|
||||||
|
router->crossbar.rt_power.readOp.dynamic =
|
||||||
|
router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access;
|
||||||
|
router->arbiter.rt_power.readOp.dynamic =
|
||||||
|
router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access;
|
||||||
|
|
||||||
|
router->rt_power = router->rt_power +
|
||||||
|
(router->buffer.rt_power + router->crossbar.rt_power +
|
||||||
|
router->arbiter.rt_power) * pppm_t +
|
||||||
|
router->power * pppm_lkg;//TDP power must be calculated first!
|
||||||
|
}
|
||||||
|
if (link_bus_exist) {
|
||||||
|
link_bus->int_stats.accesses = noc_stats.total_access;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively compute energy
|
||||||
|
McPATComponent::computeEnergy();
|
||||||
|
}
|
||||||
|
|
||||||
|
void OnChipNetwork::set_param_stats() {
|
||||||
|
// TODO: Remove this or move initialization elsewhere
|
||||||
|
memset(&noc_params, 0, sizeof(OnChipNetworkParameters));
|
||||||
|
|
||||||
|
int num_children = xml_data->nChildNode("param");
|
||||||
|
int i;
|
||||||
|
int mat_type;
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_INT_IF("type", noc_params.type);
|
||||||
|
ASSIGN_FP_IF("clockrate", noc_params.clockRate);
|
||||||
|
ASSIGN_INT_IF("flit_bits", noc_params.flit_size);
|
||||||
|
ASSIGN_FP_IF("link_len", link_len);
|
||||||
|
ASSIGN_FP_IF("link_throughput", noc_params.link_throughput);
|
||||||
|
ASSIGN_FP_IF("link_latency", noc_params.link_latency);
|
||||||
|
ASSIGN_INT_IF("input_ports", noc_params.input_ports);
|
||||||
|
ASSIGN_INT_IF("output_ports", noc_params.output_ports);
|
||||||
|
ASSIGN_INT_IF("global_linked_ports", noc_params.global_linked_ports);
|
||||||
|
ASSIGN_INT_IF("horizontal_nodes", noc_params.horizontal_nodes);
|
||||||
|
ASSIGN_INT_IF("vertical_nodes", noc_params.vertical_nodes);
|
||||||
|
ASSIGN_FP_IF("chip_coverage", noc_params.chip_coverage);
|
||||||
|
ASSIGN_FP_IF("link_routing_over_percentage",
|
||||||
|
noc_params.route_over_perc);
|
||||||
|
ASSIGN_INT_IF("has_global_link", noc_params.has_global_link);
|
||||||
|
ASSIGN_INT_IF("virtual_channel_per_port",
|
||||||
|
noc_params.virtual_channel_per_port);
|
||||||
|
ASSIGN_INT_IF("input_buffer_entries_per_vc",
|
||||||
|
noc_params.input_buffer_entries_per_vc);
|
||||||
|
ASSIGN_FP_IF("M_traffic_pattern", noc_params.M_traffic_pattern);
|
||||||
|
ASSIGN_FP_IF("link_base_width", noc_params.link_base_width);
|
||||||
|
ASSIGN_FP_IF("link_base_height", noc_params.link_base_height);
|
||||||
|
ASSIGN_INT_IF("link_start_wiring_level",
|
||||||
|
noc_params.link_start_wiring_level);
|
||||||
|
ASSIGN_INT_IF("wire_mat_type", mat_type);
|
||||||
|
ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// //clock power
|
// Change from MHz to Hz
|
||||||
// clockNetwork.init_wire_external(is_default, &interface_ip);
|
noc_params.clockRate *= 1e6;
|
||||||
// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb
|
|
||||||
// clockNetwork.end_wiring_level =5;//toplevel metal
|
|
||||||
// clockNetwork.start_wiring_level =5;//toplevel metal
|
|
||||||
// clockNetwork.num_regs = corepipe.tot_stage_vector;
|
|
||||||
// clockNetwork.optimize_wire();
|
|
||||||
}
|
|
||||||
|
|
||||||
void NoC::init_router()
|
interface_ip.wire_is_mat_type = mat_type;
|
||||||
{
|
interface_ip.wire_os_mat_type = mat_type;
|
||||||
router = new Router(nocdynp.flit_size,
|
|
||||||
nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc,
|
|
||||||
nocdynp.virtual_channel_per_port, &(g_tp.peri_global),
|
|
||||||
nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern);
|
|
||||||
//router->print_router();
|
|
||||||
area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes);
|
|
||||||
|
|
||||||
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
|
num_children = xml_data->nChildNode("stat");
|
||||||
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
|
for (i = 0; i < num_children; i++) {
|
||||||
router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
router_exist = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void NoC ::init_link_bus(double link_len_)
|
if (!node_name)
|
||||||
{
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("duty_cycle", noc_stats.duty_cycle);
|
||||||
|
ASSIGN_FP_IF("total_accesses", noc_stats.total_access);
|
||||||
|
|
||||||
// if (nocdynp.min_ports==1 )
|
else {
|
||||||
if (nocdynp.type)
|
warnUnrecognizedStat(node_name);
|
||||||
link_name = "Links";
|
|
||||||
else
|
|
||||||
link_name = "Bus";
|
|
||||||
|
|
||||||
link_len=link_len_;
|
|
||||||
assert(link_len>0);
|
|
||||||
|
|
||||||
interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate;
|
|
||||||
interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate;
|
|
||||||
|
|
||||||
link_len /= (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2;
|
|
||||||
|
|
||||||
if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors
|
|
||||||
link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size,
|
|
||||||
link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc);
|
|
||||||
|
|
||||||
link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area()
|
|
||||||
* nocdynp.global_linked_ports);
|
|
||||||
|
|
||||||
area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes);
|
|
||||||
link_bus_exist = true;
|
|
||||||
}
|
|
||||||
void NoC::computeEnergy(bool is_tdp)
|
|
||||||
{
|
|
||||||
//power_point_product_masks
|
|
||||||
double pppm_t[4] = {1,1,1,1};
|
|
||||||
double M=nocdynp.duty_cycle;
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
//init stats for TDP
|
|
||||||
stats_t.readAc.access = M;
|
|
||||||
tdp_stats = stats_t;
|
|
||||||
if (router_exist)
|
|
||||||
{
|
|
||||||
set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern
|
|
||||||
router->power = router->power*pppm_t;
|
|
||||||
set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes);
|
|
||||||
power = power + router->power*pppm_t;
|
|
||||||
}
|
|
||||||
if (link_bus_exist)
|
|
||||||
{
|
|
||||||
if (nocdynp.type)
|
|
||||||
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports,
|
|
||||||
nocdynp.global_linked_ports, nocdynp.global_linked_ports);
|
|
||||||
//reset traffic pattern; local port do not have router links
|
|
||||||
else
|
|
||||||
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports,
|
|
||||||
nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern
|
|
||||||
|
|
||||||
link_bus_tot_per_Router.power = link_bus->power*pppm_t;
|
|
||||||
|
|
||||||
set_pppm(pppm_t, nocdynp.total_nodes,
|
|
||||||
nocdynp.total_nodes,
|
|
||||||
nocdynp.total_nodes,
|
|
||||||
nocdynp.total_nodes);
|
|
||||||
power = power + link_bus_tot_per_Router.power*pppm_t;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//init stats for runtime power (RTP)
|
|
||||||
stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses;
|
|
||||||
rtp_stats = stats_t;
|
|
||||||
set_pppm(pppm_t, 1, 0 , 0, 0);
|
|
||||||
if (router_exist)
|
|
||||||
{
|
|
||||||
router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ;
|
|
||||||
router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ;
|
|
||||||
router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ;
|
|
||||||
|
|
||||||
router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t +
|
|
||||||
router->power*pppm_lkg;//TDP power must be calculated first!
|
|
||||||
rt_power = rt_power + router->rt_power;
|
|
||||||
}
|
|
||||||
if (link_bus_exist)
|
|
||||||
{
|
|
||||||
set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access);
|
|
||||||
link_bus->rt_power = link_bus->power * pppm_t;
|
|
||||||
rt_power = rt_power + link_bus->rt_power;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
|
|
||||||
{
|
|
||||||
string indent_str(indent, ' ');
|
|
||||||
string indent_str_next(indent+2, ' ');
|
|
||||||
bool long_channel = XML->sys.longer_channel_device;
|
|
||||||
|
|
||||||
double M =M_traffic_pattern*nocdynp.duty_cycle;
|
|
||||||
/*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc;
|
|
||||||
* When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to
|
|
||||||
* be applied together with McPAT's extra traffic pattern.
|
|
||||||
* */
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
cout << name << endl;
|
|
||||||
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
|
|
||||||
cout << indent_str << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
|
||||||
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
|
||||||
cout<<endl;
|
|
||||||
|
|
||||||
if (router_exist)
|
|
||||||
{
|
|
||||||
cout << indent_str << "Router: " << endl;
|
|
||||||
cout << indent_str_next << "Area = " << router->area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next<< "Peak Dynamic = " << router->power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? router->power.readOp.longer_channel_leakage:router->power.readOp.leakage) <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next<< "Runtime Dynamic = " << router->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
|
||||||
cout<<endl;
|
|
||||||
if (plevel >2){
|
|
||||||
cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)
|
|
||||||
*nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
cout << indent_str<< indent_str<< "Crossbar:" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Area = " << router->crossbar.area.get_area()*1e-6 << " mm^2" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->crossbar.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? router->crossbar.power.readOp.longer_channel_leakage:router->crossbar.power.readOp.leakage) << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->crossbar.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
cout << indent_str<< indent_str<< "Arbiter:" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->arbiter.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? router->arbiter.power.readOp.longer_channel_leakage:router->arbiter.power.readOp.leakage) << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->arbiter.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (link_bus_exist)
|
|
||||||
{
|
|
||||||
cout << indent_str << (nocdynp.type? "Per Router ":"") << link_name<<": " << endl;
|
|
||||||
cout << indent_str_next << "Area = " << link_bus_tot_per_Router.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next<< "Peak Dynamic = " << link_bus_tot_per_Router.power.readOp.dynamic*
|
|
||||||
nocdynp.clockRate << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? link_bus_tot_per_Router.power.readOp.longer_channel_leakage:link_bus_tot_per_Router.power.readOp.leakage)
|
|
||||||
<<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage
|
|
||||||
<< " W" << endl;
|
|
||||||
cout << indent_str_next<< "Runtime Dynamic = " << link_bus->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
|
|
||||||
cout<<endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
|
|
||||||
// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
|
|
||||||
// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clockRate = noc_params.clockRate;
|
||||||
|
noc_params.min_ports =
|
||||||
|
min(noc_params.input_ports, noc_params.output_ports);
|
||||||
|
if (noc_params.type) {
|
||||||
|
noc_params.global_linked_ports = (noc_params.input_ports - 1) +
|
||||||
|
(noc_params.output_ports - 1);
|
||||||
|
}
|
||||||
|
noc_params.total_nodes =
|
||||||
|
noc_params.horizontal_nodes * noc_params.vertical_nodes;
|
||||||
|
|
||||||
|
assert(noc_params.chip_coverage <= 1);
|
||||||
|
assert(noc_params.route_over_perc <= 1);
|
||||||
|
assert(link_len > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void NoC::set_noc_param()
|
OnChipNetwork ::~OnChipNetwork() {
|
||||||
{
|
|
||||||
|
|
||||||
nocdynp.type = XML->sys.NoC[ithNoC].type;
|
|
||||||
nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate;
|
|
||||||
nocdynp.clockRate *= 1e6;
|
|
||||||
nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);
|
|
||||||
|
|
||||||
nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits;
|
|
||||||
if (nocdynp.type)
|
|
||||||
{
|
|
||||||
nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports;
|
|
||||||
nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1
|
|
||||||
nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
|
|
||||||
nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1);
|
|
||||||
/*
|
|
||||||
* Except local i/o ports, all ports needs links( global_linked_ports);
|
|
||||||
* However only min_ports can be fully active simultaneously
|
|
||||||
* since the fewer number of ports (input or output ) is the bottleneck.
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
nocdynp.input_ports = 1;
|
|
||||||
nocdynp.output_ports = 1;
|
|
||||||
nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports);
|
|
||||||
nocdynp.global_linked_ports = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port;
|
|
||||||
nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc;
|
|
||||||
|
|
||||||
nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes;
|
|
||||||
nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes;
|
|
||||||
nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes;
|
|
||||||
nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle;
|
|
||||||
nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link;
|
|
||||||
nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput;
|
|
||||||
nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency;
|
|
||||||
nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage;
|
|
||||||
nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc;
|
|
||||||
|
|
||||||
assert (nocdynp.chip_coverage <=1);
|
|
||||||
assert (nocdynp.route_over_perc <=1);
|
|
||||||
|
|
||||||
if (nocdynp.type)
|
|
||||||
name = "NOC";
|
|
||||||
else
|
|
||||||
name = "BUSES";
|
|
||||||
|
|
||||||
}
|
if (router) {
|
||||||
|
delete router;
|
||||||
|
router = 0;
|
||||||
NoC ::~NoC(){
|
}
|
||||||
|
if (link_bus) {
|
||||||
if(router) {delete router; router = 0;}
|
delete link_bus;
|
||||||
if(link_bus) {delete link_bus; link_bus = 0;}
|
link_bus = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,13 +26,13 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
#ifndef NOC_H_
|
#ifndef NOC_H_
|
||||||
#define NOC_H_
|
#define NOC_H_
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "array.h"
|
#include "array.h"
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
#include "interconnect.h"
|
#include "interconnect.h"
|
||||||
|
@ -39,37 +40,62 @@
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "router.h"
|
#include "router.h"
|
||||||
|
|
||||||
class NoC :public Component {
|
class OnChipNetworkParameters {
|
||||||
public:
|
public:
|
||||||
|
double clockRate;
|
||||||
|
int flit_size;
|
||||||
|
int input_ports;
|
||||||
|
int output_ports;
|
||||||
|
int min_ports;
|
||||||
|
int global_linked_ports;
|
||||||
|
int virtual_channel_per_port;
|
||||||
|
int input_buffer_entries_per_vc;
|
||||||
|
int horizontal_nodes;
|
||||||
|
int vertical_nodes;
|
||||||
|
int total_nodes;
|
||||||
|
double link_throughput;
|
||||||
|
double link_latency;
|
||||||
|
double chip_coverage;
|
||||||
|
double route_over_perc;
|
||||||
|
bool has_global_link;
|
||||||
|
bool type;
|
||||||
|
double M_traffic_pattern;
|
||||||
|
double link_base_width;
|
||||||
|
double link_base_height;
|
||||||
|
int link_start_wiring_level;
|
||||||
|
};
|
||||||
|
|
||||||
ParseXML *XML;
|
class OnChipNetworkStatistics {
|
||||||
int ithNoC;
|
public:
|
||||||
InputParameter interface_ip;
|
double duty_cycle;
|
||||||
double link_len;
|
double total_access;
|
||||||
double executionTime;
|
};
|
||||||
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
|
||||||
Router * router;
|
class OnChipNetwork : public McPATComponent {
|
||||||
interconnect * link_bus;
|
public:
|
||||||
NoCParam nocdynp;
|
Router* router;
|
||||||
uca_org_t local_result;
|
Interconnect* link_bus;
|
||||||
statsDef tdp_stats;
|
Component link_bus_tot_per_Router;
|
||||||
statsDef rtp_stats;
|
|
||||||
statsDef stats_t;
|
int ithNoC;
|
||||||
powerDef power_t;
|
InputParameter interface_ip;
|
||||||
Component link_bus_tot_per_Router;
|
double link_len;
|
||||||
bool link_bus_exist;
|
double scktRatio, chip_PR_overhead, macro_PR_overhead;
|
||||||
bool router_exist;
|
OnChipNetworkParameters noc_params;
|
||||||
string name, link_name;
|
OnChipNetworkStatistics noc_stats;
|
||||||
double M_traffic_pattern;
|
uca_org_t local_result;
|
||||||
NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0);
|
statsDef stats_t;
|
||||||
void set_noc_param();
|
bool link_bus_exist;
|
||||||
void computeEnergy(bool is_tdp=true);
|
bool router_exist;
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
string link_name;
|
||||||
void init_link_bus(double link_len_);
|
|
||||||
void init_router();
|
OnChipNetwork(XMLNode* _xml_data, int ithNoC_,
|
||||||
void computeEnergy_link_bus(bool is_tdp=true);
|
InputParameter* interface_ip_);
|
||||||
void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
void set_param_stats();
|
||||||
~NoC();
|
void computeEnergy();
|
||||||
|
void init_link_bus();
|
||||||
|
void init_router();
|
||||||
|
~OnChipNetwork();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* NOC_H_ */
|
#endif /* NOC_H_ */
|
||||||
|
|
|
@ -1,839 +0,0 @@
|
||||||
/*****************************************************************************
|
|
||||||
* McPAT
|
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
|
||||||
* All Rights Reserved
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions are
|
|
||||||
* met: redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer;
|
|
||||||
* redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution;
|
|
||||||
* neither the name of the copyright holders nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
|
||||||
*
|
|
||||||
***************************************************************************/
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cassert>
|
|
||||||
#include <cmath>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <cstring>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "array.h"
|
|
||||||
#include "basic_circuit.h"
|
|
||||||
#include "const.h"
|
|
||||||
#include "parameter.h"
|
|
||||||
#include "processor.h"
|
|
||||||
#include "version.h"
|
|
||||||
|
|
||||||
/*
 * Builds the chip-level model from the parsed XML description.
 *
 * For every component class enabled in the configuration (cores, shared L2,
 * L3, first/second level directories, memory controllers, flash controllers,
 * NIUs, PCIe controllers and NoCs) the constructor instantiates the
 * component, runs computeEnergy() once with its default argument and once
 * with `false`, and accumulates area, `power` and `rt_power` both into the
 * per-class aggregate (core, l2, l3, ...) and into the processor totals.
 *
 * Homogeneous classes are modeled with a single representative instance
 * whose results are scaled by the configured instance count; heterogeneous
 * classes model every instance individually.
 *
 * Original notes:
 *   - placement and routing overhead is 10%, core scales worse than cache
 *     40% is accumulated from 90 to 22nm
 *   - There is no point to have heterogeneous memory controller on chip,
 *     thus McPAT only support homogeneous memory controllers.
 *
 * @param XML_interface parsed McPAT input; the pointer is stored, not copied.
 */
Processor::Processor(ParseXML *XML_interface)
    : XML(XML_interface), //TODO: using one global copy may have problems.
      mc(0),
      niu(0),
      pcie(0),
      flashcontroller(0)
{
    int i;
    double pppm_t[4] = {1, 1, 1, 1}; // scratch scaling factors filled in by set_pppm()
    set_proc_param();

    // Number of instances to actually model: a homogeneous class needs only
    // one representative (or none if the class is absent).
    if (procdynp.homoCore)
        numCore = procdynp.numCore == 0 ? 0 : 1;
    else
        numCore = procdynp.numCore;

    if (procdynp.homoL2)
        numL2 = procdynp.numL2 == 0 ? 0 : 1;
    else
        numL2 = procdynp.numL2;

    if (XML->sys.Private_L2 && numCore != numL2)
    {
        cout << "Number of private L2 does not match number of cores" << endl;
        // NOTE(review): this error path exits with status 0; kept as-is
        // because callers/scripts may rely on it.
        exit(0);
    }

    if (procdynp.homoL3)
        numL3 = procdynp.numL3 == 0 ? 0 : 1;
    else
        numL3 = procdynp.numL3;

    if (procdynp.homoNOC)
        numNOC = procdynp.numNOC == 0 ? 0 : 1;
    else
        numNOC = procdynp.numNOC;

//  if (!procdynp.homoNOC)
//  {
//      cout<<"Current McPAT does not support heterogeneous NOC"<<endl;
//      exit(0);
//  }

    if (procdynp.homoL1Dir)
        numL1Dir = procdynp.numL1Dir == 0 ? 0 : 1;
    else
        numL1Dir = procdynp.numL1Dir;

    if (procdynp.homoL2Dir)
        numL2Dir = procdynp.numL2Dir == 0 ? 0 : 1;
    else
        numL2Dir = procdynp.numL2Dir;

    // ---- Cores ----
    for (i = 0; i < numCore; i++)
    {
        cores.push_back(new Core(XML, i, &interface_ip));
        cores[i]->computeEnergy();
        cores[i]->computeEnergy(false);
        if (procdynp.homoCore)
        {
            core.area.set_area(core.area.get_area() + cores[i]->area.get_area() * procdynp.numCore);
            set_pppm(pppm_t, cores[i]->clockRate * procdynp.numCore, procdynp.numCore, procdynp.numCore, procdynp.numCore);
            core.power = core.power + cores[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / cores[i]->executionTime, procdynp.numCore, procdynp.numCore, procdynp.numCore);
            core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + core.area.get_area());
            power = power + core.power;
            rt_power = rt_power + core.rt_power;
        }
        else
        {
            core.area.set_area(core.area.get_area() + cores[i]->area.get_area());
            area.set_area(area.get_area() + cores[i]->area.get_area());

            set_pppm(pppm_t, cores[i]->clockRate, 1, 1, 1);
            core.power = core.power + cores[i]->power * pppm_t;
            power = power + cores[i]->power * pppm_t;

            set_pppm(pppm_t, 1 / cores[i]->executionTime, 1, 1, 1);
            core.rt_power = core.rt_power + cores[i]->rt_power * pppm_t;
            rt_power = rt_power + cores[i]->rt_power * pppm_t;
        }
    }

    // ---- Shared L2 caches (skipped when the L2s are private) ----
    if (!XML->sys.Private_L2)
    {
        for (i = 0; i < numL2; i++)
        {
            l2array.push_back(new SharedCache(XML, i, &interface_ip));
            l2array[i]->computeEnergy();
            l2array[i]->computeEnergy(false);
            if (procdynp.homoL2)
            {
                l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area() * procdynp.numL2);
                set_pppm(pppm_t, l2array[i]->cachep.clockRate * procdynp.numL2, procdynp.numL2, procdynp.numL2, procdynp.numL2);
                l2.power = l2.power + l2array[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, procdynp.numL2, procdynp.numL2, procdynp.numL2);
                l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t;
                area.set_area(area.get_area() + l2.area.get_area());
                power = power + l2.power;
                rt_power = rt_power + l2.rt_power;
            }
            else
            {
                l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area());
                area.set_area(area.get_area() + l2array[i]->area.get_area());

                set_pppm(pppm_t, l2array[i]->cachep.clockRate, 1, 1, 1);
                l2.power = l2.power + l2array[i]->power * pppm_t;
                power = power + l2array[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / l2array[i]->cachep.executionTime, 1, 1, 1);
                l2.rt_power = l2.rt_power + l2array[i]->rt_power * pppm_t;
                rt_power = rt_power + l2array[i]->rt_power * pppm_t;
            }
        }
    }

    // ---- L3 caches ----
    for (i = 0; i < numL3; i++)
    {
        l3array.push_back(new SharedCache(XML, i, &interface_ip, L3));
        l3array[i]->computeEnergy();
        l3array[i]->computeEnergy(false);
        if (procdynp.homoL3)
        {
            l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area() * procdynp.numL3);
            set_pppm(pppm_t, l3array[i]->cachep.clockRate * procdynp.numL3, procdynp.numL3, procdynp.numL3, procdynp.numL3);
            l3.power = l3.power + l3array[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, procdynp.numL3, procdynp.numL3, procdynp.numL3);
            l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + l3.area.get_area());
            power = power + l3.power;
            rt_power = rt_power + l3.rt_power;
        }
        else
        {
            l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area());
            area.set_area(area.get_area() + l3array[i]->area.get_area());
            set_pppm(pppm_t, l3array[i]->cachep.clockRate, 1, 1, 1);
            l3.power = l3.power + l3array[i]->power * pppm_t;
            power = power + l3array[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l3array[i]->cachep.executionTime, 1, 1, 1);
            l3.rt_power = l3.rt_power + l3array[i]->rt_power * pppm_t;
            rt_power = rt_power + l3array[i]->rt_power * pppm_t;
        }
    }

    // ---- First level directories ----
    for (i = 0; i < numL1Dir; i++)
    {
        l1dirarray.push_back(new SharedCache(XML, i, &interface_ip, L1Directory));
        l1dirarray[i]->computeEnergy();
        l1dirarray[i]->computeEnergy(false);
        if (procdynp.homoL1Dir)
        {
            l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area() * procdynp.numL1Dir);
            set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate * procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir);
            l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir, procdynp.numL1Dir, procdynp.numL1Dir);
            l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + l1dir.area.get_area());
            power = power + l1dir.power;
            rt_power = rt_power + l1dir.rt_power;
        }
        else
        {
            l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area());
            area.set_area(area.get_area() + l1dirarray[i]->area.get_area());
            set_pppm(pppm_t, l1dirarray[i]->cachep.clockRate, 1, 1, 1);
            l1dir.power = l1dir.power + l1dirarray[i]->power * pppm_t;
            // Apply the same scaling to the chip total as to the l1dir
            // aggregate (and as every other component class does); the
            // unscaled value made the totals disagree with the breakdown.
            power = power + l1dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l1dirarray[i]->cachep.executionTime, 1, 1, 1);
            l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power * pppm_t;
            rt_power = rt_power + l1dirarray[i]->rt_power * pppm_t;
        }
    }

    // ---- Second level directories ----
    for (i = 0; i < numL2Dir; i++)
    {
        l2dirarray.push_back(new SharedCache(XML, i, &interface_ip, L2Directory));
        l2dirarray[i]->computeEnergy();
        l2dirarray[i]->computeEnergy(false);
        if (procdynp.homoL2Dir)
        {
            l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area() * procdynp.numL2Dir);
            set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate * procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir);
            l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir, procdynp.numL2Dir, procdynp.numL2Dir);
            l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t;
            area.set_area(area.get_area() + l2dir.area.get_area());
            power = power + l2dir.power;
            rt_power = rt_power + l2dir.rt_power;
        }
        else
        {
            l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area());
            area.set_area(area.get_area() + l2dirarray[i]->area.get_area());
            set_pppm(pppm_t, l2dirarray[i]->cachep.clockRate, 1, 1, 1);
            l2dir.power = l2dir.power + l2dirarray[i]->power * pppm_t;
            power = power + l2dirarray[i]->power * pppm_t;
            set_pppm(pppm_t, 1 / l2dirarray[i]->cachep.executionTime, 1, 1, 1);
            l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power * pppm_t;
            rt_power = rt_power + l2dirarray[i]->rt_power * pppm_t;
        }
    }

    // ---- Memory controllers (always homogeneous: one model, scaled) ----
    if (XML->sys.mc.number_mcs > 0 && XML->sys.mc.memory_channels_per_mc > 0)
    {
        mc = new MemoryController(XML, &interface_ip, MC);
        mc->computeEnergy();
        mc->computeEnergy(false);
        mcs.area.set_area(mcs.area.get_area() + mc->area.get_area() * XML->sys.mc.number_mcs);
        area.set_area(area.get_area() + mc->area.get_area() * XML->sys.mc.number_mcs);
        set_pppm(pppm_t, XML->sys.mc.number_mcs * mc->mcp.clockRate, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs);
        mcs.power = mc->power * pppm_t;
        power = power + mcs.power;
        set_pppm(pppm_t, 1 / mc->mcp.executionTime, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs, XML->sys.mc.number_mcs);
        mcs.rt_power = mc->rt_power * pppm_t;
        rt_power = rt_power + mcs.rt_power;
    }

    // ---- Flash controllers ----
    if (XML->sys.flashc.number_mcs > 0)
    {
        flashcontroller = new FlashController(XML, &interface_ip);
        flashcontroller->computeEnergy();
        flashcontroller->computeEnergy(false);
        double number_fcs = flashcontroller->fcp.num_mcs;
        flashcontrollers.area.set_area(flashcontrollers.area.get_area() + flashcontroller->area.get_area() * number_fcs);
        area.set_area(area.get_area() + flashcontrollers.area.get_area());
        set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs);
        flashcontrollers.power = flashcontroller->power * pppm_t;
        power = power + flashcontrollers.power;
        set_pppm(pppm_t, number_fcs, number_fcs, number_fcs, number_fcs);
        flashcontrollers.rt_power = flashcontroller->rt_power * pppm_t;
        rt_power = rt_power + flashcontrollers.rt_power;
    }

    // ---- Network interface units ----
    if (XML->sys.niu.number_units > 0)
    {
        niu = new NIUController(XML, &interface_ip);
        niu->computeEnergy();
        niu->computeEnergy(false);
        nius.area.set_area(nius.area.get_area() + niu->area.get_area() * XML->sys.niu.number_units);
        area.set_area(area.get_area() + niu->area.get_area() * XML->sys.niu.number_units);
        set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units);
        nius.power = niu->power * pppm_t;
        power = power + nius.power;
        set_pppm(pppm_t, XML->sys.niu.number_units * niu->niup.clockRate, XML->sys.niu.number_units, XML->sys.niu.number_units, XML->sys.niu.number_units);
        nius.rt_power = niu->rt_power * pppm_t;
        rt_power = rt_power + nius.rt_power;
    }

    // ---- PCIe controllers ----
    if (XML->sys.pcie.number_units > 0 && XML->sys.pcie.num_channels > 0)
    {
        pcie = new PCIeController(XML, &interface_ip);
        pcie->computeEnergy();
        pcie->computeEnergy(false);
        pcies.area.set_area(pcies.area.get_area() + pcie->area.get_area() * XML->sys.pcie.number_units);
        area.set_area(area.get_area() + pcie->area.get_area() * XML->sys.pcie.number_units);
        set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units);
        pcies.power = pcie->power * pppm_t;
        power = power + pcies.power;
        set_pppm(pppm_t, XML->sys.pcie.number_units * pcie->pciep.clockRate, XML->sys.pcie.number_units, XML->sys.pcie.number_units, XML->sys.pcie.number_units);
        pcies.rt_power = pcie->rt_power * pppm_t;
        rt_power = rt_power + pcies.rt_power;
    }

    // ---- Networks-on-chip / buses ----
    if (numNOC > 0)
    {
        // Pass 1: create each interconnect and add its area.
        for (i = 0; i < numNOC; i++)
        {
            if (XML->sys.NoC[i].type)
            {
                // First add up area of routers if NoC is used
                nocs.push_back(new NoC(XML, i, &interface_ip, 1));
            }
            else
            {
                // Bus based interconnect: sized from the chip area
                // accumulated so far and the configured coverage.
                nocs.push_back(new NoC(XML, i, &interface_ip, 1, sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)));
            }

            if (procdynp.homoNOC)
            {
                noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area() * procdynp.numNOC);
                area.set_area(area.get_area() + noc.area.get_area());
            }
            else
            {
                noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area());
                area.set_area(area.get_area() + nocs[i]->area.get_area());
            }
        }

        /*
         * Pass 2: compute global links associated with each NOC, if any.
         * This must be done at the end (even after the NOC router part)
         * since the total chip area must be obtained to decide the link
         * routing.
         */
        for (i = 0; i < numNOC; i++)
        {
            if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type)
            {
                nocs[i]->init_link_bus(sqrt(area.get_area() * XML->sys.NoC[i].chip_coverage)); //compute global links
                if (procdynp.homoNOC)
                {
                    noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
                                      * nocs[i]->nocdynp.total_nodes
                                      * procdynp.numNOC);
                    area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
                                  * nocs[i]->nocdynp.total_nodes
                                  * procdynp.numNOC);
                }
                else
                {
                    noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
                                      * nocs[i]->nocdynp.total_nodes);
                    area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area()
                                  * nocs[i]->nocdynp.total_nodes);
                }
            }
        }

        // Pass 3: compute energy of NoC (w or w/o links) or buses
        for (i = 0; i < numNOC; i++)
        {
            nocs[i]->computeEnergy();
            nocs[i]->computeEnergy(false);
            if (procdynp.homoNOC)
            {
                set_pppm(pppm_t, procdynp.numNOC * nocs[i]->nocdynp.clockRate, procdynp.numNOC, procdynp.numNOC, procdynp.numNOC);
                noc.power = noc.power + nocs[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, procdynp.numNOC, procdynp.numNOC, procdynp.numNOC);
                noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t;
                power = power + noc.power;
                rt_power = rt_power + noc.rt_power;
            }
            else
            {
                set_pppm(pppm_t, nocs[i]->nocdynp.clockRate, 1, 1, 1);
                noc.power = noc.power + nocs[i]->power * pppm_t;
                power = power + nocs[i]->power * pppm_t;
                set_pppm(pppm_t, 1 / nocs[i]->nocdynp.executionTime, 1, 1, 1);
                noc.rt_power = noc.rt_power + nocs[i]->rt_power * pppm_t;
                rt_power = rt_power + nocs[i]->rt_power * pppm_t;
            }
        }
    }

//  //clock power
//  globalClock.init_wire_external(is_default, &interface_ip);
//  globalClock.clk_area =area*1e6; //change it from mm^2 to um^2
//  globalClock.end_wiring_level =5;//toplevel metal
//  globalClock.start_wiring_level =5;//toplevel metal
//  globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes
//  globalClock.optimize_wire();

}
|
|
||||||
|
|
||||||
void Processor::displayDeviceType(int device_type_, uint32_t indent)
|
|
||||||
{
|
|
||||||
string indent_str(indent, ' ');
|
|
||||||
|
|
||||||
switch ( device_type_ ) {
|
|
||||||
|
|
||||||
case 0 :
|
|
||||||
cout <<indent_str<<"Device Type= "<<"ITRS high performance device type"<<endl;
|
|
||||||
break;
|
|
||||||
case 1 :
|
|
||||||
cout <<indent_str<<"Device Type= "<<"ITRS low standby power device type"<<endl;
|
|
||||||
break;
|
|
||||||
case 2 :
|
|
||||||
cout <<indent_str<<"Device Type= "<<"ITRS low operating power device type"<<endl;
|
|
||||||
break;
|
|
||||||
case 3 :
|
|
||||||
cout <<indent_str<<"Device Type= "<<"LP-DRAM device type"<<endl;
|
|
||||||
break;
|
|
||||||
case 4 :
|
|
||||||
cout <<indent_str<<"Device Type= "<<"COMM-DRAM device type"<<endl;
|
|
||||||
break;
|
|
||||||
default :
|
|
||||||
{
|
|
||||||
cout <<indent_str<<"Unknown Device Type"<<endl;
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Processor::displayInterconnectType(int interconnect_type_, uint32_t indent)
|
|
||||||
{
|
|
||||||
string indent_str(indent, ' ');
|
|
||||||
|
|
||||||
switch ( interconnect_type_ ) {
|
|
||||||
|
|
||||||
case 0 :
|
|
||||||
cout <<indent_str<<"Interconnect metal projection= "<<"aggressive interconnect technology projection"<<endl;
|
|
||||||
break;
|
|
||||||
case 1 :
|
|
||||||
cout <<indent_str<<"Interconnect metal projection= "<<"conservative interconnect technology projection"<<endl;
|
|
||||||
break;
|
|
||||||
default :
|
|
||||||
{
|
|
||||||
cout <<indent_str<<"Unknown Interconnect Projection Type"<<endl;
|
|
||||||
exit(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Processor::displayEnergy(uint32_t indent, int plevel, bool is_tdp)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
bool long_channel = XML->sys.longer_channel_device;
|
|
||||||
string indent_str(indent, ' ');
|
|
||||||
string indent_str_next(indent+2, ' ');
|
|
||||||
if (is_tdp)
|
|
||||||
{
|
|
||||||
|
|
||||||
if (plevel<5)
|
|
||||||
{
|
|
||||||
cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
|
|
||||||
<< " of " << VER_UPDATE << ") results (current print level is "<< plevel
|
|
||||||
<<", please increase print level to see the details in components): "<<endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR
|
|
||||||
<< " of " << VER_UPDATE << ") results (current print level is 5)"<< endl;
|
|
||||||
}
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
cout <<indent_str<<"Technology "<<XML->sys.core_tech_node<<" nm"<<endl;
|
|
||||||
//cout <<indent_str<<"Device Type= "<<XML->sys.device_type<<endl;
|
|
||||||
if (long_channel)
|
|
||||||
cout <<indent_str<<"Using Long Channel Devices When Appropriate"<<endl;
|
|
||||||
//cout <<indent_str<<"Interconnect metal projection= "<<XML->sys.interconnect_projection_type<<endl;
|
|
||||||
displayInterconnectType(XML->sys.interconnect_projection_type, indent);
|
|
||||||
cout <<indent_str<<"Core clock Rate(MHz) "<<XML->sys.core[0].clock_rate<<endl;
|
|
||||||
cout <<endl;
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
cout <<"Processor: "<<endl;
|
|
||||||
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str << "Peak Power = " << power.readOp.dynamic +
|
|
||||||
(long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
|
|
||||||
cout << indent_str << "Total Leakage = " <<
|
|
||||||
(long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) + power.readOp.gate_leakage <<" W" << endl;
|
|
||||||
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str << "Subthreshold Leakage = " << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
if (numCore >0){
|
|
||||||
cout <<indent_str<<"Total Cores: "<<XML->sys.number_of_cores << " cores "<<endl;
|
|
||||||
displayDeviceType(XML->sys.device_type,indent);
|
|
||||||
cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str_next << "Subthreshold Leakage = " << core.power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (!XML->sys.Private_L2)
|
|
||||||
{
|
|
||||||
if (numL2 >0){
|
|
||||||
cout <<indent_str<<"Total L2s: "<<endl;
|
|
||||||
displayDeviceType(XML->sys.L2[0].device_type,indent);
|
|
||||||
cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str_next << "Subthreshold Leakage = " << l2.power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (numL3 >0){
|
|
||||||
cout <<indent_str<<"Total L3s: "<<endl;
|
|
||||||
displayDeviceType(XML->sys.L3[0].device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str_next << "Subthreshold Leakage = " << l3.power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (numL1Dir >0){
|
|
||||||
cout <<indent_str<<"Total First Level Directory: "<<endl;
|
|
||||||
displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str_next << "Subthreshold Leakage = " << l1dir.power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (numL2Dir >0){
|
|
||||||
cout <<indent_str<<"Total First Level Directory: "<<endl;
|
|
||||||
displayDeviceType(XML->sys.L1Directory[0].device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str_next << "Subthreshold Leakage = " << l2dir.power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (numNOC >0){
|
|
||||||
cout <<indent_str<<"Total NoCs (Network/Bus): "<<endl;
|
|
||||||
displayDeviceType(XML->sys.device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl;
|
|
||||||
//cout << indent_str_next << "Subthreshold Leakage = " << noc.power.readOp.longer_channel_leakage <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
|
|
||||||
{
|
|
||||||
cout <<indent_str<<"Total MCs: "<<XML->sys.mc.number_mcs << " Memory Controllers "<<endl;
|
|
||||||
displayDeviceType(XML->sys.device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.flashc.number_mcs >0)
|
|
||||||
{
|
|
||||||
cout <<indent_str<<"Total Flash/SSD Controllers: "<<flashcontroller->fcp.num_mcs << " Flash/SSD Controllers "<<endl;
|
|
||||||
displayDeviceType(XML->sys.device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.niu.number_units >0 )
|
|
||||||
{
|
|
||||||
cout <<indent_str<<"Total NIUs: "<<niu->niup.num_units << " Network Interface Units "<<endl;
|
|
||||||
displayDeviceType(XML->sys.device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
|
|
||||||
{
|
|
||||||
cout <<indent_str<<"Total PCIes: "<<pcie->pciep.num_units << " PCIe Controllers "<<endl;
|
|
||||||
displayDeviceType(XML->sys.device_type, indent);
|
|
||||||
cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl;
|
|
||||||
cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl;
|
|
||||||
cout << indent_str_next << "Subthreshold Leakage = "
|
|
||||||
<< (long_channel? pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl;
|
|
||||||
cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl;
|
|
||||||
cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl;
|
|
||||||
cout <<endl;
|
|
||||||
}
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
if (plevel >1)
|
|
||||||
{
|
|
||||||
for (i = 0;i < numCore; i++)
|
|
||||||
{
|
|
||||||
cores[i]->displayEnergy(indent+4,plevel,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
if (!XML->sys.Private_L2)
|
|
||||||
{
|
|
||||||
for (i = 0;i < numL2; i++)
|
|
||||||
{
|
|
||||||
l2array[i]->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (i = 0;i < numL3; i++)
|
|
||||||
{
|
|
||||||
l3array[i]->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
for (i = 0;i < numL1Dir; i++)
|
|
||||||
{
|
|
||||||
l1dirarray[i]->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
for (i = 0;i < numL2Dir; i++)
|
|
||||||
{
|
|
||||||
l2dirarray[i]->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0)
|
|
||||||
{
|
|
||||||
mc->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0)
|
|
||||||
{
|
|
||||||
flashcontroller->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.niu.number_units >0 )
|
|
||||||
{
|
|
||||||
niu->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0)
|
|
||||||
{
|
|
||||||
pcie->displayEnergy(indent+4,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0;i < numNOC; i++)
|
|
||||||
{
|
|
||||||
nocs[i]->displayEnergy(indent+4,plevel,is_tdp);
|
|
||||||
cout <<"*****************************************************************************************"<<endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void Processor::set_proc_param()
|
|
||||||
{
|
|
||||||
bool debug = false;
|
|
||||||
|
|
||||||
procdynp.homoCore = bool(debug?1:XML->sys.homogeneous_cores);
|
|
||||||
procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s);
|
|
||||||
procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s);
|
|
||||||
procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs);
|
|
||||||
procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories);
|
|
||||||
procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories);
|
|
||||||
|
|
||||||
procdynp.numCore = XML->sys.number_of_cores;
|
|
||||||
procdynp.numL2 = XML->sys.number_of_L2s;
|
|
||||||
procdynp.numL3 = XML->sys.number_of_L3s;
|
|
||||||
procdynp.numNOC = XML->sys.number_of_NoCs;
|
|
||||||
procdynp.numL1Dir = XML->sys.number_of_L1Directories;
|
|
||||||
procdynp.numL2Dir = XML->sys.number_of_L2Directories;
|
|
||||||
procdynp.numMC = XML->sys.mc.number_mcs;
|
|
||||||
procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc;
|
|
||||||
|
|
||||||
// if (procdynp.numCore<1)
|
|
||||||
// {
|
|
||||||
// cout<<" The target processor should at least have one core on chip." <<endl;
|
|
||||||
// exit(0);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if (numNOCs<0 || numNOCs>2)
|
|
||||||
// {
|
|
||||||
// cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<<endl;
|
|
||||||
// exit(0);
|
|
||||||
// }
|
|
||||||
|
|
||||||
/* Basic parameters*/
|
|
||||||
interface_ip.data_arr_ram_cell_tech_type = debug?0:XML->sys.device_type;
|
|
||||||
interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type;
|
|
||||||
interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type;
|
|
||||||
interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type;
|
|
||||||
|
|
||||||
interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type;
|
|
||||||
interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied.
|
|
||||||
interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components.
|
|
||||||
interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components.
|
|
||||||
interface_ip.leakage_power_wt = 0;
|
|
||||||
interface_ip.cycle_time_wt = 0;
|
|
||||||
|
|
||||||
interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied.
|
|
||||||
interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components.
|
|
||||||
interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components.
|
|
||||||
interface_ip.leakage_power_dev = 10000;
|
|
||||||
interface_ip.cycle_time_dev = 10000;
|
|
||||||
|
|
||||||
interface_ip.ed = 2;
|
|
||||||
interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately
|
|
||||||
interface_ip.int_prefetch_w = 1;
|
|
||||||
interface_ip.page_sz_bits = 0;
|
|
||||||
interface_ip.temp = debug?360: XML->sys.temperature;
|
|
||||||
interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node;
|
|
||||||
interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
|
|
||||||
|
|
||||||
//***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors.
|
|
||||||
//They will be overridden during each components initialization
|
|
||||||
interface_ip.cache_sz =64;
|
|
||||||
interface_ip.line_sz = 1;
|
|
||||||
interface_ip.assoc = 1;
|
|
||||||
interface_ip.nbanks = 1;
|
|
||||||
interface_ip.out_w = interface_ip.line_sz*8;
|
|
||||||
interface_ip.specific_tag = 1;
|
|
||||||
interface_ip.tag_w = 64;
|
|
||||||
interface_ip.access_mode = 2;
|
|
||||||
|
|
||||||
interface_ip.obj_func_dyn_energy = 0;
|
|
||||||
interface_ip.obj_func_dyn_power = 0;
|
|
||||||
interface_ip.obj_func_leak_power = 0;
|
|
||||||
interface_ip.obj_func_cycle_t = 1;
|
|
||||||
|
|
||||||
interface_ip.is_main_mem = false;
|
|
||||||
interface_ip.rpters_in_htree = true ;
|
|
||||||
interface_ip.ver_htree_wires_over_array = 0;
|
|
||||||
interface_ip.broadcast_addr_din_over_ver_htrees = 0;
|
|
||||||
|
|
||||||
interface_ip.num_rw_ports = 1;
|
|
||||||
interface_ip.num_rd_ports = 0;
|
|
||||||
interface_ip.num_wr_ports = 0;
|
|
||||||
interface_ip.num_se_rd_ports = 0;
|
|
||||||
interface_ip.num_search_ports = 1;
|
|
||||||
interface_ip.nuca = 0;
|
|
||||||
interface_ip.nuca_bank_count = 0;
|
|
||||||
interface_ip.is_cache =true;
|
|
||||||
interface_ip.pure_ram =false;
|
|
||||||
interface_ip.pure_cam =false;
|
|
||||||
interface_ip.force_cache_config =false;
|
|
||||||
if (XML->sys.Embedded)
|
|
||||||
{
|
|
||||||
interface_ip.wt =Global_30;
|
|
||||||
interface_ip.wire_is_mat_type = 0;
|
|
||||||
interface_ip.wire_os_mat_type = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
interface_ip.wt =Global;
|
|
||||||
interface_ip.wire_is_mat_type = 2;
|
|
||||||
interface_ip.wire_os_mat_type = 2;
|
|
||||||
}
|
|
||||||
interface_ip.force_wiretype = false;
|
|
||||||
interface_ip.print_detail = 1;
|
|
||||||
interface_ip.add_ecc_b_ =true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Releases the components owned by the processor model: every core, L2, L3
// and NoC object held in the vectors, plus the single-instance controllers.
Processor::~Processor()
{
    while (!cores.empty()) {
        delete cores.back();
        cores.pop_back();
    }
    while (!l2array.empty()) {
        delete l2array.back();
        l2array.pop_back();
    }
    while (!l3array.empty()) {
        delete l3array.back();
        l3array.pop_back();
    }
    while (!nocs.empty()) {
        delete nocs.back();
        nocs.pop_back();
    }

    // `delete` on a null pointer is a no-op, so no guard is needed. A guard
    // of the form `if (!ptr) delete ptr;` would only ever "delete" null and
    // leak every controller that was actually created.
    // NOTE(review): assumes these pointers are null when the corresponding
    // controller was never constructed - the constructor is not visible
    // here; confirm it initializes them.
    delete mc;
    mc = NULL;
    delete niu;
    niu = NULL;
    delete pcie;
    pcie = NULL;
    delete flashcontroller;
    flashcontroller = NULL;

    // NOTE(review): l1dirarray / l2dirarray are not released here; verify
    // who owns those objects.
}
|
|
File diff suppressed because it is too large
Load diff
350
ext/mcpat/system.cc
Normal file
350
ext/mcpat/system.cc
Normal file
|
@ -0,0 +1,350 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* McPAT
|
||||||
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
|
* All Rights Reserved
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Authors: Joel Hestness
|
||||||
|
* Yasuko Eckert
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "array.h"
|
||||||
|
#include "basic_circuit.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "const.h"
|
||||||
|
#include "parameter.h"
|
||||||
|
#include "system.h"
|
||||||
|
#include "version.h"
|
||||||
|
|
||||||
|
// TODO: Fix this constructor to default initialize all pointers to NULL
|
||||||
|
// Builds the top-level System component: reads the system-wide parameters,
// then instantiates one child component per <component> XML child, chosen
// by that child's "type" attribute.
// TODO: Fix this constructor to default initialize all pointers to NULL
System::System(XMLNode* _xml_data)
        : McPATComponent(_xml_data) {
    name = "System";
    set_proc_param();

    // Running indices handed to the Core / OnChipNetwork constructors.
    int core_index = 0;
    int noc_index = 0;

    // TODO: This loop can (and should) be called by every component in
    // the hierarchy. Consider moving it to McPATComponent
    int idx;
    const int component_count = xml_data->nChildNode("component");
    for (idx = 0; idx < component_count; idx++) {
        XMLNode* child = xml_data->getChildNodePtr("component", &idx);
        XMLCSTR type = child->getAttribute("type");

        if (!type) {
            warnMissingComponentType(child->getAttribute("id"));
        } STRCMP(type, "Core") {
            // TODO: If homogeneous cores, and core_index > 0, just copy
            // core 0
            children.push_back(new Core(child, core_index++, &interface_ip));
        } STRCMP(type, "CacheUnit") {
            children.push_back(new CacheUnit(child, &interface_ip));
        } STRCMP(type, "CacheController") {
            // TODO: Remove reliance on interface_ip - there should be a
            // better way to share global variables than passing, copying
            children.push_back(new CacheController(child, &interface_ip));
        } STRCMP(type, "MemoryController") {
            children.push_back(new MemoryController(child, &interface_ip));
        } STRCMP(type, "FlashController") {
            children.push_back(new FlashController(child, &interface_ip));
        } STRCMP(type, "NIUController") {
            children.push_back(new NIUController(child, &interface_ip));
        } STRCMP(type, "PCIeController") {
            children.push_back(new PCIeController(child, &interface_ip));
        } STRCMP(type, "Memory") {
            // TODO: not modelled yet; only a warning is issued
            warnIncompleteComponentType(type);
        } STRCMP(type, "OnChipNetwork") {
            // TODO: Many of the parameters to this constructor should be
            // handled in another way
            children.push_back(
                new OnChipNetwork(child, noc_index++, &interface_ip));
            warnIncompleteComponentType(type);
        } STRCMP(type, "BusInterconnect") {
            // TODO: Many of the parameters to this constructor should be
            // handled in another way
            children.push_back(new BusInterconnect(child, &interface_ip));
            warnIncompleteComponentType(type);

            // TODO: Add a directory data type that can handle the
            // directories as defined by certain McScript output
        } else {
            warnUnrecognizedComponent(type);
        }
    }
}
|
||||||
|
|
||||||
|
void System::displayDeviceType(int device_type_, uint32_t indent) {
|
||||||
|
string indent_str(indent, ' ');
|
||||||
|
cout << indent_str << "Device Type = ";
|
||||||
|
|
||||||
|
switch ( device_type_ ) {
|
||||||
|
case 0:
|
||||||
|
cout << "ITRS high performance device type" << endl;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
cout << "ITRS low standby power device type" << endl;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
cout << "ITRS low operating power device type" << endl;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
cout << "LP-DRAM device type" << endl;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
cout << "COMM-DRAM device type" << endl;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
cout << indent_str << "Unknown!" << endl;
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void System::displayInterconnectType(int interconnect_type_, uint32_t indent) {
|
||||||
|
string indent_str(indent, ' ');
|
||||||
|
cout << indent_str << "Interconnect metal projection = ";
|
||||||
|
|
||||||
|
switch ( interconnect_type_ ) {
|
||||||
|
case 0:
|
||||||
|
cout << "aggressive interconnect technology projection" << endl;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
cout << "conservative interconnect technology projection" << endl;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
cout << indent_str << "Unknown!" << endl;
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Migrate this down to the McPATComponent::displayData function
|
||||||
|
void System::displayData(uint32_t indent, int plevel) {
|
||||||
|
string indent_str(indent, ' ');
|
||||||
|
string indent_str_next(indent + 2, ' ');
|
||||||
|
if (plevel < 5) {
|
||||||
|
cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
|
||||||
|
<< " of " << VER_UPDATE << ") results (current print level is "
|
||||||
|
<< plevel
|
||||||
|
<< ", please increase print level to see the details in "
|
||||||
|
<< "components) " << endl;
|
||||||
|
} else {
|
||||||
|
cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR
|
||||||
|
<< " of " << VER_UPDATE << ") results (current print level is 5)"
|
||||||
|
<< endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
cout << "*****************************************************************"
|
||||||
|
<< "************************" << endl;
|
||||||
|
cout << indent_str << "Technology " << core_tech_node << " nm" << endl;
|
||||||
|
if (longer_channel_device)
|
||||||
|
cout << indent_str << "Using Long Channel Devices When Appropriate" << endl;
|
||||||
|
displayInterconnectType(interconnect_projection_type, indent);
|
||||||
|
cout << indent_str << "Target Clock Rate (MHz) " << target_core_clockrate / 1e6 << endl;
|
||||||
|
cout << endl;
|
||||||
|
|
||||||
|
cout << "*****************************************************************"
|
||||||
|
<< "************************" << endl;
|
||||||
|
|
||||||
|
McPATComponent::displayData(indent, plevel);
|
||||||
|
}
|
||||||
|
|
||||||
|
void System::set_proc_param() {
|
||||||
|
// TODO: Consider creating a SystemParams class that tracks system-wide
|
||||||
|
// parameters like these
|
||||||
|
longer_channel_device = false;
|
||||||
|
core_tech_node = -1;
|
||||||
|
temperature = -1;
|
||||||
|
interconnect_projection_type = -1;
|
||||||
|
device_type = -1;
|
||||||
|
physical_address_width = -1;
|
||||||
|
|
||||||
|
int num_children = xml_data->nChildNode("param");
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
|
||||||
|
XMLCSTR node_name = paramNode->getAttribute("name");
|
||||||
|
XMLCSTR value = paramNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingParamName(paramNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("core_tech_node", core_tech_node);
|
||||||
|
ASSIGN_INT_IF("target_core_clockrate", target_core_clockrate);
|
||||||
|
ASSIGN_INT_IF("temperature", temperature);
|
||||||
|
ASSIGN_INT_IF("device_type", device_type);
|
||||||
|
ASSIGN_INT_IF("longer_channel_device", longer_channel_device);
|
||||||
|
ASSIGN_INT_IF("interconnect_projection_type",
|
||||||
|
interconnect_projection_type);
|
||||||
|
ASSIGN_INT_IF("machine_bits", data_path_width);
|
||||||
|
ASSIGN_INT_IF("virtual_address_width", virtual_address_width);
|
||||||
|
ASSIGN_INT_IF("physical_address_width", physical_address_width);
|
||||||
|
ASSIGN_INT_IF("virtual_memory_page_size", virtual_memory_page_size);
|
||||||
|
ASSIGN_INT_IF("wire_is_mat_type", interface_ip.wire_is_mat_type);
|
||||||
|
ASSIGN_INT_IF("wire_os_mat_type", interface_ip.wire_os_mat_type);
|
||||||
|
ASSIGN_INT_IF("delay_wt", interface_ip.delay_wt);
|
||||||
|
ASSIGN_INT_IF("area_wt", interface_ip.area_wt);
|
||||||
|
ASSIGN_INT_IF("dynamic_power_wt", interface_ip.dynamic_power_wt);
|
||||||
|
ASSIGN_INT_IF("leakage_power_wt", interface_ip.leakage_power_wt);
|
||||||
|
ASSIGN_INT_IF("cycle_time_wt", interface_ip.cycle_time_wt);
|
||||||
|
ASSIGN_INT_IF("delay_dev", interface_ip.delay_dev);
|
||||||
|
ASSIGN_INT_IF("area_dev", interface_ip.area_dev);
|
||||||
|
ASSIGN_INT_IF("dynamic_power_dev", interface_ip.dynamic_power_dev);
|
||||||
|
ASSIGN_INT_IF("leakage_power_dev", interface_ip.leakage_power_dev);
|
||||||
|
ASSIGN_INT_IF("cycle_time_dev", interface_ip.cycle_time_dev);
|
||||||
|
ASSIGN_INT_IF("ed", interface_ip.ed);
|
||||||
|
ASSIGN_INT_IF("burst_len", interface_ip.burst_len);
|
||||||
|
ASSIGN_INT_IF("int_prefetch_w", interface_ip.int_prefetch_w);
|
||||||
|
ASSIGN_INT_IF("page_sz_bits", interface_ip.page_sz_bits);
|
||||||
|
ASSIGN_ENUM_IF("rpters_in_htree", interface_ip.rpters_in_htree, bool);
|
||||||
|
ASSIGN_INT_IF("ver_htree_wires_over_array",
|
||||||
|
interface_ip.ver_htree_wires_over_array);
|
||||||
|
ASSIGN_INT_IF("broadcast_addr_din_over_ver_htrees",
|
||||||
|
interface_ip.broadcast_addr_din_over_ver_htrees);
|
||||||
|
ASSIGN_INT_IF("nuca", interface_ip.nuca);
|
||||||
|
ASSIGN_INT_IF("nuca_bank_count", interface_ip.nuca_bank_count);
|
||||||
|
ASSIGN_ENUM_IF("force_cache_config",
|
||||||
|
interface_ip.force_cache_config, bool);
|
||||||
|
ASSIGN_ENUM_IF("wt", interface_ip.wt, Wire_type);
|
||||||
|
ASSIGN_INT_IF("force_wiretype", interface_ip.force_wiretype);
|
||||||
|
ASSIGN_INT_IF("print_detail", interface_ip.print_detail);
|
||||||
|
ASSIGN_ENUM_IF("add_ecc_b_", interface_ip.add_ecc_b_, bool);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedParam(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Change from MHz to Hz
|
||||||
|
target_core_clockrate *= 1e6;
|
||||||
|
interconnect_projection_type =
|
||||||
|
(interconnect_projection_type == 0) ? 0 : 1;
|
||||||
|
|
||||||
|
num_children = xml_data->nChildNode("stat");
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
|
||||||
|
XMLCSTR node_name = statNode->getAttribute("name");
|
||||||
|
XMLCSTR value = statNode->getAttribute("value");
|
||||||
|
|
||||||
|
if (!node_name)
|
||||||
|
warnMissingStatName(statNode->getAttribute("id"));
|
||||||
|
|
||||||
|
ASSIGN_FP_IF("total_cycles", total_cycles);
|
||||||
|
|
||||||
|
else {
|
||||||
|
warnUnrecognizedStat(node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (temperature < 0) {
|
||||||
|
errorUnspecifiedParam("temperature");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (core_tech_node < 0) {
|
||||||
|
errorUnspecifiedParam("core_tech_node");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (interconnect_projection_type < 0) {
|
||||||
|
errorUnspecifiedParam("interconnect_projection_type");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (device_type < 0) {
|
||||||
|
errorUnspecifiedParam("device_type");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (physical_address_width <= 0) {
|
||||||
|
errorNonPositiveParam("physical_address_width");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (data_path_width <= 0) {
|
||||||
|
errorNonPositiveParam("machine_bits");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (total_cycles <= 0) {
|
||||||
|
fprintf(stderr, "WARNING: total_cycles <= 0 in system component, ",
|
||||||
|
"power numbers will be funky...\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
clockRate = target_core_clockrate;
|
||||||
|
execution_time = total_cycles / (target_core_clockrate);
|
||||||
|
|
||||||
|
/* Basic parameters*/
|
||||||
|
interface_ip.data_arr_ram_cell_tech_type = device_type;
|
||||||
|
interface_ip.data_arr_peri_global_tech_type = device_type;
|
||||||
|
interface_ip.tag_arr_ram_cell_tech_type = device_type;
|
||||||
|
interface_ip.tag_arr_peri_global_tech_type = device_type;
|
||||||
|
|
||||||
|
interface_ip.ic_proj_type = interconnect_projection_type;
|
||||||
|
interface_ip.temp = temperature;
|
||||||
|
interface_ip.F_sz_nm = core_tech_node;
|
||||||
|
interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000;
|
||||||
|
interface_ip.is_main_mem = false;
|
||||||
|
|
||||||
|
// These are there just to make CACTI's error_checking() happy.
|
||||||
|
// They are either not actually used or overwritten by each component.
|
||||||
|
interface_ip.cache_sz = MIN_BUFFER_SIZE;
|
||||||
|
interface_ip.nbanks = 1;
|
||||||
|
interface_ip.out_w = 0;
|
||||||
|
interface_ip.line_sz = 1;
|
||||||
|
interface_ip.assoc = 1;
|
||||||
|
interface_ip.num_rw_ports = 1;
|
||||||
|
interface_ip.num_search_ports = 1;
|
||||||
|
interface_ip.is_cache = true;
|
||||||
|
interface_ip.pure_ram = false;
|
||||||
|
interface_ip.pure_cam = false;
|
||||||
|
|
||||||
|
|
||||||
|
//This section of code does not have real meaning; it is just to ensure
|
||||||
|
//all data will have initial value to prevent errors.
|
||||||
|
//They will be overridden during each components initialization
|
||||||
|
interface_ip.specific_tag = 1;
|
||||||
|
interface_ip.tag_w = 64;
|
||||||
|
interface_ip.access_mode = 2;
|
||||||
|
|
||||||
|
interface_ip.obj_func_dyn_energy = 0;
|
||||||
|
interface_ip.obj_func_dyn_power = 0;
|
||||||
|
interface_ip.obj_func_leak_power = 0;
|
||||||
|
interface_ip.obj_func_cycle_t = 1;
|
||||||
|
interface_ip.num_rw_ports = 1;
|
||||||
|
interface_ip.num_rd_ports = 0;
|
||||||
|
interface_ip.num_wr_ports = 0;
|
||||||
|
interface_ip.num_se_rd_ports = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Destructor. Intentionally empty for now: nothing is released here.
// TODO: Delete children... do this in McPATComponent
System::~System()
{
}
|
|
@ -1,7 +1,7 @@
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
* McPAT
|
* McPAT
|
||||||
* SOFTWARE LICENSE AGREEMENT
|
* SOFTWARE LICENSE AGREEMENT
|
||||||
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* All Rights Reserved
|
* All Rights Reserved
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -25,19 +25,23 @@
|
||||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* Authors: Joel Hestness
|
||||||
|
* Yasuko Eckert
|
||||||
*
|
*
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
#ifndef PROCESSOR_H_
|
|
||||||
#define PROCESSOR_H_
|
|
||||||
|
|
||||||
#include <vector>
|
#ifndef SYSTEM_H_
|
||||||
|
#define SYSTEM_H_
|
||||||
|
|
||||||
#include "XML_Parse.h"
|
|
||||||
#include "arbiter.h"
|
#include "arbiter.h"
|
||||||
#include "area.h"
|
#include "area.h"
|
||||||
#include "array.h"
|
#include "array.h"
|
||||||
#include "basic_components.h"
|
#include "basic_components.h"
|
||||||
|
#include "bus_interconnect.h"
|
||||||
|
#include "cachecontroller.h"
|
||||||
|
#include "cacheunit.h"
|
||||||
#include "core.h"
|
#include "core.h"
|
||||||
#include "decoder.h"
|
#include "decoder.h"
|
||||||
#include "iocontrollers.h"
|
#include "iocontrollers.h"
|
||||||
|
@ -45,35 +49,23 @@
|
||||||
#include "noc.h"
|
#include "noc.h"
|
||||||
#include "parameter.h"
|
#include "parameter.h"
|
||||||
#include "router.h"
|
#include "router.h"
|
||||||
#include "sharedcache.h"
|
|
||||||
|
|
||||||
class Processor : public Component
|
class System : public McPATComponent {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
ParseXML *XML;
|
|
||||||
vector<Core *> cores;
|
|
||||||
vector<SharedCache *> l2array;
|
|
||||||
vector<SharedCache *> l3array;
|
|
||||||
vector<SharedCache *> l1dirarray;
|
|
||||||
vector<SharedCache *> l2dirarray;
|
|
||||||
vector<NoC *> nocs;
|
|
||||||
MemoryController * mc;
|
|
||||||
NIUController * niu;
|
|
||||||
PCIeController * pcie;
|
|
||||||
FlashController * flashcontroller;
|
|
||||||
InputParameter interface_ip;
|
InputParameter interface_ip;
|
||||||
ProcParam procdynp;
|
|
||||||
//wire globalInterconnect;
|
int device_type;
|
||||||
//clock_network globalClock;
|
double core_tech_node;
|
||||||
Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers;
|
int interconnect_projection_type;
|
||||||
int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir;
|
int temperature;
|
||||||
Processor(ParseXML *XML_interface);
|
|
||||||
void compute();
|
System(XMLNode* _xml_data);
|
||||||
void set_proc_param();
|
void set_proc_param();
|
||||||
void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true);
|
// TODO: make this recursively compute energy on subcomponents
|
||||||
|
void displayData(uint32_t indent = 0, int plevel = 100);
|
||||||
void displayDeviceType(int device_type_, uint32_t indent = 0);
|
void displayDeviceType(int device_type_, uint32_t indent = 0);
|
||||||
void displayInterconnectType(int interconnect_type_, uint32_t indent = 0);
|
void displayInterconnectType(int interconnect_type_, uint32_t indent = 0);
|
||||||
~Processor();
|
~System();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* PROCESSOR_H_ */
|
#endif /* SYSTEM_H_ */
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -42,6 +42,7 @@
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2002, Business-Insight
|
* Copyright (c) 2002, Business-Insight
|
||||||
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
||||||
* <a href="http://www.Business-Insight.com">Business-Insight</a>
|
* <a href="http://www.Business-Insight.com">Business-Insight</a>
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
|
@ -160,33 +161,32 @@
|
||||||
#define XMLDLLENTRY
|
#define XMLDLLENTRY
|
||||||
#ifndef XML_NO_WIDE_CHAR
|
#ifndef XML_NO_WIDE_CHAR
|
||||||
#include <wchar.h> // to have 'wcsrtombs' for ANSI version
|
#include <wchar.h> // to have 'wcsrtombs' for ANSI version
|
||||||
// to have 'mbsrtowcs' for WIDECHAR version
|
// to have 'mbsrtowcs' for WIDECHAR version
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Some common types for char set portable code
|
// Some common types for char set portable code
|
||||||
#ifdef _XMLWIDECHAR
|
#ifdef _XMLWIDECHAR
|
||||||
#define _CXML(c) L ## c
|
#define _CXML(c) L ## c
|
||||||
#define XMLCSTR const wchar_t *
|
#define XMLCSTR const wchar_t *
|
||||||
#define XMLSTR wchar_t *
|
#define XMLSTR wchar_t *
|
||||||
#define XMLCHAR wchar_t
|
#define XMLCHAR wchar_t
|
||||||
#else
|
#else
|
||||||
#define _CXML(c) c
|
#define _CXML(c) c
|
||||||
#define XMLCSTR const char *
|
#define XMLCSTR const char *
|
||||||
#define XMLSTR char *
|
#define XMLSTR char *
|
||||||
#define XMLCHAR char
|
#define XMLCHAR char
|
||||||
#endif
|
#endif
|
||||||
#ifndef FALSE
|
#ifndef FALSE
|
||||||
#define FALSE 0
|
#define FALSE 0
|
||||||
#endif /* FALSE */
|
#endif /* FALSE */
|
||||||
#ifndef TRUE
|
#ifndef TRUE
|
||||||
#define TRUE 1
|
#define TRUE 1
|
||||||
#endif /* TRUE */
|
#endif /* TRUE */
|
||||||
|
|
||||||
|
|
||||||
/// Enumeration for XML parse errors.
|
/// Enumeration for XML parse errors.
|
||||||
typedef enum XMLError
|
typedef enum XMLError {
|
||||||
{
|
|
||||||
eXMLErrorNone = 0,
|
eXMLErrorNone = 0,
|
||||||
eXMLErrorMissingEndTag,
|
eXMLErrorMissingEndTag,
|
||||||
eXMLErrorNoXMLTagFound,
|
eXMLErrorNoXMLTagFound,
|
||||||
|
@ -213,30 +213,32 @@ typedef enum XMLError
|
||||||
|
|
||||||
|
|
||||||
/// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents
|
/// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents
|
||||||
typedef enum XMLElementType
|
typedef enum XMLElementType {
|
||||||
{
|
eNodeChild = 0,
|
||||||
eNodeChild=0,
|
eNodeAttribute = 1,
|
||||||
eNodeAttribute=1,
|
eNodeText = 2,
|
||||||
eNodeText=2,
|
eNodeClear = 3,
|
||||||
eNodeClear=3,
|
eNodeNULL = 4
|
||||||
eNodeNULL=4
|
|
||||||
} XMLElementType;
|
} XMLElementType;
|
||||||
|
|
||||||
/// Structure used to obtain error details if the parse fails.
|
/// Structure used to obtain error details if the parse fails.
|
||||||
typedef struct XMLResults
|
typedef struct XMLResults {
|
||||||
{
|
|
||||||
enum XMLError error;
|
enum XMLError error;
|
||||||
int nLine,nColumn;
|
int nLine;
|
||||||
|
int nColumn;
|
||||||
} XMLResults;
|
} XMLResults;
|
||||||
|
|
||||||
/// Structure for XML clear (unformatted) node (usually comments)
|
/// Structure for XML clear (unformatted) node (usually comments)
|
||||||
typedef struct XMLClear {
|
typedef struct XMLClear {
|
||||||
XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag;
|
XMLCSTR lpszValue;
|
||||||
|
XMLCSTR lpszOpenTag;
|
||||||
|
XMLCSTR lpszCloseTag;
|
||||||
} XMLClear;
|
} XMLClear;
|
||||||
|
|
||||||
/// Structure for XML attribute.
|
/// Structure for XML attribute.
|
||||||
typedef struct XMLAttribute {
|
typedef struct XMLAttribute {
|
||||||
XMLCSTR lpszName; XMLCSTR lpszValue;
|
XMLCSTR lpszName;
|
||||||
|
XMLCSTR lpszValue;
|
||||||
} XMLAttribute;
|
} XMLAttribute;
|
||||||
|
|
||||||
/// XMLElementPosition are not interchangeable with simple indexes
|
/// XMLElementPosition are not interchangeable with simple indexes
|
||||||
|
@ -256,9 +258,8 @@ struct XMLNodeContents;
|
||||||
* <li> XMLNode::openFileHelper </li>
|
* <li> XMLNode::openFileHelper </li>
|
||||||
* <li> XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)</li>
|
* <li> XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)</li>
|
||||||
* </ul> */
|
* </ul> */
|
||||||
typedef struct XMLDLLENTRY XMLNode
|
typedef struct XMLDLLENTRY XMLNode {
|
||||||
{
|
private:
|
||||||
private:
|
|
||||||
|
|
||||||
struct XMLNodeDataTag;
|
struct XMLNodeDataTag;
|
||||||
|
|
||||||
|
@ -267,7 +268,7 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
/// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode
|
/// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode
|
||||||
XMLNode(struct XMLNodeDataTag *p);
|
XMLNode(struct XMLNodeDataTag *p);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static XMLCSTR getVersion();///< Return the XMLParser library version number
|
static XMLCSTR getVersion();///< Return the XMLParser library version number
|
||||||
|
|
||||||
/** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string.
|
/** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string.
|
||||||
|
@ -275,7 +276,8 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
* @{ */
|
* @{ */
|
||||||
|
|
||||||
/// Parse an XML string and return the root of a XMLNode tree representing the string.
|
/// Parse an XML string and return the root of a XMLNode tree representing the string.
|
||||||
static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
|
static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL,
|
||||||
|
XMLResults *pResults = NULL);
|
||||||
/**< The "parseString" function parses an XML string and returns the root of a XMLNode tree. The "opposite" of this function is
|
/**< The "parseString" function parses an XML string and returns the root of a XMLNode tree. The "opposite" of this function is
|
||||||
* the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the
|
* the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the
|
||||||
* "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error.
|
* "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error.
|
||||||
|
@ -288,7 +290,8 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/// Parse an XML file and return the root of a XMLNode tree representing the file.
|
/// Parse an XML file and return the root of a XMLNode tree representing the file.
|
||||||
static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL);
|
static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL,
|
||||||
|
XMLResults *pResults = NULL);
|
||||||
/**< The "parseFile" function parses an XML file and returns the root of a XMLNode tree. The "opposite" of this function is
|
/**< The "parseFile" function parses an XML file and returns the root of a XMLNode tree. The "opposite" of this function is
|
||||||
* the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the
|
* the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the
|
||||||
* "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error.
|
* "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error.
|
||||||
|
@ -301,7 +304,7 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/// Parse an XML file and return the root of a XMLNode tree representing the file. A very crude error checking is made. An attempt to guess the Char Encoding used in the file is made.
|
/// Parse an XML file and return the root of a XMLNode tree representing the file. A very crude error checking is made. An attempt to guess the Char Encoding used in the file is made.
|
||||||
static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag=NULL);
|
static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag = NULL);
|
||||||
/**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file.
|
/**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file.
|
||||||
* This function also tries to guess char Encoding (UTF-8, ASCII or SHIFT-JIS) based on the first 200 bytes of the file. Since each
|
* This function also tries to guess char Encoding (UTF-8, ASCII or SHIFT-JIS) based on the first 200 bytes of the file. Since each
|
||||||
* application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files
|
* application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files
|
||||||
|
@ -322,7 +325,7 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error
|
static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error
|
||||||
|
|
||||||
/// Create an XML string starting from the current XMLNode.
|
/// Create an XML string starting from the current XMLNode.
|
||||||
XMLSTR createXMLString(int nFormat=1, int *pnSize=NULL) const;
|
XMLSTR createXMLString(int nFormat = 1, int *pnSize = NULL) const;
|
||||||
/**< The returned string should be free'd using the "freeXMLString" function.
|
/**< The returned string should be free'd using the "freeXMLString" function.
|
||||||
*
|
*
|
||||||
* If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element
|
* If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element
|
||||||
|
@ -330,8 +333,8 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
|
|
||||||
/// Save the content of an xmlNode inside a file
|
/// Save the content of an xmlNode inside a file
|
||||||
XMLError writeToFile(XMLCSTR filename,
|
XMLError writeToFile(XMLCSTR filename,
|
||||||
const char *encoding=NULL,
|
const char *encoding = NULL,
|
||||||
char nFormat=1) const;
|
char nFormat = 1) const;
|
||||||
/**< If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element with appropriate white spaces and carriage returns.
|
/**< If nFormat==0, no formatting is required; otherwise this returns a user-friendly XML string from a given element with appropriate white spaces and carriage returns.
|
||||||
* If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8".
|
* If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8".
|
||||||
* If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS".
|
* If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS".
|
||||||
|
@ -349,14 +352,15 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
XMLNode getChildNode(int i=0) const; ///< return ith child node
|
XMLNode getChildNode(int i=0) const; ///< return ith child node
|
||||||
XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name.
|
XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name.
|
||||||
XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing)
|
XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing)
|
||||||
|
XMLNode* getChildNodePtr(XMLCSTR name, int *j) const;
|
||||||
XMLNode getChildNodeWithAttribute(XMLCSTR tagName,
|
XMLNode getChildNodeWithAttribute(XMLCSTR tagName,
|
||||||
XMLCSTR attributeName,
|
XMLCSTR attributeName,
|
||||||
XMLCSTR attributeValue=NULL,
|
XMLCSTR attributeValue=NULL,
|
||||||
int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing)
|
int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing)
|
||||||
XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
|
XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
|
||||||
///< return the first child node with specific path
|
///< return the first child node with specific path
|
||||||
XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
|
XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/');
|
||||||
///< return the first child node with specific path.
|
///< return the first child node with specific path.
|
||||||
|
|
||||||
int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name
|
int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name
|
||||||
int nChildNode() const; ///< nbr of child node
|
int nChildNode() const; ///< nbr of child node
|
||||||
|
@ -418,12 +422,12 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
*/
|
*/
|
||||||
XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name
|
XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name
|
||||||
XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
|
XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
|
||||||
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added
|
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added
|
||||||
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
|
XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
|
||||||
XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
|
XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
|
||||||
XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
|
XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
|
||||||
XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
|
XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
|
||||||
XMLClear *updateClear(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
|
XMLClear *updateClear(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
|
||||||
XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
|
XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
@ -482,12 +486,12 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
|
|
||||||
XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name
|
XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name
|
||||||
XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
|
XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added
|
||||||
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added
|
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added
|
||||||
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
|
XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added
|
||||||
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
|
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added
|
||||||
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
|
XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added
|
||||||
XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
|
XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added
|
||||||
XMLClear *updateClear_WOSD(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
|
XMLClear *updateClear_WOSD(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added
|
||||||
XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
|
XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
|
@ -508,15 +512,14 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/// Enumeration for XML character encoding.
|
/// Enumeration for XML character encoding.
|
||||||
typedef enum XMLCharEncoding
|
typedef enum XMLCharEncoding {
|
||||||
{
|
char_encoding_error = 0,
|
||||||
char_encoding_error=0,
|
char_encoding_UTF8 = 1,
|
||||||
char_encoding_UTF8=1,
|
char_encoding_legacy = 2,
|
||||||
char_encoding_legacy=2,
|
char_encoding_ShiftJIS = 3,
|
||||||
char_encoding_ShiftJIS=3,
|
char_encoding_GB2312 = 4,
|
||||||
char_encoding_GB2312=4,
|
char_encoding_Big5 = 5,
|
||||||
char_encoding_Big5=5,
|
char_encoding_GBK = 6 // this is actually the same as Big5
|
||||||
char_encoding_GBK=6 // this is actually the same as Big5
|
|
||||||
} XMLCharEncoding;
|
} XMLCharEncoding;
|
||||||
|
|
||||||
/** \addtogroup conversions
|
/** \addtogroup conversions
|
||||||
|
@ -589,48 +592,46 @@ typedef struct XMLDLLENTRY XMLNode
|
||||||
* If an inconsistency in the encoding is detected, then the return value is "0". */
|
* If an inconsistency in the encoding is detected, then the return value is "0". */
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// these are functions and structures used internally by the XMLNode class (don't bother about them):
|
// these are functions and structures used internally by the XMLNode class (don't bother about them):
|
||||||
|
|
||||||
typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
|
typedef struct XMLNodeDataTag { // to allow shallow copy and "intelligent/smart" pointers (automatic delete):
|
||||||
{
|
XMLCSTR lpszName; // Element name (=NULL if root)
|
||||||
XMLCSTR lpszName; // Element name (=NULL if root)
|
int nChild, // Number of child nodes
|
||||||
int nChild, // Number of child nodes
|
nText, // Number of text fields
|
||||||
nText, // Number of text fields
|
nClear, // Number of Clear fields (comments)
|
||||||
nClear, // Number of Clear fields (comments)
|
nAttribute; // Number of attributes
|
||||||
nAttribute; // Number of attributes
|
char isDeclaration; // Whether node is an XML declaration - '<?xml ?>'
|
||||||
char isDeclaration; // Whether node is an XML declaration - '<?xml ?>'
|
struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root)
|
||||||
struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root)
|
XMLNode *pChild; // Array of child nodes
|
||||||
XMLNode *pChild; // Array of child nodes
|
XMLCSTR *pText; // Array of text fields
|
||||||
XMLCSTR *pText; // Array of text fields
|
XMLClear *pClear; // Array of clear fields
|
||||||
XMLClear *pClear; // Array of clear fields
|
XMLAttribute *pAttribute; // Array of attributes
|
||||||
XMLAttribute *pAttribute; // Array of attributes
|
int *pOrder; // order of the child_nodes,text_fields,clear_fields
|
||||||
int *pOrder; // order of the child_nodes,text_fields,clear_fields
|
int ref_count; // for garbage collection (smart pointers)
|
||||||
int ref_count; // for garbage collection (smart pointers)
|
} XMLNodeData;
|
||||||
} XMLNodeData;
|
XMLNodeData *d;
|
||||||
XMLNodeData *d;
|
|
||||||
|
|
||||||
char parseClearTag(void *px, void *pa);
|
char parseClearTag(void *px, void *pa);
|
||||||
char maybeAddTxT(void *pa, XMLCSTR tokenPStr);
|
char maybeAddTxT(void *pa, XMLCSTR tokenPStr);
|
||||||
int ParseXMLElement(void *pXML);
|
int ParseXMLElement(void *pXML);
|
||||||
void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype);
|
void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype);
|
||||||
int indexText(XMLCSTR lpszValue) const;
|
int indexText(XMLCSTR lpszValue) const;
|
||||||
int indexClear(XMLCSTR lpszValue) const;
|
int indexClear(XMLCSTR lpszValue) const;
|
||||||
XMLNode addChild_priv(int,XMLSTR,char,int);
|
XMLNode addChild_priv(int, XMLSTR, char, int);
|
||||||
XMLAttribute *addAttribute_priv(int,XMLSTR,XMLSTR);
|
XMLAttribute *addAttribute_priv(int, XMLSTR, XMLSTR);
|
||||||
XMLCSTR addText_priv(int,XMLSTR,int);
|
XMLCSTR addText_priv(int, XMLSTR, int);
|
||||||
XMLClear *addClear_priv(int,XMLSTR,XMLCSTR,XMLCSTR,int);
|
XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int);
|
||||||
void emptyTheNode(char force);
|
void emptyTheNode(char force);
|
||||||
static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype);
|
static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype);
|
||||||
static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat);
|
static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat);
|
||||||
static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index);
|
static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index);
|
||||||
static void exactMemory(XMLNodeData *d);
|
static void exactMemory(XMLNodeData *d);
|
||||||
static int detachFromParent(XMLNodeData *d);
|
static int detachFromParent(XMLNodeData *d);
|
||||||
} XMLNode;
|
} XMLNode;
|
||||||
|
|
||||||
/// This structure is given by the function XMLNode::enumContents.
|
/// This structure is given by the function XMLNode::enumContents.
|
||||||
typedef struct XMLNodeContents
|
typedef struct XMLNodeContents {
|
||||||
{
|
|
||||||
/// This dictates what's the content of the XMLNodeContent
|
/// This dictates what's the content of the XMLNodeContent
|
||||||
enum XMLElementType etype;
|
enum XMLElementType etype;
|
||||||
/**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */
|
/**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */
|
||||||
|
@ -664,12 +665,12 @@ XMLDLLENTRY void freeXMLString(XMLSTR t); // {free(t);}
|
||||||
* delete them without any trouble.
|
* delete them without any trouble.
|
||||||
*
|
*
|
||||||
* @{ */
|
* @{ */
|
||||||
XMLDLLENTRY char xmltob(XMLCSTR xmlString,char defautValue=0);
|
XMLDLLENTRY char xmltob(XMLCSTR xmlString, char defautValue=0);
|
||||||
XMLDLLENTRY int xmltoi(XMLCSTR xmlString,int defautValue=0);
|
XMLDLLENTRY int xmltoi(XMLCSTR xmlString, int defautValue=0);
|
||||||
XMLDLLENTRY long xmltol(XMLCSTR xmlString,long defautValue=0);
|
XMLDLLENTRY long xmltol(XMLCSTR xmlString, long defautValue=0);
|
||||||
XMLDLLENTRY double xmltof(XMLCSTR xmlString,double defautValue=.0);
|
XMLDLLENTRY double xmltof(XMLCSTR xmlString, double defautValue=.0);
|
||||||
XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString,XMLCSTR defautValue=_CXML(""));
|
XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString, XMLCSTR defautValue=_CXML(""));
|
||||||
XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
|
XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString, XMLCHAR defautValue=_CXML('\0'));
|
||||||
/** @} */
|
/** @} */
|
||||||
|
|
||||||
/** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions.
|
/** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions.
|
||||||
|
@ -685,10 +686,9 @@ XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0'));
|
||||||
* \note If you are creating from scratch an XML file using the provided XMLNode class
|
* \note If you are creating from scratch an XML file using the provided XMLNode class
|
||||||
* you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the
|
* you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the
|
||||||
* processing job for you during rendering).*/
|
* processing job for you during rendering).*/
|
||||||
typedef struct XMLDLLENTRY ToXMLStringTool
|
typedef struct XMLDLLENTRY ToXMLStringTool {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
ToXMLStringTool(): buf(NULL),buflen(0){}
|
ToXMLStringTool(): buf(NULL), buflen(0){}
|
||||||
~ToXMLStringTool();
|
~ToXMLStringTool();
|
||||||
void freeBuffer();///<call this function when you have finished using this object to release memory used by the internal buffer.
|
void freeBuffer();///<call this function when you have finished using this object to release memory used by the internal buffer.
|
||||||
|
|
||||||
|
@ -718,10 +718,9 @@ private:
|
||||||
* b64-encoded text included inside the XML file, use "decode". Alternatively, these
|
* b64-encoded text included inside the XML file, use "decode". Alternatively, these
|
||||||
* functions can also be used to "encrypt/decrypt" some critical data contained inside
|
* functions can also be used to "encrypt/decrypt" some critical data contained inside
|
||||||
* the XML (it's not a strong encryption at all, but sometimes it can be useful). */
|
* the XML (it's not a strong encryption at all, but sometimes it can be useful). */
|
||||||
typedef struct XMLDLLENTRY XMLParserBase64Tool
|
typedef struct XMLDLLENTRY XMLParserBase64Tool {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
XMLParserBase64Tool(): buf(NULL),buflen(0){}
|
XMLParserBase64Tool(): buf(NULL), buflen(0){}
|
||||||
~XMLParserBase64Tool();
|
~XMLParserBase64Tool();
|
||||||
void freeBuffer();///< Call this function when you have finished using this object to release memory used by the internal buffer.
|
void freeBuffer();///< Call this function when you have finished using this object to release memory used by the internal buffer.
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue