gem5/ext/mcpat/cacti/io.cc
Yasuko Eckert 0deef376d9 ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes
Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known
issues/concerns we did not have a chance to address in this patch.

High-level changes in this patch include:
 1) Making XML parsing modular and hierarchical:
   - Shift parsing responsibility into the components
   - Read XML in a (mostly) context-free recursive manner so that McPAT input
     files can contain arbitrary component hierarchies
 2) Making power, energy, and area calculations a hierarchical and recursive
    process
   - Components track their subcomponents and recursively call compute
     functions in stages
   - Make C++ object hierarchy reflect inheritance of classes of components
     with similar structures
   - Simplify computeArea() and computeEnergy() functions to eliminate
     successive calls to calculate separate TDP vs. runtime energy
   - Remove Processor component (now unnecessary) and introduce a more abstract
     System component
 3) Standardizing McPAT output across all components
   - Use a single, common data structure for storing and printing McPAT output
   - Recursively call print functions through component hierarchy
 4) For caches, allow splitting data array and tag array reads and writes for
    better accuracy
 5) Improving the usability of CACTI by printing more helpful warning and error
    messages
 6) Minor: Impose more rigorous code style for clarity (more work still to be
    done)
Overall, these changes greatly reduce the amount of replicated code, and they
improve McPAT runtime and decrease memory footprint.
2014-06-03 13:32:59 -07:00

2008 lines
78 KiB
C++

/*****************************************************************************
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <fstream>
#include <iostream>
#include <sstream>
#include "Ucache.h"
#include "arbiter.h"
#include "area.h"
#include "basic_circuit.h"
#include "crossbar.h"
#include "io.h"
#include "nuca.h"
#include "parameter.h"
//#include "highradix.h"
using namespace std;
/* Parses "cache.cfg" file */
void
InputParameter::parse_cfg(const string & in_file) {
FILE *fp = fopen(in_file.c_str(), "r");
char line[5000];
char jk[5000];
char temp_var[5000];
if (!fp) {
cout << in_file << " is missing!\n";
exit(-1);
}
while (fscanf(fp, "%[^\n]\n", line) != EOF) {
if (!strncmp("-size", line, strlen("-size"))) {
sscanf(line, "-size %[(:-~)*]%u", jk, &(cache_sz));
continue;
}
if (!strncmp("-page size", line, strlen("-page size"))) {
sscanf(line, "-page size %[(:-~)*]%u", jk, &(page_sz_bits));
continue;
}
if (!strncmp("-burst length", line, strlen("-burst length"))) {
sscanf(line, "-burst %[(:-~)*]%u", jk, &(burst_len));
continue;
}
if (!strncmp("-internal prefetch width", line, strlen("-internal prefetch width"))) {
sscanf(line, "-internal prefetch %[(:-~)*]%u", jk, &(int_prefetch_w));
continue;
}
if (!strncmp("-block", line, strlen("-block"))) {
sscanf(line, "-block size (bytes) %d", &(line_sz));
continue;
}
if (!strncmp("-associativity", line, strlen("-associativity"))) {
sscanf(line, "-associativity %d", &(assoc));
continue;
}
if (!strncmp("-read-write", line, strlen("-read-write"))) {
sscanf(line, "-read-write port %d", &(num_rw_ports));
continue;
}
if (!strncmp("-exclusive read", line, strlen("exclusive read"))) {
sscanf(line, "-exclusive read port %d", &(num_rd_ports));
continue;
}
if (!strncmp("-exclusive write", line, strlen("-exclusive write"))) {
sscanf(line, "-exclusive write port %d", &(num_wr_ports));
continue;
}
if (!strncmp("-single ended", line, strlen("-single ended"))) {
sscanf(line, "-single %[(:-~)*]%d", jk,
&(num_se_rd_ports));
continue;
}
if (!strncmp("-search", line, strlen("-search"))) {
sscanf(line, "-search port %d", &(num_search_ports));
continue;
}
if (!strncmp("-UCA bank", line, strlen("-UCA bank"))) {
sscanf(line, "-UCA bank%[((:-~)| )*]%d", jk, &(nbanks));
continue;
}
if (!strncmp("-technology", line, strlen("-technology"))) {
sscanf(line, "-technology (u) %lf", &(F_sz_um));
F_sz_nm = F_sz_um * 1000;
continue;
}
if (!strncmp("-output/input", line, strlen("-output/input"))) {
sscanf(line, "-output/input bus %[(:-~)*]%d", jk, &(out_w));
continue;
}
if (!strncmp("-operating temperature", line, strlen("-operating temperature"))) {
sscanf(line, "-operating temperature %[(:-~)*]%d", jk, &(temp));
continue;
}
if (!strncmp("-cache type", line, strlen("-cache type"))) {
sscanf(line, "-cache type%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("cache", temp_var, sizeof("cache"))) {
is_cache = true;
} else {
is_cache = false;
}
if (!strncmp("main memory", temp_var, sizeof("main memory"))) {
is_main_mem = true;
} else {
is_main_mem = false;
}
if (!strncmp("cam", temp_var, sizeof("cam"))) {
pure_cam = true;
} else {
pure_cam = false;
}
if (!strncmp("ram", temp_var, sizeof("ram"))) {
pure_ram = true;
} else {
if (!is_main_mem)
pure_ram = false;
else
pure_ram = true;
}
continue;
}
if (!strncmp("-tag size", line, strlen("-tag size"))) {
sscanf(line, "-tag size%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("default", temp_var, sizeof("default"))) {
specific_tag = false;
tag_w = 42; /* the acutal value is calculated
* later based on the cache size, bank count, and associativity
*/
} else {
specific_tag = true;
sscanf(line, "-tag size (b) %d", &(tag_w));
}
continue;
}
if (!strncmp("-access mode", line, strlen("-access mode"))) {
sscanf(line, "-access %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("fast", temp_var, strlen("fast"))) {
access_mode = 2;
} else if (!strncmp("sequential", temp_var, strlen("sequential"))) {
access_mode = 1;
} else if (!strncmp("normal", temp_var, strlen("normal"))) {
access_mode = 0;
} else {
cout << "ERROR: Invalid access mode!\n";
exit(0);
}
continue;
}
if (!strncmp("-Data array cell type", line,
strlen("-Data array cell type"))) {
sscanf(line, "-Data array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
data_arr_ram_cell_tech_type = 0;
} else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
data_arr_ram_cell_tech_type = 1;
} else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
data_arr_ram_cell_tech_type = 2;
} else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
data_arr_ram_cell_tech_type = 3;
} else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
data_arr_ram_cell_tech_type = 4;
} else {
cout << "ERROR: Invalid type!\n";
exit(0);
}
continue;
}
if (!strncmp("-Data array peripheral type", line, strlen("-Data array peripheral type"))) {
sscanf(line, "-Data array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
data_arr_peri_global_tech_type = 0;
} else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
data_arr_peri_global_tech_type = 1;
} else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
data_arr_peri_global_tech_type = 2;
} else {
cout << "ERROR: Invalid type!\n";
exit(0);
}
continue;
}
if (!strncmp("-Tag array cell type", line, strlen("-Tag array cell type"))) {
sscanf(line, "-Tag array cell type %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
tag_arr_ram_cell_tech_type = 0;
} else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
tag_arr_ram_cell_tech_type = 1;
} else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
tag_arr_ram_cell_tech_type = 2;
} else if (!strncmp("lp-dram", temp_var, strlen("lp-dram"))) {
tag_arr_ram_cell_tech_type = 3;
} else if (!strncmp("comm-dram", temp_var, strlen("comm-dram"))) {
tag_arr_ram_cell_tech_type = 4;
} else {
cout << "ERROR: Invalid type!\n";
exit(0);
}
continue;
}
if (!strncmp("-Tag array peripheral type", line, strlen("-Tag array peripheral type"))) {
sscanf(line, "-Tag array peripheral type %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("itrs-hp", temp_var, strlen("itrs-hp"))) {
tag_arr_peri_global_tech_type = 0;
} else if (!strncmp("itrs-lstp", temp_var, strlen("itrs-lstp"))) {
tag_arr_peri_global_tech_type = 1;
} else if (!strncmp("itrs-lop", temp_var, strlen("itrs-lop"))) {
tag_arr_peri_global_tech_type = 2;
} else {
cout << "ERROR: Invalid type!\n";
exit(0);
}
continue;
}
if (!strncmp("-design", line, strlen("-design"))) {
sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
&(delay_wt), &(dynamic_power_wt),
&(leakage_power_wt),
&(cycle_time_wt), &(area_wt));
continue;
}
if (!strncmp("-deviate", line, strlen("-deviate"))) {
sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
&(delay_dev), &(dynamic_power_dev),
&(leakage_power_dev),
&(cycle_time_dev), &(area_dev));
continue;
}
if (!strncmp("-Optimize", line, strlen("-Optimize"))) {
sscanf(line, "-Optimize %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("ED^2", temp_var, strlen("ED^2"))) {
ed = 2;
} else if (!strncmp("ED", temp_var, strlen("ED"))) {
ed = 1;
} else {
ed = 0;
}
}
if (!strncmp("-NUCAdesign", line, strlen("-NUCAdesign"))) {
sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
&(delay_wt_nuca), &(dynamic_power_wt_nuca),
&(leakage_power_wt_nuca),
&(cycle_time_wt_nuca), &(area_wt_nuca));
continue;
}
if (!strncmp("-NUCAdeviate", line, strlen("-NUCAdeviate"))) {
sscanf(line, "-%[((:-~)| |,)*]%d:%d:%d:%d:%d", jk,
&(delay_dev_nuca), &(dynamic_power_dev_nuca),
&(leakage_power_dev_nuca),
&(cycle_time_dev_nuca), &(area_dev_nuca));
continue;
}
if (!strncmp("-Cache model", line, strlen("-cache model"))) {
sscanf(line, "-Cache model %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("UCA", temp_var, strlen("UCA"))) {
nuca = 0;
} else {
nuca = 1;
}
continue;
}
if (!strncmp("-NUCA bank", line, strlen("-NUCA bank"))) {
sscanf(line, "-NUCA bank count %d", &(nuca_bank_count));
if (nuca_bank_count != 0) {
force_nuca_bank = 1;
}
continue;
}
if (!strncmp("-Wire inside mat", line, strlen("-Wire inside mat"))) {
sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("global", temp_var, strlen("global"))) {
wire_is_mat_type = 2;
continue;
} else if (!strncmp("local", temp_var, strlen("local"))) {
wire_is_mat_type = 0;
continue;
} else {
wire_is_mat_type = 1;
continue;
}
}
if (!strncmp("-Wire outside mat", line, strlen("-Wire outside mat"))) {
sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("global", temp_var, strlen("global"))) {
wire_os_mat_type = 2;
} else {
wire_os_mat_type = 1;
}
continue;
}
if (!strncmp("-Interconnect projection", line, strlen("-Interconnect projection"))) {
sscanf(line, "-Interconnect projection%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("aggressive", temp_var, strlen("aggressive"))) {
ic_proj_type = 0;
} else {
ic_proj_type = 1;
}
continue;
}
if (!strncmp("-Wire signalling", line, strlen("-wire signalling"))) {
sscanf(line, "-Wire%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("default", temp_var, strlen("default"))) {
force_wiretype = 0;
wt = Global;
} else if (!(strncmp("Global_10", temp_var, strlen("Global_10")))) {
force_wiretype = 1;
wt = Global_10;
} else if (!(strncmp("Global_20", temp_var, strlen("Global_20")))) {
force_wiretype = 1;
wt = Global_20;
} else if (!(strncmp("Global_30", temp_var, strlen("Global_30")))) {
force_wiretype = 1;
wt = Global_30;
} else if (!(strncmp("Global_5", temp_var, strlen("Global_5")))) {
force_wiretype = 1;
wt = Global_5;
} else if (!(strncmp("Global", temp_var, strlen("Global")))) {
force_wiretype = 1;
wt = Global;
} else {
wt = Low_swing;
force_wiretype = 1;
}
continue;
}
if (!strncmp("-Core", line, strlen("-Core"))) {
sscanf(line, "-Core count %d\n", &(cores));
if (cores > 16) {
printf("No. of cores should be less than 16!\n");
}
continue;
}
if (!strncmp("-Cache level", line, strlen("-Cache level"))) {
sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("L2", temp_var, strlen("L2"))) {
cache_level = 0;
} else {
cache_level = 1;
}
}
if (!strncmp("-Print level", line, strlen("-Print level"))) {
sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) {
print_detail = 1;
} else {
print_detail = 0;
}
}
if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) {
sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("true", temp_var, strlen("true"))) {
add_ecc_b_ = true;
} else {
add_ecc_b_ = false;
}
}
if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) {
sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("true", temp_var, strlen("true"))) {
print_input_args = true;
} else {
print_input_args = false;
}
}
if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) {
sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var);
if (!strncmp("true", temp_var, strlen("true"))) {
force_cache_config = true;
} else {
force_cache_config = false;
}
}
if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) {
sscanf(line, "-Ndbl %d\n", &(ndbl));
continue;
}
if (!strncmp("-Ndwl", line, strlen("-Ndwl"))) {
sscanf(line, "-Ndwl %d\n", &(ndwl));
continue;
}
if (!strncmp("-Nspd", line, strlen("-Nspd"))) {
sscanf(line, "-Nspd %d\n", &(nspd));
continue;
}
if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) {
sscanf(line, "-Ndsam1 %d\n", &(ndsam1));
continue;
}
if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) {
sscanf(line, "-Ndsam2 %d\n", &(ndsam2));
continue;
}
if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) {
sscanf(line, "-Ndcm %d\n", &(ndcm));
continue;
}
}
rpters_in_htree = true;
fclose(fp);
}
/*
 * Prints the input parameters held by this object to stdout, one
 * "label : value" pair per line.
 *
 * Search ports are shown only for fully associative caches and CAMs, the
 * NUCA objectives only when the NUCA model is selected, and the array
 * partitioning values (Ndwl, Ndbl, ...) only when a cache configuration
 * is being forced.
 *
 * All values are read from this object's own members, so the output is
 * correct for any InputParameter instance and not just the global g_ip.
 */
void
InputParameter::display_ip() {
    cout << "Cache size : " << cache_sz << endl;
    cout << "Block size : " << line_sz << endl;
    cout << "Associativity : " << assoc << endl;
    cout << "Read only ports : " << num_rd_ports << endl;
    cout << "Write only ports : " << num_wr_ports << endl;
    cout << "Read write ports : " << num_rw_ports << endl;
    cout << "Single ended read ports : " << num_se_rd_ports << endl;
    if (fully_assoc || pure_cam) {
        cout << "Search ports : " << num_search_ports << endl;
    }
    cout << "Cache banks (UCA) : " << nbanks << endl;
    cout << "Technology : " << F_sz_um << endl;
    cout << "Temperature : " << temp << endl;
    cout << "Tag size : " << tag_w << endl;

    // The array-type flags are printed independently; more than one line
    // appears if more than one flag is set.
    if (is_cache) {
        cout << "array type : " << "Cache" << endl;
    }
    if (pure_ram) {
        cout << "array type : " << "Scratch RAM" << endl;
    }
    if (pure_cam) {
        cout << "array type : " << "CAM" << endl;
    }

    cout << "Model as memory : " << is_main_mem << endl;
    cout << "Access mode : " << access_mode << endl;
    cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl;
    cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl;
    cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl;
    cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl;
    cout << "Optimization target : " << ed << endl;
    cout << "Design objective (UCA wt) : " << delay_wt << " "
         << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt
         << " " << area_wt << endl;
    cout << "Design objective (UCA dev) : " << delay_dev << " "
         << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev
         << " " << area_dev << endl;
    if (nuca) {
        cout << "Cores : " << cores << endl;
        cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " "
             << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca
             << " " << area_wt_nuca << endl;
        cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " "
             << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca
             << " " << area_dev_nuca << endl;
    }
    cout << "Cache model : " << nuca << endl;
    cout << "Nuca bank : " << nuca_bank_count << endl;
    cout << "Wire inside mat : " << wire_is_mat_type << endl;
    cout << "Wire outside mat : " << wire_os_mat_type << endl;
    cout << "Interconnect projection : " << ic_proj_type << endl;
    cout << "Wire signalling : " << force_wiretype << endl;
    cout << "Print level : " << print_detail << endl;
    cout << "ECC overhead : " << add_ecc_b_ << endl;
    cout << "Page size : " << page_sz_bits << endl;
    cout << "Burst length : " << burst_len << endl;
    cout << "Internal prefetch width : " << int_prefetch_w << endl;
    cout << "Force cache config : " << force_cache_config << endl;
    if (force_cache_config) {
        cout << "Ndwl : " << ndwl << endl;
        cout << "Ndbl : " << ndbl << endl;
        cout << "Nspd : " << nspd << endl;
        cout << "Ndcm : " << ndcm << endl;
        cout << "Ndsam1 : " << ndsam1 << endl;
        cout << "Ndsam2 : " << ndsam2 << endl;
    }
}
// Adds two power breakdowns field by field and returns the sum.
powerComponents operator+(const powerComponents & x, const powerComponents & y) {
    powerComponents total;

    total.dynamic = x.dynamic + y.dynamic;
    total.short_circuit = x.short_circuit + y.short_circuit;

    total.leakage = x.leakage + y.leakage;
    total.gate_leakage = x.gate_leakage + y.gate_leakage;
    total.longer_channel_leakage =
        x.longer_channel_leakage + y.longer_channel_leakage;

    return total;
}
// Scales a power breakdown by per-field factors read from y:
//   y[0] -> dynamic, y[1] -> leakage, y[2] -> gate leakage,
//   y[3] -> short circuit.
// y must therefore point to at least four doubles.
powerComponents operator*(const powerComponents & x, double const * const y) {
    const double dynamic_factor = y[0];
    const double leakage_factor = y[1];
    const double gate_leakage_factor = y[2];
    const double short_circuit_factor = y[3];

    powerComponents scaled;
    scaled.dynamic = x.dynamic * dynamic_factor;
    scaled.leakage = x.leakage * leakage_factor;
    scaled.gate_leakage = x.gate_leakage * gate_leakage_factor;
    scaled.short_circuit = x.short_circuit * short_circuit_factor;
    // Longer-channel leakage scales with the ordinary leakage factor.
    scaled.longer_channel_leakage = x.longer_channel_leakage * leakage_factor;
    return scaled;
}
// Adds two powerDef values by summing their read, write and search
// operation breakdowns separately.
powerDef operator+(const powerDef & x, const powerDef & y) {
    powerDef total;
    total.searchOp = x.searchOp + y.searchOp;
    total.writeOp = x.writeOp + y.writeOp;
    total.readOp = x.readOp + y.readOp;
    return total;
}
// Scales the read, write and search breakdowns of x with the same factor
// array y; each one is scaled through the powerComponents operator*.
powerDef operator*(const powerDef & x, double const * const y) {
    powerDef scaled;
    scaled.searchOp = x.searchOp * y;
    scaled.writeOp = x.writeOp * y;
    scaled.readOp = x.readOp * y;
    return scaled;
}
/*
 * File-based CACTI entry point.
 *
 * Allocates the global g_ip, fills it from the configuration file
 * infile_name, validates it, initializes the technology parameters, runs
 * the NUCA simulation when the NUCA model is selected, then solves the
 * design, prints the result and writes the CSV output. g_ip is deleted
 * before returning.
 *
 * Terminates the program when error_checking() rejects the input.
 */
uca_org_t cacti_interface(const string & infile_name) {
    uca_org_t result;
    result.valid = false;

    g_ip = new InputParameter();
    g_ip->parse_cfg(infile_name);

    const bool input_ok = g_ip->error_checking(infile_name);
    if (!input_ok) {
        exit(0);
    }

    if (g_ip->print_input_args) {
        g_ip->display_ip();
    }

    init_tech_params(g_ip->F_sz_um, false);
    Wire winit; // Do not delete this line. It initializes wires.

    // For HighRadix Only
    // //// Wire wirea(g_ip->wt, 1000);
    // //// wirea.print_wire();
    // //// cout << "Wire Area " << wirea.area.get_area() << " sq. u" << endl;
    // // winit.print_wire();
    // //
    // HighRadix *hr;
    // hr = new HighRadix();
    // hr->compute_power();
    // hr->print_router();
    // exit(0);
    //
    // double sub_switch_sz = 2;
    // double rows = 32;
    // for (int i=0; i<6; i++) {
    // sub_switch_sz = pow(2, i);
    // rows = 64/sub_switch_sz;
    // hr = new HighRadix(sub_switch_sz, rows, .8/* freq */, 64, 2, 64, 0.7);
    // hr->compute_power();
    // hr->print_router();
    // delete hr;
    // }
    // // HighRadix yarc;
    // // yarc.compute_power();
    // // yarc.print_router();
    // winit.print_wire();
    // exit(0);
    // For HighRadix Only End

    if (g_ip->nuca == 1) {
        Nuca nuca_model(&g_tp.peri_global);
        nuca_model.sim_nuca();
    }

    // The parameters are always printed here, in addition to the optional
    // print above.
    g_ip->display_ip();

    solve(&result);
    output_UCA(&result);
    output_data_csv(result);

    delete (g_ip);
    return result;
}
//cacti6.5's plain interface, please keep !!!
/*
 * Argument-based CACTI 6.5 entry point.
 *
 * Allocates the global g_ip, copies every argument into it, validates the
 * result, initializes the technology parameters, solves the design and
 * prints it. g_ip is deleted before returning.
 *
 * tech_node is given in nm and stored both in nm and in um.
 * A tag_width of 0 selects the placeholder tag width of 42.
 *
 * Terminates the program on an unknown wt code or when error_checking()
 * rejects the input.
 */
uca_org_t cacti_interface(
    int cache_size,
    int line_size,
    int associativity,
    int rw_ports,
    int excl_read_ports,
    int excl_write_ports,
    int single_ended_read_ports,
    int banks,
    double tech_node, // in nm
    int page_sz,
    int burst_length,
    int pre_width,
    int output_width,
    int specific_tag,
    int tag_width,
    int access_mode, //0 normal, 1 seq, 2 fast
    int cache, //scratch ram or cache
    int main_mem,
    int obj_func_delay,
    int obj_func_dynamic_power,
    int obj_func_leakage_power,
    int obj_func_area,
    int obj_func_cycle_time,
    int dev_func_delay,
    int dev_func_dynamic_power,
    int dev_func_leakage_power,
    int dev_func_area,
    int dev_func_cycle_time,
    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
    int temp,
    int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
    int data_arr_ram_cell_tech_flavor_in, // 0-4
    int data_arr_peri_global_tech_flavor_in,
    int tag_arr_ram_cell_tech_flavor_in,
    int tag_arr_peri_global_tech_flavor_in,
    int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
    int wire_inside_mat_type_in,
    int wire_outside_mat_type_in,
    int is_nuca, // 0 - UCA, 1 - NUCA
    int core_count,
    int cache_level, // 0 - L2, 1 - L3
    int nuca_bank_count,
    int nuca_obj_func_delay,
    int nuca_obj_func_dynamic_power,
    int nuca_obj_func_leakage_power,
    int nuca_obj_func_area,
    int nuca_obj_func_cycle_time,
    int nuca_dev_func_delay,
    int nuca_dev_func_dynamic_power,
    int nuca_dev_func_leakage_power,
    int nuca_dev_func_area,
    int nuca_dev_func_cycle_time,
    int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
    int p_input) {
    g_ip = new InputParameter();
    g_ip->add_ecc_b_ = true;

    // Technology flavors and wire types.
    g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
    g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
    g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
    g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
    g_ip->ic_proj_type = interconnect_projection_type_in;
    g_ip->wire_is_mat_type = wire_inside_mat_type_in;
    g_ip->wire_os_mat_type = wire_outside_mat_type_in;

    // Main-memory parameters.
    g_ip->burst_len = burst_length;
    g_ip->int_prefetch_w = pre_width;
    g_ip->page_sz_bits = page_sz;

    // Array geometry.
    g_ip->cache_sz = cache_size;
    g_ip->line_sz = line_size;
    g_ip->assoc = associativity;
    g_ip->nbanks = banks;
    g_ip->out_w = output_width;
    g_ip->specific_tag = specific_tag;
    if (tag_width == 0) {
        g_ip->tag_w = 42;
    } else {
        g_ip->tag_w = tag_width;
    }
    g_ip->access_mode = access_mode;

    // UCA optimization weights and allowed deviations.
    g_ip->delay_wt = obj_func_delay;
    g_ip->dynamic_power_wt = obj_func_dynamic_power;
    g_ip->leakage_power_wt = obj_func_leakage_power;
    g_ip->area_wt = obj_func_area;
    g_ip->cycle_time_wt = obj_func_cycle_time;
    g_ip->delay_dev = dev_func_delay;
    g_ip->dynamic_power_dev = dev_func_dynamic_power;
    g_ip->leakage_power_dev = dev_func_leakage_power;
    g_ip->area_dev = dev_func_area;
    g_ip->cycle_time_dev = dev_func_cycle_time;
    g_ip->ed = ed_ed2_none;

    // Wire signalling selection; an unknown code is a fatal error.
    switch (wt) {
    case (0):
        g_ip->force_wiretype = 0;
        g_ip->wt = Global;
        break;
    case (1):
        g_ip->force_wiretype = 1;
        g_ip->wt = Global;
        break;
    case (2):
        g_ip->force_wiretype = 1;
        g_ip->wt = Global_5;
        break;
    case (3):
        g_ip->force_wiretype = 1;
        g_ip->wt = Global_10;
        break;
    case (4):
        g_ip->force_wiretype = 1;
        g_ip->wt = Global_20;
        break;
    case (5):
        g_ip->force_wiretype = 1;
        g_ip->wt = Global_30;
        break;
    case (6):
        g_ip->force_wiretype = 1;
        g_ip->wt = Low_swing;
        break;
    default:
        cout << "Unknown wire type!\n";
        exit(0);
    }

    // NUCA optimization weights and allowed deviations.
    g_ip->delay_wt_nuca = nuca_obj_func_delay;
    g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power;
    g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power;
    g_ip->area_wt_nuca = nuca_obj_func_area;
    g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time;
    // Uses the NUCA deviation argument; this previously copied the UCA
    // value dev_func_delay by mistake.
    g_ip->delay_dev_nuca = nuca_dev_func_delay;
    g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power;
    g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power;
    g_ip->area_dev_nuca = nuca_dev_func_area;
    g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time;
    g_ip->nuca = is_nuca;
    g_ip->nuca_bank_count = nuca_bank_count;
    if (nuca_bank_count > 0) {
        g_ip->force_nuca_bank = 1;
    }
    g_ip->cores = core_count;
    g_ip->cache_level = cache_level;

    g_ip->temp = temp;
    g_ip->F_sz_nm = tech_node;
    g_ip->F_sz_um = tech_node / 1000;
    g_ip->is_main_mem = (main_mem != 0);
    g_ip->is_cache = (cache != 0);
    g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0);

    g_ip->num_rw_ports = rw_ports;
    g_ip->num_rd_ports = excl_read_ports;
    g_ip->num_wr_ports = excl_write_ports;
    g_ip->num_se_rd_ports = single_ended_read_ports;
    g_ip->print_detail = 1;

    // NOTE(review): the next assignments overwrite the nuca, wt and
    // force_wiretype values derived from is_nuca and wt above, so those two
    // arguments currently have no effect on the result. Kept as is because
    // existing callers may rely on it -- confirm before changing.
    g_ip->nuca = 0;
    g_ip->wt = Global_5;
    g_ip->force_cache_config = false;
    g_ip->force_wiretype = false;
    g_ip->print_input_args = p_input;

    uca_org_t fin_res;
    fin_res.valid = false;

    if (g_ip->error_checking() == false) exit(0);
    if (g_ip->print_input_args)
        g_ip->display_ip();

    init_tech_params(g_ip->F_sz_um, false);
    Wire winit; // Do not delete this line. It initializes wires.

    if (g_ip->nuca == 1) {
        Nuca n(&g_tp.peri_global);
        n.sim_nuca();
    }

    solve(&fin_res);
    output_UCA(&fin_res);

    delete (g_ip);
    return fin_res;
}
//McPAT's plain interface, please keep !!!
/*
 * Argument-based entry point used by McPAT.
 *
 * Allocates the global g_ip, copies the arguments into it, validates the
 * result, initializes the technology parameters, prints the parameters,
 * solves the design, prints it and writes the CSV output. g_ip is deleted
 * before returning.
 *
 * cache selects the array kind: 0 = pure RAM, 1 = cache, 2 = pure CAM.
 * tech_node is given in nm and stored both in nm and in um.
 * When specific_tag is 0 the placeholder tag width of 42 is used.
 * wiretype is consulted only when force_wiretype is non-zero
 * (10, 20, 30 and 5 pick the matching Global_* type, 0 picks Low_swing;
 * any other value leaves g_ip->wt untouched). The wt argument is not used.
 * ndwl ... ndsam2 are copied only when force_config is non-zero.
 *
 * Terminates the program when error_checking() rejects the input.
 */
uca_org_t cacti_interface(
    int cache_size,
    int line_size,
    int associativity,
    int rw_ports,
    int excl_read_ports,// para5
    int excl_write_ports,
    int single_ended_read_ports,
    int search_ports,
    int banks,
    double tech_node,//para10
    int output_width,
    int specific_tag,
    int tag_width,
    int access_mode,
    int cache, //para15
    int main_mem,
    int obj_func_delay,
    int obj_func_dynamic_power,
    int obj_func_leakage_power,
    int obj_func_cycle_time, //para20
    int obj_func_area,
    int dev_func_delay,
    int dev_func_dynamic_power,
    int dev_func_leakage_power,
    int dev_func_area, //para25
    int dev_func_cycle_time,
    int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
    int temp,
    int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
    int data_arr_ram_cell_tech_flavor_in,//para30
    int data_arr_peri_global_tech_flavor_in,
    int tag_arr_ram_cell_tech_flavor_in,
    int tag_arr_peri_global_tech_flavor_in,
    int interconnect_projection_type_in,
    int wire_inside_mat_type_in,//para35
    int wire_outside_mat_type_in,
    int REPEATERS_IN_HTREE_SEGMENTS_in,
    int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
    int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
    int PAGE_SIZE_BITS_in,//para40
    int BURST_LENGTH_in,
    int INTERNAL_PREFETCH_WIDTH_in,
    int force_wiretype,
    int wiretype,
    int force_config,//para45
    int ndwl,
    int ndbl,
    int nspd,
    int ndcm,
    int ndsam1,//para50
    int ndsam2,
    int ecc) {
    g_ip = new InputParameter();

    uca_org_t result;
    result.valid = false;

    // Technology flavors and wire types.
    g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in;
    g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in;
    g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in;
    g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in;
    g_ip->ic_proj_type = interconnect_projection_type_in;
    g_ip->wire_is_mat_type = wire_inside_mat_type_in;
    g_ip->wire_os_mat_type = wire_outside_mat_type_in;

    // Main-memory parameters.
    g_ip->burst_len = BURST_LENGTH_in;
    g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in;
    g_ip->page_sz_bits = PAGE_SIZE_BITS_in;

    // Array geometry.
    g_ip->cache_sz = cache_size;
    g_ip->line_sz = line_size;
    g_ip->assoc = associativity;
    g_ip->nbanks = banks;
    g_ip->out_w = output_width;
    g_ip->specific_tag = specific_tag;
    if (specific_tag != 0) {
        g_ip->tag_w = tag_width;
    } else {
        g_ip->tag_w = 42;
    }
    g_ip->access_mode = access_mode;

    // Optimization weights and allowed deviations.
    g_ip->delay_wt = obj_func_delay;
    g_ip->dynamic_power_wt = obj_func_dynamic_power;
    g_ip->leakage_power_wt = obj_func_leakage_power;
    g_ip->area_wt = obj_func_area;
    g_ip->cycle_time_wt = obj_func_cycle_time;
    g_ip->delay_dev = dev_func_delay;
    g_ip->dynamic_power_dev = dev_func_dynamic_power;
    g_ip->leakage_power_dev = dev_func_leakage_power;
    g_ip->area_dev = dev_func_area;
    g_ip->cycle_time_dev = dev_func_cycle_time;

    g_ip->temp = temp;
    g_ip->ed = ed_ed2_none;
    g_ip->F_sz_nm = tech_node;
    g_ip->F_sz_um = tech_node / 1000;

    // Array kind.
    g_ip->is_main_mem = (main_mem != 0);
    g_ip->is_cache = (cache == 1);
    g_ip->pure_ram = (cache == 0);
    g_ip->pure_cam = (cache == 2);

    // H-tree options.
    g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0);
    g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in;
    g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in;

    // Ports.
    g_ip->num_rw_ports = rw_ports;
    g_ip->num_rd_ports = excl_read_ports;
    g_ip->num_wr_ports = excl_write_ports;
    g_ip->num_se_rd_ports = single_ended_read_ports;
    g_ip->num_search_ports = search_ports;

    g_ip->print_detail = 1;
    g_ip->nuca = 0;

    // Wire signalling.
    if (force_wiretype != 0) {
        g_ip->force_wiretype = true;
        switch (wiretype) {
        case 10:
            g_ip->wt = Global_10;
            break;
        case 20:
            g_ip->wt = Global_20;
            break;
        case 30:
            g_ip->wt = Global_30;
            break;
        case 5:
            g_ip->wt = Global_5;
            break;
        case 0:
            g_ip->wt = Low_swing;
            break;
        default:
            // Any other code leaves g_ip->wt unchanged.
            break;
        }
    } else {
        g_ip->wt = Global;
        g_ip->force_wiretype = false;
    }
    //g_ip->wt = Global_5;

    // Forced array partitioning.
    if (force_config != 0) {
        g_ip->force_cache_config = true;
        g_ip->ndbl = ndbl;
        g_ip->ndwl = ndwl;
        g_ip->nspd = nspd;
        g_ip->ndcm = ndcm;
        g_ip->ndsam1 = ndsam1;
        g_ip->ndsam2 = ndsam2;
    } else {
        g_ip->force_cache_config = false;
    }

    g_ip->add_ecc_b_ = (ecc != 0);

    const bool input_ok = g_ip->error_checking();
    if (!input_ok) {
        exit(0);
    }

    init_tech_params(g_ip->F_sz_um, false);
    Wire winit; // Do not delete this line. It initializes wires.

    g_ip->display_ip();
    solve(&result);
    output_UCA(&result);
    output_data_csv(result);

    delete (g_ip);
    return result;
}
/**
 * Validate this parameter set and derive the fields computed from it.
 *
 * Every failed check prints a diagnostic to cerr, prefixed with @p name, and
 * returns false immediately. When all checks pass, the following members
 * have been (re)written: fast_access, fully_assoc, block_sz, tag_assoc,
 * data_assoc, is_seq_acc, num_rw_ports, num_rd_ports, num_wr_ports,
 * num_se_rd_ports, num_search_ports (forced to 0 unless the array is a CAM
 * or fully associative) and nsets.
 *
 * @param name Label of the component being checked; used only in messages.
 * @return true if the configuration is accepted, false otherwise.
 */
bool InputParameter::error_checking(string name) {
    int A;
    bool seq_access = false;
    fast_access = true;

    // Translate access_mode into the sequential/fast flags. Any value other
    // than 0, 1 or 2 keeps the defaults assigned above.
    switch (access_mode) {
    case 0:
        seq_access = false;
        fast_access = false;
        break;
    case 1:
        seq_access = true;
        fast_access = false;
        break;
    case 2:
        seq_access = false;
        fast_access = true;
        break;
    }

    if (is_main_mem) {
        if (ic_proj_type == 0) {
            cerr << name
                 << ": DRAM model supports only conservative interconnect "
                 << "projection but is set to aggressive!\n\n";
            return false;
        }
    }

    // Block (line) size in bytes; it must cover the output width in bits.
    uint32_t B = line_sz;
    if (B < 1) {
        cerr << name << ": Block size must be >= 1, but is set to " << B
             << endl;
        return false;
    } else if (B*8 < out_w) {
        cerr << name << ": Block size must be at least " << out_w / 8
             << ", but is set to " << B << endl;
        return false;
    }

    if (F_sz_um <= 0) {
        cerr << name << ": Feature size must be > 0, but is set to "
             << F_sz_um << endl;
        return false;
    } else if (F_sz_um > 0.091) {
        cerr << name << ": Feature size must be <= 90 nm, but is set to "
             << F_sz_um << endl;
        return false;
    }

    uint32_t RWP = num_rw_ports;
    uint32_t ERP = num_rd_ports;
    uint32_t EWP = num_wr_ports;
    uint32_t NSER = num_se_rd_ports;
    uint32_t SCHP = num_search_ports;

    //TODO: revisit this. This is an important feature. Sheng thought this should be used
    // // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to
    // // the number of banks, we assume that the multiple ports are implemented via the multiple banks.
    // // In such a case we assume that each bank has 1 RWP port.
    // if ((RWP + ERP + EWP) <= nbanks && nbanks>1)
    // {
    //     RWP = 1;
    //     ERP = 0;
    //     EWP = 0;
    //     NSER = 0;
    // }
    // else if ((RWP < 0) || (EWP < 0) || (ERP < 0))
    // {
    //     cerr << "Ports must >=0" << endl;
    //     return false;
    // }
    // else if (RWP > 2)
    // {
    //     cerr << "Maximum of 2 read/write ports" << endl;
    //     return false;
    // }
    // else if ((RWP+ERP+EWP) < 1)
    // Changed to new implementation:
    // The number of ports specified at input is per bank
    if ((RWP + ERP + EWP) < 1) {
        cerr << name << ": Must have at least one port" << endl;
        return false;
    }

    if (is_pow2(nbanks) == false) {
        cerr << name << ": Number of subbanks should be greater than or "
             << "equal to 1 and should be a power of 2, but is set to "
             << nbanks << endl;
        return false;
    }

    // Capacity of a single bank in bytes.
    int C = cache_sz / nbanks;
    if (C < 64) {
        cerr << name << ": Cache size must be >=64, but is set to " << C
             << endl;
        return false;
    }

    //TODO: revisit this
    // if (pure_ram==true && assoc!=1)
    // {
    //     cerr << "Pure RAM must have assoc as 1" << endl;
    //     return false;
    // }

    // Fully associative and CAM checks. An associativity of 0 on a cache
    // selects the fully associative organization.
    if (is_cache && assoc == 0)
        fully_assoc = true;
    else
        fully_assoc = false;

    if (pure_cam == true && assoc != 0) {
        // The trailing space keeps the value from running into "to".
        cerr << name
             << ": Pure CAM must have associativity as 0, but is set to "
             << assoc << endl;
        return false;
    }

    if (assoc == 0 && (pure_cam == false && is_cache == false)) {
        cerr << name
             << ": Only CAM or Fully associative cache can have associativity "
             << "as 0" << endl;
        return false;
    }

    if ((fully_assoc == true || pure_cam == true)
        && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type
            || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) {
        cerr << name
             << ": CAM and fully associative cache must have same device type "
             << "for both data and tag array" << endl;
        cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type
             << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl
             << "\tData array peripheral = " << data_arr_peri_global_tech_type
             << ", Tag array peripheral = " << tag_arr_peri_global_tech_type
             << endl;
        return false;
    }

    if ((fully_assoc == true || pure_cam == true)
        && (data_arr_ram_cell_tech_type == lp_dram ||
            data_arr_ram_cell_tech_type == comm_dram)) {
        cerr << name << ": DRAM based CAM and fully associative cache are not "
             << "supported" << endl;
        return false;
    }

    if ((fully_assoc == true || pure_cam == true)
        && (is_main_mem == true)) {
        cerr << name
             << ": CAM and fully associative cache cannot be as main memory"
             << endl;
        return false;
    }

    if ((fully_assoc || pure_cam) && SCHP < 1) {
        cerr << name
             << ": CAM and fully associative must have at least 1 search port,"
             << " but are set to " << SCHP << endl;
        return false;
    }

    // With no read-capable port configured, reuse the search port count as
    // the exclusive read port count.
    if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) {
        ERP = SCHP;
    }

    // if ((!(fully_assoc || pure_cam)) && SCHP>=1)
    // {
    //     cerr << "None CAM and fully associative cannot have search ports" << endl;
    //     return false;
    // }

    if (assoc == 0) {
        // One way per block that fits in a bank.
        A = C / B;
        if (A < 1) {
            // A block larger than the bank yields zero ways; reject it here
            // because the set computations below divide by B * A.
            cerr << name << ": Block size (" << B
                 << ") must not be larger than the capacity per bank (" << C
                 << ") when associativity is 0" << endl;
            return false;
        }
    } else {
        if (assoc == 1) {
            A = 1;
        } else {
            A = assoc;
            if (is_pow2(A) == false) {
                cerr << name
                     << ": Associativity must be a power of 2, but is set to "
                     << A << endl;
                return false;
            }
        }
    }

    if (C / (B*A) <= 1 && assoc != 0) {
        cerr << name << ": Number of sets (" << (C / (B * A))
             << ") is too small: " << endl;
        cerr << " Need to either increase cache size, or decrease "
             << "associativity or block size" << endl;
        cerr << " (or use fully associative cache)" << endl;
        return false;
    }

    block_sz = B;

    /*dt: testing sequential access mode*/
    if (seq_access) {
        tag_assoc = A;
        data_assoc = 1;
        is_seq_acc = true;
    } else {
        tag_assoc = A;
        data_assoc = A;
        is_seq_acc = false;
    }

    if (assoc == 0) {
        data_assoc = 1;
    }

    num_rw_ports = RWP;
    num_rd_ports = ERP;
    num_wr_ports = EWP;
    num_se_rd_ports = NSER;

    if (!(fully_assoc || pure_cam))
        num_search_ports = 0;

    nsets = C / (B * A);

    if (temp < 300 || temp > 400 || temp % 10 != 0) {
        cerr << name << ": " << temp
             << " Temperature must be between 300 and 400 Kelvin and multiple "
             << "of 10." << endl;
        return false;
    }

    if (nsets < 1) {
        cerr << name << ": Less than one set..." << endl;
        return false;
    }

    return true;
}
void output_data_csv(const uca_org_t & fin_res) {
//TODO: the csv output should remain
fstream file("out.csv", ios::in);
bool print_index = file.fail();
file.close();
file.open("out.csv", ios::out | ios::app);
if (file.fail() == true) {
cerr << "File out.csv could not be opened successfully" << endl;
} else {
if (print_index == true) {
file << "Tech node (nm), ";
file << "Capacity (bytes), ";
file << "Number of banks, ";
file << "Associativity, ";
file << "Output width (bits), ";
file << "Access time (ns), ";
file << "Random cycle time (ns), ";
file << "Dynamic search energy (nJ), ";
file << "Dynamic read energy (nJ), ";
file << "Dynamic write energy (nJ), ";
file << "Standby leakage per bank(mW), ";
file << "Area (mm2), ";
file << "Ndwl, ";
file << "Ndbl, ";
file << "Nspd, ";
file << "Ndcm, ";
file << "Ndsam_level_1, ";
file << "Ndsam_level_2, ";
file << "Data arrary area efficiency %, ";
file << "Ntwl, ";
file << "Ntbl, ";
file << "Ntspd, ";
file << "Ntcm, ";
file << "Ntsam_level_1, ";
file << "Ntsam_level_2, ";
file << "Tag arrary area efficiency %, ";
// file << "Resistance per unit micron (ohm-micron), ";
// file << "Capacitance per unit micron (fF per micron), ";
// file << "Unit-length wire delay (ps), ";
// file << "FO4 delay (ps), ";
// file << "delay route to bank (including crossb delay) (ps), ";
// file << "Crossbar delay (ps), ";
// file << "Dyn read energy per access from closed page (nJ), ";
// file << "Dyn read energy per access from open page (nJ), ";
// file << "Leak power of an subbank with page closed (mW), ";
// file << "Leak power of a subbank with page open (mW), ";
// file << "Leak power of request and reply networks (mW), ";
// file << "Number of subbanks, ";
// file << "Page size in bits, ";
// file << "Activate power, ";
// file << "Read power, ";
// file << "Write power, ";
// file << "Precharge power, ";
// file << "tRCD, ";
// file << "CAS latency, ";
// file << "Precharge delay, ";
// file << "Perc dyn energy bitlines, ";
// file << "perc dyn energy wordlines, ";
// file << "perc dyn energy outside mat, ";
// file << "Area opt (perc), ";
// file << "Delay opt (perc), ";
// file << "Repeater opt (perc), ";
// file << "Aspect ratio";
file << endl;
}
file << g_ip->F_sz_nm << ", ";
file << g_ip->cache_sz << ", ";
file << g_ip->nbanks << ", ";
file << g_ip->tag_assoc << ", ";
file << g_ip->out_w << ", ";
file << fin_res.access_time*1e+9 << ", ";
file << fin_res.cycle_time*1e+9 << ", ";
// file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", ";
// file << fin_res.data_array2->delay_request_network*1e+9 << ", ";
// file << fin_res.data_array2->delay_inside_mat*1e+9 << ", ";
// file << fin_res.data_array2.delay_reply_network*1e+9 << ", ";
// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
// {
// file << fin_res.tag_array2->access_time*1e+9 << ", ";
// }
// else
// {
// file << 0 << ", ";
// }
// file << fin_res.data_array2->access_time*1e+9 << ", ";
// file << fin_res.data_array2->dram_refresh_period*1e+6 << ", ";
// file << fin_res.data_array2->dram_array_availability << ", ";
if (g_ip->fully_assoc || g_ip->pure_cam) {
file << fin_res.power.searchOp.dynamic*1e+9 << ", ";
} else {
file << "N/A" << ", ";
}
file << fin_res.power.readOp.dynamic*1e+9 << ", ";
file << fin_res.power.writeOp.dynamic*1e+9 << ", ";
// if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram))
// {
// file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", ";
// }
// else
// {
// file << "NA" << ", ";
// }
// file << fin_res.data_array2->power.readOp.dynamic*1e+9 << ", ";
// if (g_ip->fully_assoc || g_ip->pure_cam)
// {
// file << fin_res.power.searchOp.dynamic*1000/fin_res.cycle_time << ", ";
// }
// else
// {
// file << fin_res.power.readOp.dynamic*1000/fin_res.cycle_time << ", ";
// }
file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", ";
// file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", ";
// file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", ";
file << fin_res.area*1e-6 << ", ";
file << fin_res.data_array2->Ndwl << ", ";
file << fin_res.data_array2->Ndbl << ", ";
file << fin_res.data_array2->Nspd << ", ";
file << fin_res.data_array2->deg_bl_muxing << ", ";
file << fin_res.data_array2->Ndsam_lev_1 << ", ";
file << fin_res.data_array2->Ndsam_lev_2 << ", ";
file << fin_res.data_array2->area_efficiency << ", ";
if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) {
file << fin_res.tag_array2->Ndwl << ", ";
file << fin_res.tag_array2->Ndbl << ", ";
file << fin_res.tag_array2->Nspd << ", ";
file << fin_res.tag_array2->deg_bl_muxing << ", ";
file << fin_res.tag_array2->Ndsam_lev_1 << ", ";
file << fin_res.tag_array2->Ndsam_lev_2 << ", ";
file << fin_res.tag_array2->area_efficiency << ", ";
} else {
file << "N/A" << ", ";
file << "N/A"<< ", ";
file << "N/A" << ", ";
file << "N/A" << ", ";
file << "N/A" << ", ";
file << "N/A" << ", ";
file << "N/A" << ", ";
}
// file << g_tp.wire_inside_mat.R_per_um << ", ";
// file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", ";
// file << g_tp.unit_len_wire_del / 1e-12 << ", ";
// file << g_tp.FO4 / 1e-12 << ", ";
// file << fin_res.data_array.delay_route_to_bank / 1e-9 << ", ";
// file << fin_res.data_array.delay_crossbar / 1e-9 << ", ";
// file << fin_res.data_array.dyn_read_energy_from_closed_page / 1e-9 << ", ";
// file << fin_res.data_array.dyn_read_energy_from_open_page / 1e-9 << ", ";
// file << fin_res.data_array.leak_power_subbank_closed_page / 1e-3 << ", ";
// file << fin_res.data_array.leak_power_subbank_open_page / 1e-3 << ", ";
// file << fin_res.data_array.leak_power_request_and_reply_networks / 1e-3 << ", ";
// file << fin_res.data_array.number_subbanks << ", " ;
// file << fin_res.data_array.page_size_in_bits << ", " ;
// file << fin_res.data_array.activate_energy * 1e9 << ", " ;
// file << fin_res.data_array.read_energy * 1e9 << ", " ;
// file << fin_res.data_array.write_energy * 1e9 << ", " ;
// file << fin_res.data_array.precharge_energy * 1e9 << ", " ;
// file << fin_res.data_array.trcd * 1e9 << ", " ;
// file << fin_res.data_array.cas_latency * 1e9 << ", " ;
// file << fin_res.data_array.precharge_delay * 1e9 << ", " ;
// file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width;
file<<endl;
}
file.close();
}
void output_UCA(uca_org_t *fr) {
// if (NUCA)
if (0) {
cout << "\n\n Detailed Bank Stats:\n";
cout << " Bank Size (bytes): %d\n" <<
(int) (g_ip->cache_sz);
} else {
if (g_ip->data_arr_ram_cell_tech_type == 3) {
cout << "\n---------- CACTI version 6.5, Uniform Cache Access " <<
"Logic Process Based DRAM Model ----------\n";
} else if (g_ip->data_arr_ram_cell_tech_type == 4) {
cout << "\n---------- CACTI version 6.5, Uniform" <<
"Cache Access Commodity DRAM Model ----------\n";
} else {
cout << "\n---------- CACTI version 6.5, Uniform Cache Access "
"SRAM Model ----------\n";
}
cout << "\nCache Parameters:\n";
cout << " Total cache size (bytes): " <<
(int) (g_ip->cache_sz) << endl;
}
cout << " Number of banks: " << (int) g_ip->nbanks << endl;
if (g_ip->fully_assoc || g_ip->pure_cam)
cout << " Associativity: fully associative\n";
else {
if (g_ip->tag_assoc == 1)
cout << " Associativity: direct mapped\n";
else
cout << " Associativity: " <<
g_ip->tag_assoc << endl;
}
cout << " Block size (bytes): " << g_ip->line_sz << endl;
cout << " Read/write Ports: " <<
g_ip->num_rw_ports << endl;
cout << " Read ports: " <<
g_ip->num_rd_ports << endl;
cout << " Write ports: " <<
g_ip->num_wr_ports << endl;
if (g_ip->fully_assoc || g_ip->pure_cam)
cout << " search ports: " <<
g_ip->num_search_ports << endl;
cout << " Technology size (nm): " <<
g_ip->F_sz_nm << endl << endl;
cout << " Access time (ns): " << fr->access_time*1e9 << endl;
cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl;
if (g_ip->data_arr_ram_cell_tech_type >= 4) {
cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl;
cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl;
cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl;
cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl;
cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl;
cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl;
cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl;
cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl;
cout << " Refresh power (mW): " <<
fr->data_array2->refresh_power*1e3 << endl;
} else {
if ((g_ip->fully_assoc || g_ip->pure_cam)) {
cout << " Total dynamic associative search energy per access (nJ): " <<
fr->power.searchOp.dynamic*1e9 << endl;
// cout << " Total dynamic read energy per access (nJ): " <<
// fr->power.readOp.dynamic*1e9 << endl;
// cout << " Total dynamic write energy per access (nJ): " <<
// fr->power.writeOp.dynamic*1e9 << endl;
}
// else
// {
cout << " Total dynamic read energy per access (nJ): " <<
fr->power.readOp.dynamic*1e9 << endl;
cout << " Total dynamic write energy per access (nJ): " <<
fr->power.writeOp.dynamic*1e9 << endl;
// }
cout << " Total leakage power of a bank"
" (mW): " << fr->power.readOp.leakage*1e3 << endl;
cout << " Total gate leakage power of a bank"
" (mW): " << fr->power.readOp.gate_leakage*1e3 << endl;
}
if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) {
}
cout << " Cache height x width (mm): " <<
fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl;
cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl;
cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl;
cout << " Best Nspd : " << fr->data_array2->Nspd << endl;
cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl;
cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl;
cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl;
if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
!g_ip->is_main_mem) {
cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl;
cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl;
cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl;
cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl;
cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl;
cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl;
}
switch (fr->data_array2->wt) {
case (0):
cout << " Data array, H-tree wire type: Delay optimized global wires\n";
break;
case (1):
cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n";
break;
case (2):
cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n";
break;
case (3):
cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n";
break;
case (4):
cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n";
break;
case (5):
cout << " Data array, wire type: Low swing wires\n";
break;
default:
cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt << endl;
exit(0);
}
if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) {
switch (fr->tag_array2->wt) {
case (0):
cout << " Tag array, H-tree wire type: Delay optimized global wires\n";
break;
case (1):
cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n";
break;
case (2):
cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n";
break;
case (3):
cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n";
break;
case (4):
cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n";
break;
case (5):
cout << " Tag array, wire type: Low swing wires\n";
break;
default:
cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl;
exit(-1);
}
}
if (g_ip->print_detail) {
/* Delay stats */
/* data array stats */
cout << endl << "Time Components:" << endl << endl;
cout << " Data side (with Output driver) (ns): " <<
fr->data_array2->access_time / 1e-9 << endl;
cout << "\tH-tree input delay (ns): " <<
fr->data_array2->delay_route_to_bank * 1e9 +
fr->data_array2->delay_input_htree * 1e9 << endl;
if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
cout << "\tDecoder + wordline delay (ns): " <<
fr->data_array2->delay_row_predecode_driver_and_block * 1e9 +
fr->data_array2->delay_row_decoder * 1e9 << endl;
} else {
cout << "\tCAM search delay (ns): " <<
fr->data_array2->delay_matchlines * 1e9 << endl;
}
cout << "\tBitline delay (ns): " <<
fr->data_array2->delay_bitlines / 1e-9 << endl;
cout << "\tSense Amplifier delay (ns): " <<
fr->data_array2->delay_sense_amp * 1e9 << endl;
cout << "\tH-tree output delay (ns): " <<
fr->data_array2->delay_subarray_output_driver * 1e9 +
fr->data_array2->delay_dout_htree * 1e9 << endl;
if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
!g_ip->is_main_mem) {
/* tag array stats */
cout << endl << " Tag side (with Output driver) (ns): " <<
fr->tag_array2->access_time / 1e-9 << endl;
cout << "\tH-tree input delay (ns): " <<
fr->tag_array2->delay_route_to_bank * 1e9 +
fr->tag_array2->delay_input_htree * 1e9 << endl;
cout << "\tDecoder + wordline delay (ns): " <<
fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 +
fr->tag_array2->delay_row_decoder * 1e9 << endl;
cout << "\tBitline delay (ns): " <<
fr->tag_array2->delay_bitlines / 1e-9 << endl;
cout << "\tSense Amplifier delay (ns): " <<
fr->tag_array2->delay_sense_amp * 1e9 << endl;
cout << "\tComparator delay (ns): " <<
fr->tag_array2->delay_comparator * 1e9 << endl;
cout << "\tH-tree output delay (ns): " <<
fr->tag_array2->delay_subarray_output_driver * 1e9 +
fr->tag_array2->delay_dout_htree * 1e9 << endl;
}
/* Energy/Power stats */
cout << endl << endl << "Power Components:" << endl << endl;
if (!(g_ip->pure_cam || g_ip->fully_assoc)) {
cout << " Data array: Total dynamic read energy/access (nJ): " <<
fr->data_array2->power.readOp.dynamic * 1e9 << endl;
cout << "\tTotal leakage read/write power of a bank (mW): " <<
fr->data_array2->power.readOp.leakage * 1e3 << endl;
cout << "\tTotal energy in H-tree (that includes both "
"address and data transfer) (nJ): " <<
(fr->data_array2->power_addr_input_htree.readOp.dynamic +
fr->data_array2->power_data_output_htree.readOp.dynamic +
fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
cout << "\tTotal leakage power in H-tree (that includes both "
"address and data network) ((mW)): " <<
(fr->data_array2->power_addr_input_htree.readOp.leakage +
fr->data_array2->power_data_output_htree.readOp.leakage +
fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3
<< endl;
cout << "\tTotal gate leakage power in H-tree (that includes both "
"address and data network) ((mW)): " <<
(fr->data_array2->power_addr_input_htree.readOp.gate_leakage +
fr->data_array2->power_data_output_htree.readOp.gate_leakage +
fr->data_array2->power_routing_to_bank.readOp.gate_leakage) *
1e3 << endl;
cout << "\tOutput Htree inside bank Energy (nJ): " <<
fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
cout << "\tDecoder (nJ): " <<
fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
cout << "\tWordline (nJ): " <<
fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitline mux & associated drivers (nJ): " <<
fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tSense amp mux & associated drivers (nJ): " <<
fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitlines precharge and equalization circuit (nJ): " <<
fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
cout << "\tBitlines (nJ): " <<
fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
cout << "\tSense amplifier energy (nJ): " <<
fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
cout << "\tSub-array output driver (nJ): " <<
fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
}
else if (g_ip->pure_cam) {
cout << " CAM array:" << endl;
cout << " Total dynamic associative search energy/access (nJ): " <<
fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
cout << "\tTotal energy in H-tree (that includes both "
"match key and data transfer) (nJ): " <<
(fr->data_array2->power_htree_in_search.searchOp.dynamic +
fr->data_array2->power_htree_out_search.searchOp.dynamic +
fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
(fr->data_array2->power_htree_in_search.searchOp.dynamic +
fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
cout << "\tSearchlines (nJ): " <<
fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
cout << "\tMatchlines (nJ): " <<
fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
cout << "\tSub-array output driver (nJ): " <<
fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
cout << endl << " Total dynamic read energy/access (nJ): " <<
fr->data_array2->power.readOp.dynamic * 1e9 << endl;
cout << "\tTotal energy in H-tree (that includes both "
"address and data transfer) (nJ): " <<
(fr->data_array2->power_addr_input_htree.readOp.dynamic +
fr->data_array2->power_data_output_htree.readOp.dynamic +
fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
cout << "\tOutput Htree inside bank Energy (nJ): " <<
fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
cout << "\tDecoder (nJ): " <<
fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
cout << "\tWordline (nJ): " <<
fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitline mux & associated drivers (nJ): " <<
fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tSense amp mux & associated drivers (nJ): " <<
fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitlines (nJ): " <<
fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
cout << "\tSense amplifier energy (nJ): " <<
fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
cout << "\tSub-array output driver (nJ): " <<
fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
cout << endl << " Total leakage power of a bank (mW): " <<
fr->data_array2->power.readOp.leakage * 1e3 << endl;
} else {
cout << " Fully associative array:" << endl;
cout << " Total dynamic associative search energy/access (nJ): " <<
fr->data_array2->power.searchOp.dynamic * 1e9 << endl;
cout << "\tTotal energy in H-tree (that includes both "
"match key and data transfer) (nJ): " <<
(fr->data_array2->power_htree_in_search.searchOp.dynamic +
fr->data_array2->power_htree_out_search.searchOp.dynamic +
fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl;
cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " <<
(fr->data_array2->power_htree_in_search.searchOp.dynamic +
fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl;
cout << "\tSearchlines (nJ): " <<
fr->data_array2->power_searchline.searchOp.dynamic * 1e9 +
fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl;
cout << "\tMatchlines (nJ): " <<
fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 +
fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl;
cout << "\tData portion wordline (nJ): " <<
fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl;
cout << "\tData Bitlines (nJ): " <<
fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 +
fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl;
cout << "\tSense amplifier energy (nJ): " <<
fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl;
cout << "\tSub-array output driver (nJ): " <<
fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl;
cout << endl << " Total dynamic read energy/access (nJ): " <<
fr->data_array2->power.readOp.dynamic * 1e9 << endl;
cout << "\tTotal energy in H-tree (that includes both "
"address and data transfer) (nJ): " <<
(fr->data_array2->power_addr_input_htree.readOp.dynamic +
fr->data_array2->power_data_output_htree.readOp.dynamic +
fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
cout << "\tOutput Htree inside bank Energy (nJ): " <<
fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
cout << "\tDecoder (nJ): " <<
fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
cout << "\tWordline (nJ): " <<
fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitline mux & associated drivers (nJ): " <<
fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tSense amp mux & associated drivers (nJ): " <<
fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitlines (nJ): " <<
fr->data_array2->power_bitlines.readOp.dynamic * 1e9 +
fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
cout << "\tSense amplifier energy (nJ): " <<
fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
cout << "\tSub-array output driver (nJ): " <<
fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
cout << endl << " Total leakage power of a bank (mW): " <<
fr->data_array2->power.readOp.leakage * 1e3 << endl;
}
if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
!g_ip->is_main_mem) {
cout << endl << " Tag array: Total dynamic read energy/access (nJ): " <<
fr->tag_array2->power.readOp.dynamic * 1e9 << endl;
cout << "\tTotal leakage read/write power of a bank (mW): " <<
fr->tag_array2->power.readOp.leakage * 1e3 << endl;
cout << "\tTotal energy in H-tree (that includes both "
"address and data transfer) (nJ): " <<
(fr->tag_array2->power_addr_input_htree.readOp.dynamic +
fr->tag_array2->power_data_output_htree.readOp.dynamic +
fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl;
cout << "\tTotal leakage power in H-tree (that includes both "
"address and data network) ((mW)): " <<
(fr->tag_array2->power_addr_input_htree.readOp.leakage +
fr->tag_array2->power_data_output_htree.readOp.leakage +
fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3
<< endl;
cout << "\tTotal gate leakage power in H-tree (that includes both "
"address and data network) ((mW)): " <<
(fr->tag_array2->power_addr_input_htree.readOp.gate_leakage +
fr->tag_array2->power_data_output_htree.readOp.gate_leakage +
fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) *
1e3 << endl;
cout << "\tOutput Htree inside a bank Energy (nJ): " <<
fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl;
cout << "\tDecoder (nJ): " <<
fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 +
fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl;
cout << "\tWordline (nJ): " <<
fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitline mux & associated drivers (nJ): " <<
fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 +
fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 +
fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tSense amp mux & associated drivers (nJ): " <<
fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 +
fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 +
fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 +
fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 +
fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 +
fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl;
cout << "\tBitlines precharge and equalization circuit (nJ): " <<
fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl;
cout << "\tBitlines (nJ): " <<
fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl;
cout << "\tSense amplifier energy (nJ): " <<
fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl;
cout << "\tSub-array output driver (nJ): " <<
fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl;
}
cout << endl << endl << "Area Components:" << endl << endl;
/* Data array area stats */
if (!(g_ip->pure_cam || g_ip->fully_assoc))
cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
else if (g_ip->pure_cam)
cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
else
cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl;
cout << "\tHeight (mm): " <<
fr->data_array2->all_banks_height*1e-3 << endl;
cout << "\tWidth (mm): " <<
fr->data_array2->all_banks_width*1e-3 << endl;
if (g_ip->print_detail) {
cout << "\tArea efficiency (Memory cell area/Total area) - " <<
fr->data_array2->area_efficiency << " %" << endl;
cout << "\t\tMAT Height (mm): " <<
fr->data_array2->mat_height*1e-3 << endl;
cout << "\t\tMAT Length (mm): " <<
fr->data_array2->mat_length*1e-3 << endl;
cout << "\t\tSubarray Height (mm): " <<
fr->data_array2->subarray_height*1e-3 << endl;
cout << "\t\tSubarray Length (mm): " <<
fr->data_array2->subarray_length*1e-3 << endl;
}
/* Tag array area stats */
if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) &&
!g_ip->is_main_mem) {
cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl;
cout << "\tHeight (mm): " <<
fr->tag_array2->all_banks_height*1e-3 << endl;
cout << "\tWidth (mm): " <<
fr->tag_array2->all_banks_width*1e-3 << endl;
if (g_ip->print_detail) {
cout << "\tArea efficiency (Memory cell area/Total area) - " <<
fr->tag_array2->area_efficiency << " %" << endl;
cout << "\t\tMAT Height (mm): " <<
fr->tag_array2->mat_height*1e-3 << endl;
cout << "\t\tMAT Length (mm): " <<
fr->tag_array2->mat_length*1e-3 << endl;
cout << "\t\tSubarray Height (mm): " <<
fr->tag_array2->subarray_height*1e-3 << endl;
cout << "\t\tSubarray Length (mm): " <<
fr->tag_array2->subarray_length*1e-3 << endl;
}
}
Wire wpr;
wpr.print_wire();
}
}
//McPAT's plain interface, please keep !!!
/**
 * Validate a caller-provided parameter set and solve for it.
 *
 * The parameter set is installed as the global g_ip. If validation fails the
 * program terminates with a non-zero exit status; error_checking() has
 * already written the reason to cerr.
 *
 * @param local_interface Parameter set to validate and solve.
 * @return The organization filled in by solve().
 */
uca_org_t cacti_interface(InputParameter * const local_interface) {
    uca_org_t fin_res;
    fin_res.valid = false;

    g_ip = local_interface;
    if (!g_ip->error_checking()) {
        // A rejected configuration is a failure, so do not report success.
        exit(1);
    }

    init_tech_params(g_ip->F_sz_um, false);
    Wire winit; // Do not delete this line. It initializes wires.

    solve(&fin_res);
    return fin_res;
}
//McPAT's plain interface, please keep !!!
/**
 * Validate a caller-provided parameter set and initialize the technology
 * parameters and wires for it, without solving.
 *
 * The parameter set is installed as the global g_ip. If validation fails the
 * program terminates with a non-zero exit status; error_checking() has
 * already written the reason to cerr, prefixed with @p name.
 *
 * @param local_interface Parameter set to validate.
 * @param name            Component label passed to error_checking().
 * @return A result object with valid set to false; solve() is not called
 *         here.
 */
uca_org_t init_interface(InputParameter* const local_interface,
                         const string &name) {
    uca_org_t fin_res;
    fin_res.valid = false;

    g_ip = local_interface;
    if (!g_ip->error_checking(name)) {
        // A rejected configuration is a failure, so do not report success.
        exit(1);
    }

    init_tech_params(g_ip->F_sz_um, false);
    Wire winit; // Do not delete this line. It initializes wires.

    return fin_res;
}
/**
 * Re-validate a parameter set and update an existing result for it.
 *
 * The parameter set is installed as the global g_ip. If validation fails the
 * program terminates with a non-zero exit status, as the other interface
 * entry points terminate on a rejected configuration; error_checking() has
 * already written the reason to cerr.
 *
 * @param local_interface Parameter set to validate and apply.
 * @param fin_res         Result object passed on to update().
 */
void reconfigure(InputParameter *local_interface, uca_org_t *fin_res)
{
    // Copy the InputParameter to global interface (g_ip) and do error checking.
    g_ip = local_interface;
    if (!g_ip->error_checking()) {
        // Do not continue with a configuration that failed validation: the
        // fields error_checking() derives may not have been set.
        exit(1);
    }

    // Initialize technology parameters
    init_tech_params(g_ip->F_sz_um, false);

    Wire winit; // Do not delete this line. It initializes wires.

    // This corresponds to solve() in the initialization process.
    update(fin_res);
}