2014-04-01 18:44:30 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
* McPAT/CACTI
|
|
|
|
* SOFTWARE LICENSE AGREEMENT
|
|
|
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
2014-06-03 22:32:59 +02:00
|
|
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
2014-04-01 18:44:30 +02:00
|
|
|
* All Rights Reserved
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are
|
|
|
|
* met: redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer;
|
|
|
|
* redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution;
|
|
|
|
* neither the name of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
|
|
|
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
2014-06-03 22:32:59 +02:00
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
2014-04-01 18:44:30 +02:00
|
|
|
*
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <cassert>
|
|
|
|
|
|
|
|
#include "Ucache.h"
|
|
|
|
#include "nuca.h"
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
unsigned int MIN_BANKSIZE = 65536;
|
2014-04-01 18:44:30 +02:00
|
|
|
#define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */
|
|
|
|
#define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */
|
|
|
|
#define CONTR_2_BANK_LAT 0
|
|
|
|
|
|
|
|
int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */];
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Build a NUCA model for the given device type (defaults to the global
 * peripheral device parameters) and load the contention table right away.
 */
Nuca::Nuca(TechnologyParameter::DeviceType *dt = &(g_tp.peri_global))
    : deviceType(dt)
{
    /* reads contention.dat into cont_stats; exits if the file is missing */
    this->init_cont();
}
|
|
|
|
|
|
|
|
void
|
2014-06-03 22:32:59 +02:00
|
|
|
Nuca::init_cont() {
|
|
|
|
FILE *cont;
|
|
|
|
char line[5000];
|
|
|
|
char jk[5000];
|
|
|
|
cont = fopen("contention.dat", "r");
|
|
|
|
if (!cont) {
|
|
|
|
cout << "contention.dat file is missing!\n";
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
for (int j = 2; j < 5; j++) {
|
|
|
|
for (int k = 0; k < ROUTER_TYPES; k++) {
|
|
|
|
for (int l = 0; l < 7; l++) {
|
|
|
|
int *temp = cont_stats[i/*l2 or l3*/][j/*core*/][k/*64 or 128 or 256 link bw*/][l /* no banks*/];
|
|
|
|
assert(fscanf(cont, "%[^\n]\n", line) != EOF);
|
|
|
|
sscanf(line, "%[^:]: %d %d %d %d %d %d %d %d", jk,
|
|
|
|
&temp[0], &temp[1], &temp[2], &temp[3],
|
|
|
|
&temp[4], &temp[5], &temp[6], &temp[7]);
|
|
|
|
}
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
fclose(cont);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
void
|
|
|
|
Nuca::print_cont_stats() {
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
for (int j = 2; j < 5; j++) {
|
|
|
|
for (int k = 0; k < ROUTER_TYPES; k++) {
|
|
|
|
for (int l = 0; l < 7; l++) {
|
|
|
|
for (int m = 0; l < 7; l++) {
|
|
|
|
cout << cont_stats[i][j][k][l][m] << " ";
|
|
|
|
}
|
|
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
cout << endl;
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Release the wire models stored for every wire type in [wt_min, wt_max].
 *
 * NOTE(review): in the visible code wt_min, wt_max and the two wire arrays
 * are only assigned inside sim_nuca(); confirm they hold valid values when
 * the object is destroyed without sim_nuca() having run.
 * NOTE(review): sim_nuca() overwrites each slot once per router type and
 * bank count, so only the most recently stored wire per slot is freed here.
 */
Nuca::~Nuca() {
    int wire_type = wt_min;
    while (wire_type <= wt_max) {
        delete wire_vertical[wire_type];
        delete wire_horizontal[wire_type];
        ++wire_type;
    }
}
|
|
|
|
|
|
|
|
/* converts latency (in s) to cycles depending upon the FREQUENCY (in GHz) */
|
2014-06-03 22:32:59 +02:00
|
|
|
int
|
|
|
|
Nuca::calc_cycles(double lat, double oper_freq) {
|
|
|
|
//TODO: convert latch delay to FO4 */
|
|
|
|
double cycle_time = (1.0 / (oper_freq * 1e9)); /*s*/
|
|
|
|
cycle_time -= LATCH_DELAY;
|
|
|
|
cycle_time -= FIXED_OVERHEAD;
|
|
|
|
|
|
|
|
return (int)ceil(lat / cycle_time);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Intentionally releases nothing: the deletes of h_wire, v_wire and router
 * are disabled (kept below for reference), so destroying a nuca_org_t leaves
 * the objects it points to alive.
 */
nuca_org_t::~nuca_org_t()
{
    // if(h_wire) delete h_wire;
    // if(v_wire) delete v_wire;
    // if(router) delete router;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Version - 6.0
|
|
|
|
*
|
|
|
|
* Perform exhaustive search across different bank organizations,
|
|
|
|
* router configurations, grid organizations, and wire models and
|
|
|
|
* find an optimal NUCA organization
|
|
|
|
* For different bank count values
|
|
|
|
* 1. Optimal bank organization is calculated
|
|
|
|
* 2. For each bank organization, find different NUCA organizations
|
|
|
|
* using various router configurations, grid organizations,
|
|
|
|
* and wire models.
|
|
|
|
* 3. NUCA model with the least cost is picked for
|
|
|
|
* this particular bank count
|
|
|
|
* Finally include contention statistics and find the optimal
|
|
|
|
* NUCA configuration
|
|
|
|
*/
|
2014-06-03 22:32:59 +02:00
|
|
|
void
|
|
|
|
Nuca::sim_nuca() {
|
|
|
|
/* temp variables */
|
|
|
|
int it, ro, wr;
|
|
|
|
int num_cyc;
|
|
|
|
unsigned int i, j, k;
|
|
|
|
unsigned int r, c;
|
|
|
|
int l2_c;
|
|
|
|
int bank_count = 0;
|
|
|
|
uca_org_t ures;
|
|
|
|
nuca_org_t *opt_n;
|
|
|
|
mem_array tag, data;
|
|
|
|
list<nuca_org_t *> nuca_list;
|
|
|
|
Router *router_s[ROUTER_TYPES];
|
|
|
|
router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global));
|
|
|
|
router_s[0]->print_router();
|
|
|
|
router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global));
|
|
|
|
router_s[1]->print_router();
|
|
|
|
router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global));
|
|
|
|
router_s[2]->print_router();
|
|
|
|
|
|
|
|
int core_in; // to store no. of cores
|
|
|
|
|
|
|
|
/* to search diff grid organizations */
|
|
|
|
double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat,
|
|
|
|
curr_acclat;
|
|
|
|
double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power,
|
|
|
|
avg_leakage_power;
|
|
|
|
|
|
|
|
double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF;
|
|
|
|
int opt_rows = 0;
|
|
|
|
int opt_columns = 0;
|
|
|
|
double opt_totno_hops = 0;
|
|
|
|
double opt_avg_hop = 0;
|
|
|
|
double opt_dyn_power = 0, opt_leakage_power = 0;
|
|
|
|
min_values_t minval;
|
|
|
|
|
|
|
|
int bank_start = 0;
|
|
|
|
|
|
|
|
int flit_width = 0;
|
|
|
|
|
|
|
|
/* vertical and horizontal hop latency values */
|
|
|
|
int ver_hop_lat, hor_hop_lat; /* in cycles */
|
|
|
|
|
|
|
|
|
|
|
|
/* no. of different bank sizes to consider */
|
|
|
|
int iterations;
|
|
|
|
|
|
|
|
|
|
|
|
g_ip->nuca_cache_sz = g_ip->cache_sz;
|
|
|
|
nuca_list.push_back(new nuca_org_t());
|
|
|
|
|
|
|
|
if (g_ip->cache_level == 0) l2_c = 1;
|
|
|
|
else l2_c = 0;
|
|
|
|
|
|
|
|
if (g_ip->cores <= 4) core_in = 2;
|
|
|
|
else if (g_ip->cores <= 8) core_in = 3;
|
|
|
|
else if (g_ip->cores <= 16) core_in = 4;
|
2014-04-01 18:44:30 +02:00
|
|
|
else {
|
2014-06-03 22:32:59 +02:00
|
|
|
cout << "Number of cores should be <= 16!\n";
|
|
|
|
exit(0);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
// set the lower bound to an appropriate value. this depends on cache associativity
|
|
|
|
if (g_ip->assoc > 2) {
|
|
|
|
i = 2;
|
|
|
|
while (i != g_ip->assoc) {
|
|
|
|
MIN_BANKSIZE *= 2;
|
|
|
|
i *= 2;
|
|
|
|
}
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if (g_ip->force_wiretype) {
|
|
|
|
if (g_ip->wt == Low_swing) {
|
|
|
|
wt_min = Low_swing;
|
|
|
|
wt_max = Low_swing;
|
|
|
|
} else {
|
|
|
|
wt_min = Global;
|
|
|
|
wt_max = Low_swing - 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
wt_min = Global;
|
|
|
|
wt_max = Low_swing;
|
|
|
|
}
|
|
|
|
if (g_ip->nuca_bank_count != 0) { // simulate just one bank
|
|
|
|
if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 &&
|
|
|
|
g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 &&
|
|
|
|
g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) {
|
|
|
|
fprintf(stderr, "Incorrect bank count value! Please fix the ",
|
|
|
|
"value in cache.cfg\n");
|
|
|
|
}
|
|
|
|
bank_start = (int)logtwo((double)g_ip->nuca_bank_count);
|
|
|
|
iterations = bank_start + 1;
|
|
|
|
g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count;
|
|
|
|
}
|
|
|
|
cout << "Simulating various NUCA configurations\n";
|
|
|
|
for (it = bank_start; it < iterations; it++) {
|
|
|
|
/* different bank count values */
|
|
|
|
ures.tag_array2 = &tag;
|
|
|
|
ures.data_array2 = &data;
|
2014-04-01 18:44:30 +02:00
|
|
|
/*
|
2014-06-03 22:32:59 +02:00
|
|
|
* find the optimal bank organization
|
2014-04-01 18:44:30 +02:00
|
|
|
*/
|
2014-06-03 22:32:59 +02:00
|
|
|
solve(&ures);
|
|
|
|
// output_UCA(&ures);
|
|
|
|
bank_count = g_ip->nuca_cache_sz / g_ip->cache_sz;
|
|
|
|
cout << "====" << g_ip->cache_sz << "\n";
|
|
|
|
|
|
|
|
for (wr = wt_min; wr <= wt_max; wr++) {
|
|
|
|
|
|
|
|
for (ro = 0; ro < ROUTER_TYPES; ro++) {
|
|
|
|
flit_width = (int) router_s[ro]->flit_size; //initialize router
|
|
|
|
nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time;
|
|
|
|
|
|
|
|
/* calculate router and wire parameters */
|
|
|
|
|
|
|
|
double vlength = ures.cache_ht; /* length of the wire (u)*/
|
|
|
|
double hlength = ures.cache_len; // u
|
|
|
|
|
|
|
|
/* find delay, area, and power for wires */
|
|
|
|
wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength);
|
|
|
|
wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength);
|
|
|
|
|
|
|
|
|
|
|
|
hor_hop_lat =
|
|
|
|
calc_cycles(wire_horizontal[wr]->delay,
|
|
|
|
1 /(nuca_list.back()->nuca_pda.cycle_time *
|
|
|
|
.001));
|
|
|
|
ver_hop_lat =
|
|
|
|
calc_cycles(wire_vertical[wr]->delay,
|
|
|
|
1 / (nuca_list.back()->nuca_pda.cycle_time *
|
|
|
|
.001));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* assume a grid like topology and explore for optimal network
|
|
|
|
* configuration using different row and column count values.
|
|
|
|
*/
|
|
|
|
for (c = 1; c <= (unsigned int)bank_count; c++) {
|
|
|
|
while (bank_count % c != 0) c++;
|
|
|
|
r = bank_count / c;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* to find the avg access latency of a NUCA cache, uncontended
|
|
|
|
* access time to each bank from the
|
|
|
|
* cache controller is calculated.
|
|
|
|
* avg latency =
|
|
|
|
* sum of the access latencies to individual banks)/bank
|
|
|
|
* count value.
|
|
|
|
*/
|
|
|
|
totno_hops = totno_hhops = totno_vhops = tot_lat = 0;
|
|
|
|
k = 1;
|
|
|
|
for (i = 0; i < r; i++) {
|
|
|
|
for (j = 0; j < c; j++) {
|
|
|
|
/*
|
|
|
|
* vertical hops including the
|
|
|
|
* first hop from the cache controller
|
|
|
|
*/
|
|
|
|
curr_hop = i + 1;
|
|
|
|
curr_hop += j; /* horizontal hops */
|
|
|
|
totno_hhops += j;
|
|
|
|
totno_vhops += (i + 1);
|
|
|
|
curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT +
|
|
|
|
j * hor_hop_lat);
|
|
|
|
|
|
|
|
tot_lat += curr_acclat;
|
|
|
|
totno_hops += curr_hop;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
avg_lat = tot_lat / bank_count;
|
|
|
|
avg_hop = totno_hops / bank_count;
|
|
|
|
avg_hhop = totno_hhops / bank_count;
|
|
|
|
avg_vhop = totno_vhops / bank_count;
|
|
|
|
|
|
|
|
/* net access latency */
|
|
|
|
curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay *
|
|
|
|
avg_hop) +
|
|
|
|
calc_cycles(ures.access_time,
|
|
|
|
1 /
|
|
|
|
(nuca_list.back()->nuca_pda.cycle_time *
|
|
|
|
.001));
|
|
|
|
|
|
|
|
/* avg access lat of nuca */
|
|
|
|
avg_dyn_power =
|
|
|
|
avg_hop *
|
|
|
|
(router_s[ro]->power.readOp.dynamic) + avg_hhop *
|
|
|
|
(wire_horizontal[wr]->power.readOp.dynamic) *
|
|
|
|
(g_ip->block_sz * 8 + 64) + avg_vhop *
|
|
|
|
(wire_vertical[wr]->power.readOp.dynamic) *
|
|
|
|
(g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic;
|
|
|
|
|
|
|
|
avg_leakage_power =
|
|
|
|
bank_count * router_s[ro]->power.readOp.leakage +
|
|
|
|
avg_hhop * (wire_horizontal[wr]->power.readOp.leakage *
|
|
|
|
wire_horizontal[wr]->delay) * flit_width +
|
|
|
|
avg_vhop * (wire_vertical[wr]->power.readOp.leakage *
|
|
|
|
wire_horizontal[wr]->delay);
|
|
|
|
|
|
|
|
if (curr_acclat < opt_acclat) {
|
|
|
|
opt_acclat = curr_acclat;
|
|
|
|
opt_tot_lat = tot_lat;
|
|
|
|
opt_avg_lat = avg_lat;
|
|
|
|
opt_totno_hops = totno_hops;
|
|
|
|
opt_avg_hop = avg_hop;
|
|
|
|
opt_rows = r;
|
|
|
|
opt_columns = c;
|
|
|
|
opt_dyn_power = avg_dyn_power;
|
|
|
|
opt_leakage_power = avg_leakage_power;
|
|
|
|
}
|
|
|
|
totno_hops = 0;
|
|
|
|
tot_lat = 0;
|
|
|
|
totno_hhops = 0;
|
|
|
|
totno_vhops = 0;
|
|
|
|
}
|
|
|
|
nuca_list.back()->wire_pda.power.readOp.dynamic =
|
|
|
|
opt_avg_hop * flit_width *
|
|
|
|
(wire_horizontal[wr]->power.readOp.dynamic +
|
|
|
|
wire_vertical[wr]->power.readOp.dynamic);
|
|
|
|
nuca_list.back()->avg_hops = opt_avg_hop;
|
|
|
|
/* network delay/power */
|
|
|
|
nuca_list.back()->h_wire = wire_horizontal[wr];
|
|
|
|
nuca_list.back()->v_wire = wire_vertical[wr];
|
|
|
|
nuca_list.back()->router = router_s[ro];
|
|
|
|
/* bank delay/power */
|
|
|
|
|
|
|
|
nuca_list.back()->bank_pda.delay = ures.access_time;
|
|
|
|
nuca_list.back()->bank_pda.power = ures.power;
|
|
|
|
nuca_list.back()->bank_pda.area.h = ures.cache_ht;
|
|
|
|
nuca_list.back()->bank_pda.area.w = ures.cache_len;
|
|
|
|
nuca_list.back()->bank_pda.cycle_time = ures.cycle_time;
|
|
|
|
|
|
|
|
num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/,
|
|
|
|
1 /
|
|
|
|
(nuca_list.back()->nuca_pda.cycle_time *
|
|
|
|
.001/*GHz*/));
|
|
|
|
if (num_cyc % 2 != 0) num_cyc++;
|
|
|
|
if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles
|
|
|
|
|
|
|
|
if (it < 7) {
|
|
|
|
nuca_list.back()->nuca_pda.delay = opt_acclat +
|
|
|
|
cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
|
|
|
|
nuca_list.back()->contention =
|
|
|
|
cont_stats[l2_c][core_in][ro][it][num_cyc/2-1];
|
|
|
|
} else {
|
|
|
|
nuca_list.back()->nuca_pda.delay = opt_acclat +
|
|
|
|
cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
|
|
|
|
nuca_list.back()->contention =
|
|
|
|
cont_stats[l2_c][core_in][ro][7][num_cyc/2-1];
|
|
|
|
}
|
|
|
|
nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power;
|
|
|
|
nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power;
|
|
|
|
|
|
|
|
/* array organization */
|
|
|
|
nuca_list.back()->bank_count = bank_count;
|
|
|
|
nuca_list.back()->rows = opt_rows;
|
|
|
|
nuca_list.back()->columns = opt_columns;
|
|
|
|
calculate_nuca_area (nuca_list.back());
|
|
|
|
|
|
|
|
minval.update_min_values(nuca_list.back());
|
|
|
|
nuca_list.push_back(new nuca_org_t());
|
|
|
|
opt_acclat = BIGNUM;
|
|
|
|
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
g_ip->cache_sz /= 2;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
delete(nuca_list.back());
|
|
|
|
nuca_list.pop_back();
|
|
|
|
opt_n = find_optimal_nuca(&nuca_list, &minval);
|
|
|
|
print_nuca(opt_n);
|
|
|
|
g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
list<nuca_org_t *>::iterator niter;
|
|
|
|
for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) {
|
|
|
|
delete *niter;
|
|
|
|
}
|
|
|
|
nuca_list.clear();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (int i = 0; i < ROUTER_TYPES; i++) {
|
|
|
|
delete router_s[i];
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
g_ip->display_ip();
|
|
|
|
// g_ip->force_cache_config = true;
|
|
|
|
// g_ip->ndwl = 8;
|
|
|
|
// g_ip->ndbl = 16;
|
|
|
|
// g_ip->nspd = 4;
|
|
|
|
// g_ip->ndcm = 1;
|
|
|
|
// g_ip->ndsam1 = 8;
|
|
|
|
// g_ip->ndsam2 = 32;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
void
|
|
|
|
Nuca::print_nuca (nuca_org_t *fr) {
|
|
|
|
printf("\n---------- CACTI version 6.5, Non-uniform Cache Access "
|
|
|
|
"----------\n\n");
|
|
|
|
printf("Optimal number of banks - %d\n", fr->bank_count);
|
|
|
|
printf("Grid organization rows x columns - %d x %d\n",
|
|
|
|
fr->rows, fr->columns);
|
|
|
|
printf("Network frequency - %g GHz\n",
|
|
|
|
(1 / fr->nuca_pda.cycle_time)*1e3);
|
|
|
|
printf("Cache dimension (mm x mm) - %g x %g\n",
|
|
|
|
fr->nuca_pda.area.h,
|
|
|
|
fr->nuca_pda.area.w);
|
|
|
|
|
|
|
|
fr->router->print_router();
|
|
|
|
|
|
|
|
printf("\n\nWire stats:\n");
|
|
|
|
if (fr->h_wire->wt == Global) {
|
|
|
|
printf("\tWire type - Full swing global wires with least "
|
|
|
|
"possible delay\n");
|
|
|
|
} else if (fr->h_wire->wt == Global_5) {
|
|
|
|
printf("\tWire type - Full swing global wires with "
|
|
|
|
"5%% delay penalty\n");
|
|
|
|
} else if (fr->h_wire->wt == Global_10) {
|
|
|
|
printf("\tWire type - Full swing global wires with "
|
|
|
|
"10%% delay penalty\n");
|
|
|
|
} else if (fr->h_wire->wt == Global_20) {
|
|
|
|
printf("\tWire type - Full swing global wires with "
|
|
|
|
"20%% delay penalty\n");
|
|
|
|
} else if (fr->h_wire->wt == Global_30) {
|
|
|
|
printf("\tWire type - Full swing global wires with "
|
|
|
|
"30%% delay penalty\n");
|
|
|
|
} else if (fr->h_wire->wt == Low_swing) {
|
|
|
|
printf("\tWire type - Low swing wires\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("\tHorizontal link delay - %g (ns)\n",
|
|
|
|
fr->h_wire->delay*1e9);
|
|
|
|
printf("\tVertical link delay - %g (ns)\n",
|
|
|
|
fr->v_wire->delay*1e9);
|
|
|
|
printf("\tDelay/length - %g (ns/mm)\n",
|
|
|
|
fr->h_wire->delay*1e9 / fr->bank_pda.area.w);
|
|
|
|
printf("\tHorizontal link energy -dynamic/access %g (nJ)\n"
|
|
|
|
"\t -leakage %g (nW)\n\n",
|
|
|
|
fr->h_wire->power.readOp.dynamic*1e9,
|
|
|
|
fr->h_wire->power.readOp.leakage*1e9);
|
|
|
|
printf("\tVertical link energy -dynamic/access %g (nJ)\n"
|
|
|
|
"\t -leakage %g (nW)\n\n",
|
|
|
|
fr->v_wire->power.readOp.dynamic*1e9,
|
|
|
|
fr->v_wire->power.readOp.leakage*1e9);
|
|
|
|
printf("\n\n");
|
|
|
|
fr->v_wire->print_wire();
|
|
|
|
printf("\n\nBank stats:\n");
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Pick the cheapest organization from the candidate list.
 *
 * n      - candidate organizations; entries that violate the deviation
 *          constraints are removed from the list AND deleted (weighted-cost
 *          mode only)
 * minval - per-metric minima used for normalization; min_leakage is patched
 *          to 0.1 when it is 0 (weighted-cost mode only)
 *
 * Cost function, selected by g_ip->ed:
 *   1     : normalized delay * normalized dynamic power
 *   2     : normalized delay^2 * normalized dynamic power
 *   other : weighted sum of normalized delay, cycle time, dynamic power,
 *           leakage power and area, restricted to candidates accepted by
 *           check_nuca_org()
 *
 * Returns the candidate with the lowest cost (the first one on ties), or
 * NULL when the list is empty or no candidate qualifies. A statistics line
 * is printed for every candidate visited.
 */
nuca_org_t *
Nuca::find_optimal_nuca (list<nuca_org_t *> *n, min_values_t *minval) {
    double cost = 0;
    double min_cost = BIGNUM;
    nuca_org_t *res = NULL;
    float d, a, dp, lp, c;
    int v;
    dp = g_ip->dynamic_power_wt_nuca;
    lp = g_ip->leakage_power_wt_nuca;
    a = g_ip->area_wt_nuca;
    d = g_ip->delay_wt_nuca;
    c = g_ip->cycle_time_wt_nuca;

    /*
     * The iterator is advanced explicitly: list::erase() already returns the
     * element following the erased one. Stepping back and forth around an
     * erase skips a candidate when the first element is removed and walks
     * past end() when the list becomes empty.
     */
    list<nuca_org_t *>::iterator niter = n->begin();
    while (niter != n->end()) {
        nuca_org_t *org = *niter;
        bool keep = true;

        fprintf(stderr, "\n-----------------------------"
                "---------------\n");

        printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t "
               "bank_dpower = %g \tleak = %g \tcycle = %g\n",
               org->bank_count,
               org->nuca_pda.delay,
               org->nuca_pda.power.readOp.dynamic,
               org->h_wire->wt,
               org->bank_pda.power.readOp.dynamic,
               org->nuca_pda.power.readOp.leakage,
               org->nuca_pda.cycle_time);

        if (g_ip->ed == 1) {
            cost = (org->nuca_pda.delay / minval->min_delay) *
                   (org->nuca_pda.power.readOp.dynamic / minval->min_dyn);
            if (min_cost > cost) {
                min_cost = cost;
                res = org;
            }
        } else if (g_ip->ed == 2) {
            cost = (org->nuca_pda.delay / minval->min_delay) *
                   (org->nuca_pda.delay / minval->min_delay) *
                   (org->nuca_pda.power.readOp.dynamic / minval->min_dyn);
            if (min_cost > cost) {
                min_cost = cost;
                res = org;
            }
        } else {
            /*
             * check whether the current organization
             * meets the input deviation constraints
             */
            v = check_nuca_org(org, minval);
            if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling

            if (v) {
                cost = (d * (org->nuca_pda.delay / minval->min_delay) +
                        c * (org->nuca_pda.cycle_time / minval->min_cyc) +
                        dp * (org->nuca_pda.power.readOp.dynamic /
                              minval->min_dyn) +
                        lp * (org->nuca_pda.power.readOp.leakage /
                              minval->min_leakage) +
                        a * (org->nuca_pda.area.get_area() /
                             minval->min_area));
                fprintf(stderr, "cost = %g\n", cost);

                if (min_cost > cost) {
                    min_cost = cost;
                    res = org;
                }
            } else {
                keep = false;
            }
        }

        if (keep) {
            ++niter;
        } else {
            /*
             * A rejected candidate can never be the result (res is only set
             * for accepted ones). Once it leaves the list nobody else frees
             * it, so release it here.
             */
            delete org;
            niter = n->erase(niter);
        }
    }
    return res;
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
int
|
|
|
|
Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) {
|
|
|
|
if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) >
|
|
|
|
g_ip->delay_dev_nuca) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) /
|
|
|
|
minval->min_dyn)*100 >
|
|
|
|
g_ip->dynamic_power_dev_nuca) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) /
|
|
|
|
minval->min_leakage)*100 >
|
|
|
|
g_ip->leakage_power_dev_nuca) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
|
|
|
|
g_ip->cycle_time_dev_nuca) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) *
|
|
|
|
100 >
|
|
|
|
g_ip->area_dev_nuca) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
void
|
|
|
|
Nuca::calculate_nuca_area (nuca_org_t *nuca) {
|
|
|
|
nuca->nuca_pda.area.h =
|
|
|
|
nuca->rows * ((nuca->h_wire->wire_width +
|
|
|
|
nuca->h_wire->wire_spacing)
|
|
|
|
* nuca->router->flit_size +
|
|
|
|
nuca->bank_pda.area.h);
|
|
|
|
|
|
|
|
nuca->nuca_pda.area.w =
|
|
|
|
nuca->columns * ((nuca->v_wire->wire_width +
|
|
|
|
nuca->v_wire->wire_spacing)
|
|
|
|
* nuca->router->flit_size +
|
|
|
|
nuca->bank_pda.area.w);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|