2014-04-01 18:44:30 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
* McPAT/CACTI
|
|
|
|
* SOFTWARE LICENSE AGREEMENT
|
|
|
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
2014-06-03 22:32:59 +02:00
|
|
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
2014-04-01 18:44:30 +02:00
|
|
|
* All Rights Reserved
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are
|
|
|
|
* met: redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer;
|
|
|
|
* redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution;
|
|
|
|
* neither the name of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
|
|
|
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
2014-06-03 22:32:59 +02:00
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
2014-04-01 18:44:30 +02:00
|
|
|
*
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
|
|
|
|
#include <pthread.h>
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cmath>
|
|
|
|
#include <ctime>
|
|
|
|
#include <iostream>
|
|
|
|
#include <list>
|
|
|
|
|
|
|
|
#include "Ucache.h"
|
|
|
|
#include "area.h"
|
|
|
|
#include "bank.h"
|
|
|
|
#include "basic_circuit.h"
|
|
|
|
#include "component.h"
|
|
|
|
#include "const.h"
|
|
|
|
#include "decoder.h"
|
|
|
|
#include "parameter.h"
|
|
|
|
#include "subarray.h"
|
|
|
|
#include "uca.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
// Number of worker threads used for the partition search. It is the stride of
// the per-thread iteration loop in calc_time_mt_wrapper() and the size of the
// thread/argument arrays in solve(). NTHREADS is not defined in this file
// (presumably a build-time macro from one of the included headers).
const uint32_t nthreads = NTHREADS;
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Merge another set of minima into this one: each tracked metric (delay,
 * dynamic power, leakage, area, cycle time) is lowered to the value held in
 * *val whenever the current value is strictly greater.
 */
void min_values_t::update_min_values(const min_values_t * val) {
    if (min_delay > val->min_delay) {
        min_delay = val->min_delay;
    }
    if (min_dyn > val->min_dyn) {
        min_dyn = val->min_dyn;
    }
    if (min_leakage > val->min_leakage) {
        min_leakage = val->min_leakage;
    }
    if (min_area > val->min_area) {
        min_area = val->min_area;
    }
    if (min_cyc > val->min_cyc) {
        min_cyc = val->min_cyc;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Lower the tracked minima using one UCA organization: access time, read
 * dynamic power, read leakage, area and cycle time of res each replace the
 * stored minimum when the stored value is strictly greater.
 */
void min_values_t::update_min_values(const uca_org_t & res) {
    if (min_delay > res.access_time) {
        min_delay = res.access_time;
    }
    if (min_dyn > res.power.readOp.dynamic) {
        min_dyn = res.power.readOp.dynamic;
    }
    if (min_leakage > res.power.readOp.leakage) {
        min_leakage = res.power.readOp.leakage;
    }
    if (min_area > res.area) {
        min_area = res.area;
    }
    if (min_cyc > res.cycle_time) {
        min_cyc = res.cycle_time;
    }
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Lower the tracked minima using one NUCA organization. All values are read
 * from res->nuca_pda: delay, read dynamic power, read leakage, area (via
 * get_area()) and cycle time. A stored minimum is replaced only when it is
 * strictly greater than the candidate.
 */
void min_values_t::update_min_values(const nuca_org_t * res) {
    if (min_delay > res->nuca_pda.delay) {
        min_delay = res->nuca_pda.delay;
    }
    if (min_dyn > res->nuca_pda.power.readOp.dynamic) {
        min_dyn = res->nuca_pda.power.readOp.dynamic;
    }
    if (min_leakage > res->nuca_pda.power.readOp.leakage) {
        min_leakage = res->nuca_pda.power.readOp.leakage;
    }
    if (min_area > res->nuca_pda.area.get_area()) {
        min_area = res->nuca_pda.area.get_area();
    }
    if (min_cyc > res->nuca_pda.cycle_time) {
        min_cyc = res->nuca_pda.cycle_time;
    }
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Lower the tracked minima using one memory-array result: access time, read
 * dynamic power, read leakage, area and cycle time of *res each replace the
 * stored minimum when the stored value is strictly greater.
 */
void min_values_t::update_min_values(const mem_array * res) {
    if (min_delay > res->access_time) {
        min_delay = res->access_time;
    }
    if (min_dyn > res->power.readOp.dynamic) {
        min_dyn = res->power.readOp.dynamic;
    }
    if (min_leakage > res->power.readOp.leakage) {
        min_leakage = res->power.readOp.leakage;
    }
    if (min_area > res->area) {
        min_area = res->area;
    }
    if (min_cyc > res->cycle_time) {
        min_cyc = res->cycle_time;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
void * calc_time_mt_wrapper(void * void_obj) {
|
|
|
|
calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj;
|
|
|
|
uint32_t tid = calc_obj->tid;
|
|
|
|
list<mem_array *> & data_arr = calc_obj->data_arr;
|
|
|
|
list<mem_array *> & tag_arr = calc_obj->tag_arr;
|
|
|
|
bool is_tag = calc_obj->is_tag;
|
|
|
|
bool pure_ram = calc_obj->pure_ram;
|
|
|
|
bool pure_cam = calc_obj->pure_cam;
|
|
|
|
bool is_main_mem = calc_obj->is_main_mem;
|
|
|
|
double Nspd_min = calc_obj->Nspd_min;
|
|
|
|
min_values_t * data_res = calc_obj->data_res;
|
|
|
|
min_values_t * tag_res = calc_obj->tag_res;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
data_arr.clear();
|
|
|
|
data_arr.push_back(new mem_array);
|
|
|
|
tag_arr.clear();
|
|
|
|
tag_arr.push_back(new mem_array);
|
|
|
|
|
|
|
|
uint32_t Ndwl_niter = _log2(MAXDATAN) + 1;
|
|
|
|
uint32_t Ndbl_niter = _log2(MAXDATAN) + 1;
|
|
|
|
uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1;
|
|
|
|
uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter;
|
|
|
|
|
|
|
|
|
|
|
|
bool is_valid_partition;
|
|
|
|
int wt_min, wt_max;
|
|
|
|
|
|
|
|
if (g_ip->force_wiretype) {
|
|
|
|
if (g_ip->wt == 0) {
|
|
|
|
wt_min = Low_swing;
|
|
|
|
wt_max = Low_swing;
|
|
|
|
} else {
|
|
|
|
wt_min = Global;
|
|
|
|
wt_max = Low_swing - 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
wt_min = Global;
|
|
|
|
wt_max = Low_swing;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) {
|
|
|
|
for (int wr = wt_min; wr <= wt_max; wr++) {
|
|
|
|
for (uint32_t iter = tid; iter < niter; iter += nthreads) {
|
|
|
|
// reconstruct Ndwl, Ndbl, Ndcm
|
|
|
|
unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter));
|
|
|
|
unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter);
|
|
|
|
unsigned int Ndcm = 1 << (iter % Ndcm_niter);
|
|
|
|
for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX;
|
|
|
|
Ndsam_lev_1 *= 2) {
|
|
|
|
for (unsigned int Ndsam_lev_2 = 1;
|
|
|
|
Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) {
|
|
|
|
//for debuging
|
|
|
|
if (g_ip->force_cache_config && is_tag == false) {
|
|
|
|
wr = g_ip->wt;
|
|
|
|
Ndwl = g_ip->ndwl;
|
|
|
|
Ndbl = g_ip->ndbl;
|
|
|
|
Ndcm = g_ip->ndcm;
|
|
|
|
if (g_ip->nspd != 0) {
|
|
|
|
Nspd = g_ip->nspd;
|
|
|
|
}
|
|
|
|
if (g_ip->ndsam1 != 0) {
|
|
|
|
Ndsam_lev_1 = g_ip->ndsam1;
|
|
|
|
Ndsam_lev_2 = g_ip->ndsam2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_tag == true) {
|
|
|
|
is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl,
|
|
|
|
Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
|
|
|
|
tag_arr.back(), 0, NULL, NULL,
|
|
|
|
is_main_mem);
|
|
|
|
}
|
|
|
|
// If it's a fully-associative cache, the data array partition parameters are identical to that of
|
|
|
|
// the tag array, so compute data array partition properties also here.
|
|
|
|
if (is_tag == false || g_ip->fully_assoc) {
|
|
|
|
is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl,
|
|
|
|
Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2,
|
|
|
|
data_arr.back(), 0, NULL, NULL,
|
|
|
|
is_main_mem);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (is_valid_partition) {
|
|
|
|
if (is_tag == true) {
|
|
|
|
tag_arr.back()->wt = (enum Wire_type) wr;
|
|
|
|
tag_res->update_min_values(tag_arr.back());
|
|
|
|
tag_arr.push_back(new mem_array);
|
|
|
|
}
|
|
|
|
if (is_tag == false || g_ip->fully_assoc) {
|
|
|
|
data_arr.back()->wt = (enum Wire_type) wr;
|
|
|
|
data_res->update_min_values(data_arr.back());
|
|
|
|
data_arr.push_back(new mem_array);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (g_ip->force_cache_config && is_tag == false) {
|
|
|
|
wr = wt_max;
|
|
|
|
iter = niter;
|
|
|
|
if (g_ip->nspd != 0) {
|
|
|
|
Nspd = MAXDATASPD;
|
|
|
|
}
|
|
|
|
if (g_ip->ndsam1 != 0) {
|
|
|
|
Ndsam_lev_1 = MAX_COL_MUX + 1;
|
|
|
|
Ndsam_lev_2 = MAX_COL_MUX + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
delete data_arr.back();
|
|
|
|
delete tag_arr.back();
|
|
|
|
data_arr.pop_back();
|
|
|
|
tag_arr.pop_back();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
#ifndef DEBUG
|
|
|
|
pthread_exit(NULL);
|
|
|
|
#else
|
|
|
|
return NULL;
|
|
|
|
#endif
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool calculate_time(
|
|
|
|
bool is_tag,
|
|
|
|
int pure_ram,
|
|
|
|
bool pure_cam,
|
|
|
|
double Nspd,
|
|
|
|
unsigned int Ndwl,
|
|
|
|
unsigned int Ndbl,
|
|
|
|
unsigned int Ndcm,
|
|
|
|
unsigned int Ndsam_lev_1,
|
|
|
|
unsigned int Ndsam_lev_2,
|
|
|
|
mem_array *ptr_array,
|
|
|
|
int flag_results_populate,
|
|
|
|
results_mem_array *ptr_results,
|
|
|
|
uca_org_t *ptr_fin_res,
|
2014-06-03 22:32:59 +02:00
|
|
|
bool is_main_mem) {
|
|
|
|
DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if (dyn_p.is_valid == false) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
UCA * uca = new UCA(dyn_p);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
//For the final solution, populate the ptr_results data structure
|
|
|
|
//-- TODO: copy only necessary variables
|
|
|
|
if (flag_results_populate) {
|
|
|
|
} else {
|
|
|
|
int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir;
|
|
|
|
int num_mats = uca->bank.dp.num_mats;
|
|
|
|
bool is_fa = uca->bank.dp.fully_assoc;
|
|
|
|
bool pure_cam = uca->bank.dp.pure_cam;
|
2014-04-01 18:44:30 +02:00
|
|
|
ptr_array->Ndwl = Ndwl;
|
2014-06-03 22:32:59 +02:00
|
|
|
ptr_array->Ndbl = Ndbl;
|
|
|
|
ptr_array->Nspd = Nspd;
|
|
|
|
ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing;
|
|
|
|
ptr_array->Ndsam_lev_1 = Ndsam_lev_1;
|
|
|
|
ptr_array->Ndsam_lev_2 = Ndsam_lev_2;
|
|
|
|
ptr_array->access_time = uca->access_time;
|
|
|
|
ptr_array->cycle_time = uca->cycle_time;
|
|
|
|
ptr_array->multisubbank_interleave_cycle_time =
|
|
|
|
uca->multisubbank_interleave_cycle_time;
|
|
|
|
ptr_array->area_ram_cells = uca->area_all_dataramcells;
|
|
|
|
ptr_array->area = uca->area.get_area();
|
|
|
|
ptr_array->height = uca->area.h;
|
|
|
|
ptr_array->width = uca->area.w;
|
|
|
|
ptr_array->mat_height = uca->bank.mat.area.h;
|
|
|
|
ptr_array->mat_length = uca->bank.mat.area.w;
|
|
|
|
ptr_array->subarray_height = uca->bank.mat.subarray.area.h;
|
|
|
|
ptr_array->subarray_length = uca->bank.mat.subarray.area.w;
|
|
|
|
ptr_array->power = uca->power;
|
|
|
|
ptr_array->delay_senseamp_mux_decoder =
|
|
|
|
MAX(uca->delay_array_to_sa_mux_lev_1_decoder,
|
|
|
|
uca->delay_array_to_sa_mux_lev_2_decoder);
|
|
|
|
ptr_array->delay_before_subarray_output_driver =
|
|
|
|
uca->delay_before_subarray_output_driver;
|
|
|
|
ptr_array->delay_from_subarray_output_driver_to_output =
|
|
|
|
uca->delay_from_subarray_out_drv_to_out;
|
|
|
|
|
|
|
|
ptr_array->delay_route_to_bank = uca->htree_in_add->delay;
|
|
|
|
ptr_array->delay_input_htree = uca->bank.htree_in_add->delay;
|
|
|
|
ptr_array->delay_row_predecode_driver_and_block =
|
|
|
|
uca->bank.mat.r_predec->delay;
|
|
|
|
ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay;
|
|
|
|
ptr_array->delay_bitlines = uca->bank.mat.delay_bitline;
|
|
|
|
ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline;
|
|
|
|
ptr_array->delay_sense_amp = uca->bank.mat.delay_sa;
|
|
|
|
ptr_array->delay_subarray_output_driver =
|
|
|
|
uca->bank.mat.delay_subarray_out_drv_htree;
|
|
|
|
ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay;
|
|
|
|
ptr_array->delay_comparator = uca->bank.mat.delay_comparator;
|
|
|
|
|
|
|
|
ptr_array->all_banks_height = uca->area.h;
|
|
|
|
ptr_array->all_banks_width = uca->area.w;
|
|
|
|
ptr_array->area_efficiency = uca->area_all_dataramcells * 100 /
|
|
|
|
(uca->area.get_area());
|
|
|
|
|
|
|
|
ptr_array->power_routing_to_bank = uca->power_routing_to_bank;
|
|
|
|
ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power;
|
|
|
|
ptr_array->power_data_input_htree = uca->bank.htree_in_data->power;
|
|
|
|
ptr_array->power_data_output_htree = uca->bank.htree_out_data->power;
|
|
|
|
|
|
|
|
ptr_array->power_row_predecoder_drivers =
|
|
|
|
uca->bank.mat.r_predec->driver_power;
|
|
|
|
ptr_array->power_row_predecoder_drivers.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_row_predecoder_drivers.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_row_predecoder_drivers.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_row_predecoder_blocks =
|
|
|
|
uca->bank.mat.r_predec->block_power;
|
|
|
|
ptr_array->power_row_predecoder_blocks.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_row_predecoder_blocks.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_row_predecoder_blocks.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders;
|
|
|
|
ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_bit_mux_predecoder_drivers =
|
|
|
|
uca->bank.mat.b_mux_predec->driver_power;
|
|
|
|
ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_bit_mux_predecoder_blocks =
|
|
|
|
uca->bank.mat.b_mux_predec->block_power;
|
|
|
|
ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders;
|
|
|
|
ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bit_mux_decoders.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bit_mux_decoders.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers =
|
|
|
|
uca->bank.mat.sa_mux_lev_1_predec->driver_power;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks =
|
|
|
|
uca->bank.mat.sa_mux_lev_1_predec->block_power;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_decoders =
|
|
|
|
uca->bank.mat.power_sa_mux_lev_1_decoders;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers =
|
|
|
|
uca->bank.mat.sa_mux_lev_2_predec->driver_power;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks =
|
|
|
|
uca->bank.mat.sa_mux_lev_2_predec->block_power;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_decoders =
|
|
|
|
uca->bank.mat.power_sa_mux_lev_2_decoders;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_bitlines = uca->bank.mat.power_bitline;
|
|
|
|
ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_sense_amps = uca->bank.mat.power_sa;
|
|
|
|
ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_prechg_eq_drivers =
|
|
|
|
uca->bank.mat.power_bl_precharge_eq_drv;
|
|
|
|
ptr_array->power_prechg_eq_drivers.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_prechg_eq_drivers.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_prechg_eq_drivers.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_output_drivers_at_subarray =
|
|
|
|
uca->bank.mat.power_subarray_out_drv;
|
|
|
|
ptr_array->power_output_drivers_at_subarray.readOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *=
|
|
|
|
num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
ptr_array->power_comparators = uca->bank.mat.power_comparator;
|
|
|
|
ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir;
|
|
|
|
|
|
|
|
if (is_fa || pure_cam) {
|
|
|
|
ptr_array->power_htree_in_search =
|
|
|
|
uca->bank.htree_in_search->power;
|
|
|
|
ptr_array->power_htree_out_search =
|
|
|
|
uca->bank.htree_out_search->power;
|
|
|
|
ptr_array->power_searchline = uca->bank.mat.power_searchline;
|
|
|
|
ptr_array->power_searchline.searchOp.dynamic *= num_mats;
|
|
|
|
ptr_array->power_searchline_precharge =
|
|
|
|
uca->bank.mat.power_searchline_precharge;
|
|
|
|
ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats;
|
|
|
|
ptr_array->power_matchlines = uca->bank.mat.power_matchline;
|
|
|
|
ptr_array->power_matchlines.searchOp.dynamic *= num_mats;
|
|
|
|
ptr_array->power_matchline_precharge =
|
|
|
|
uca->bank.mat.power_matchline_precharge;
|
|
|
|
ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats;
|
|
|
|
ptr_array->power_matchline_to_wordline_drv =
|
|
|
|
uca->bank.mat.power_ml_to_ram_wl_drv;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr_array->activate_energy = uca->activate_energy;
|
|
|
|
ptr_array->read_energy = uca->read_energy;
|
|
|
|
ptr_array->write_energy = uca->write_energy;
|
|
|
|
ptr_array->precharge_energy = uca->precharge_energy;
|
|
|
|
ptr_array->refresh_power = uca->refresh_power;
|
|
|
|
ptr_array->leak_power_subbank_closed_page =
|
|
|
|
uca->leak_power_subbank_closed_page;
|
|
|
|
ptr_array->leak_power_subbank_open_page =
|
|
|
|
uca->leak_power_subbank_open_page;
|
|
|
|
ptr_array->leak_power_request_and_reply_networks =
|
|
|
|
uca->leak_power_request_and_reply_networks;
|
|
|
|
|
|
|
|
ptr_array->precharge_delay = uca->precharge_delay;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
delete uca;
|
|
|
|
return true;
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Check a UCA organization against the deviation limits configured in g_ip.
 *
 * For each of access time, read dynamic power, read leakage, cycle time and
 * area, the percentage by which u exceeds the corresponding minimum in
 * *minval is compared with the matching g_ip->*_dev limit. Returns false as
 * soon as one metric exceeds its limit, true when none does. The metrics are
 * tested in the order listed and later ones are not evaluated after a
 * failure.
 */
bool check_uca_org(uca_org_t & u, min_values_t *minval) {
    const bool out_of_bounds =
        (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
         g_ip->delay_dev) ||
        (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
         g_ip->dynamic_power_dev) ||
        (((u.power.readOp.leakage - minval->min_leakage) /
          minval->min_leakage) * 100 >
         g_ip->leakage_power_dev) ||
        (((u.cycle_time - minval->min_cyc) / minval->min_cyc)*100 >
         g_ip->cycle_time_dev) ||
        (((u.area - minval->min_area) / minval->min_area)*100 >
         g_ip->area_dev);

    return !out_of_bounds;
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Check a memory-array result against the deviation limits configured in
 * g_ip.
 *
 * For each of access time, read dynamic power, read leakage, cycle time and
 * area, the percentage by which u exceeds the corresponding minimum in
 * *minval is compared with the matching g_ip->*_dev limit. Returns false as
 * soon as one metric exceeds its limit, true when none does. The metrics are
 * tested in the order listed and later ones are not evaluated after a
 * failure.
 */
bool check_mem_org(mem_array & u, const min_values_t *minval) {
    const bool out_of_bounds =
        (((u.access_time - minval->min_delay) * 100 / minval->min_delay) >
         g_ip->delay_dev) ||
        (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 >
         g_ip->dynamic_power_dev) ||
        (((u.power.readOp.leakage - minval->min_leakage) /
          minval->min_leakage) * 100 >
         g_ip->leakage_power_dev) ||
        (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 >
         g_ip->cycle_time_dev) ||
        (((u.area - minval->min_area) / minval->min_area) * 100 >
         g_ip->area_dev);

    return !out_of_bounds;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Pick the lowest-cost organization from ulist and copy it into *res.
 *
 * The cost function depends on g_ip->ed:
 *   ed == 1  normalized access time times normalized read dynamic power;
 *   ed == 2  normalized access time squared times normalized read dynamic
 *            power;
 *   else     weighted sum of the normalized delay, cycle time, dynamic
 *            power, leakage and area, using the g_ip->*_wt weights, taken
 *            only over entries that pass check_uca_org().
 * "Normalized" means divided by the matching minimum in *minval.
 *
 * In the weighted mode the list is modified: entries that fail
 * check_uca_org() are erased, and so is every entry that becomes the new
 * best (it has already been copied into *res).
 *
 * Prints an error and terminates the process with a non-zero status when
 * ulist is empty or when no entry produced a cost below BIGNUM.
 */
void find_optimal_uca(uca_org_t *res, min_values_t * minval,
                      list<uca_org_t> & ulist) {
    double cost = 0;
    double min_cost = BIGNUM;

    const float dp = g_ip->dynamic_power_wt;
    const float lp = g_ip->leakage_power_wt;
    const float a = g_ip->area_wt;
    const float d = g_ip->delay_wt;
    const float c = g_ip->cycle_time_wt;

    if (ulist.empty() == true) {
        cout << "ERROR: no valid cache organizations found" << endl;
        // Error exit: report failure to the caller's shell/script.
        exit(1);
    }

    // The iterator is advanced explicitly in every branch. erase() already
    // returns the element after the erased one, so that path must not be
    // followed by an increment (doing so skips an element, or steps past
    // end() when the list has just become empty).
    list<uca_org_t>::iterator niter = ulist.begin();
    while (niter != ulist.end()) {
        if (g_ip->ed == 1) {
            cost = ((niter)->access_time / minval->min_delay) *
                ((niter)->power.readOp.dynamic / minval->min_dyn);
            if (min_cost > cost) {
                min_cost = cost;
                *res = (*(niter));
            }
            ++niter;
        } else if (g_ip->ed == 2) {
            cost = ((niter)->access_time / minval->min_delay) *
                ((niter)->access_time / minval->min_delay) *
                ((niter)->power.readOp.dynamic / minval->min_dyn);
            if (min_cost > cost) {
                min_cost = cost;
                *res = (*(niter));
            }
            ++niter;
        } else {
            // Only organizations within the input deviation constraints
            // take part in the weighted comparison.
            if (!check_uca_org(*niter, minval)) {
                niter = ulist.erase(niter);
                continue;
            }

            cost = (d * ((niter)->access_time / minval->min_delay) +
                    c * ((niter)->cycle_time / minval->min_cyc) +
                    dp * ((niter)->power.readOp.dynamic / minval->min_dyn) +
                    lp *
                    ((niter)->power.readOp.leakage / minval->min_leakage) +
                    a * ((niter)->area / minval->min_area));

            if (min_cost > cost) {
                min_cost = cost;
                *res = (*(niter));
                niter = ulist.erase(niter);
            } else {
                ++niter;
            }
        }
    }

    if (min_cost == BIGNUM) {
        cout << "ERROR: no cache organizations met optimization criteria"
             << endl;
        exit(1);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
void filter_tag_arr(const min_values_t * min, list<mem_array *> & list) {
|
|
|
|
double cost = BIGNUM;
|
|
|
|
double cur_cost;
|
|
|
|
double wt_delay = g_ip->delay_wt;
|
|
|
|
double wt_dyn = g_ip->dynamic_power_wt;
|
|
|
|
double wt_leakage = g_ip->leakage_power_wt;
|
|
|
|
double wt_cyc = g_ip->cycle_time_wt;
|
|
|
|
double wt_area = g_ip->area_wt;
|
|
|
|
mem_array * res = NULL;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if (list.empty() == true) {
|
|
|
|
cout << "ERROR: no valid tag organizations found" << endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
while (list.empty() != true) {
|
|
|
|
bool v = check_mem_org(*list.back(), min);
|
|
|
|
if (v) {
|
|
|
|
cur_cost = wt_delay * (list.back()->access_time / min->min_delay) +
|
|
|
|
wt_dyn * (list.back()->power.readOp.dynamic /
|
|
|
|
min->min_dyn) +
|
|
|
|
wt_leakage * (list.back()->power.readOp.leakage /
|
|
|
|
min->min_leakage) +
|
|
|
|
wt_area * (list.back()->area / min->min_area) +
|
|
|
|
wt_cyc * (list.back()->cycle_time / min->min_cyc);
|
|
|
|
} else {
|
|
|
|
cur_cost = BIGNUM;
|
|
|
|
}
|
|
|
|
if (cur_cost < cost) {
|
|
|
|
if (res != NULL) {
|
|
|
|
delete res;
|
|
|
|
}
|
|
|
|
cost = cur_cost;
|
|
|
|
res = list.back();
|
|
|
|
} else {
|
|
|
|
delete list.back();
|
|
|
|
}
|
|
|
|
list.pop_back();
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
if (!res) {
|
|
|
|
cout << "ERROR: no valid tag organizations found" << endl;
|
|
|
|
exit(0);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
list.push_back(res);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/*
 * Prune data-array candidates that are clearly dominated.
 *
 * An entry is deleted and removed from curr_list when BOTH its access time
 * and its read dynamic power are more than 50% above the minima recorded in
 * the entry's own arr_min. All other entries are left in place, in their
 * original order.
 *
 * Prints an error and terminates the process with status 1 when the list is
 * empty on entry; terminates with status 1 (no message) when a NULL entry
 * is encountered.
 */
void filter_data_arr(list<mem_array *> & curr_list) {
    if (curr_list.empty() == true) {
        cout << "ERROR: no valid data array organizations found" << endl;
        exit(1);
    }

    // The iterator is advanced explicitly: erase() returns the element after
    // the erased one, so no decrement/increment dance is needed (stepping
    // back from begin() after erasing the first element is undefined).
    list<mem_array *>::iterator iter = curr_list.begin();
    while (iter != curr_list.end()) {
        mem_array * m = *iter;

        if (m == NULL) exit(1);

        if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay >
             0.5) &&
            ((m->power.readOp.dynamic - m->arr_min->min_dyn) /
             m->arr_min->min_dyn > 0.5)) {
            delete m;
            iter = curr_list.erase(iter);
        } else {
            ++iter;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Performs exhaustive search across different sub-array sizes,
|
|
|
|
* wire types and aspect ratios to find an optimal UCA organization
|
|
|
|
* 1. First different valid tag array organizations are calculated
|
|
|
|
* and stored in tag_arr array
|
|
|
|
* 2. The exhaustive search is repeated to find valid data array
|
|
|
|
* organizations and stored in data_arr array
|
|
|
|
* 3. Cache area, delay, power, and cycle time for different
|
|
|
|
* cache organizations are calculated based on the
|
|
|
|
* above results
|
|
|
|
* 4. Cache model with least cost is picked from sol_list
|
|
|
|
*/
|
2014-06-03 22:32:59 +02:00
|
|
|
void solve(uca_org_t *fin_res) {
|
|
|
|
bool is_dram = false;
|
|
|
|
int pure_ram = g_ip->pure_ram;
|
|
|
|
bool pure_cam = g_ip->pure_cam;
|
|
|
|
|
|
|
|
init_tech_params(g_ip->F_sz_um, false);
|
|
|
|
|
|
|
|
|
|
|
|
list<mem_array *> tag_arr (0);
|
|
|
|
list<mem_array *> data_arr(0);
|
|
|
|
list<mem_array *>::iterator miter;
|
|
|
|
list<uca_org_t> sol_list(1, uca_org_t());
|
|
|
|
|
|
|
|
fin_res->tag_array.access_time = 0;
|
|
|
|
fin_res->tag_array.Ndwl = 0;
|
|
|
|
fin_res->tag_array.Ndbl = 0;
|
|
|
|
fin_res->tag_array.Nspd = 0;
|
|
|
|
fin_res->tag_array.deg_bl_muxing = 0;
|
|
|
|
fin_res->tag_array.Ndsam_lev_1 = 0;
|
|
|
|
fin_res->tag_array.Ndsam_lev_2 = 0;
|
|
|
|
|
|
|
|
|
|
|
|
// distribute calculate_time() execution to multiple threads
|
|
|
|
calc_time_mt_wrapper_struct * calc_array =
|
|
|
|
new calc_time_mt_wrapper_struct[nthreads];
|
|
|
|
pthread_t threads[nthreads];
|
|
|
|
|
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
calc_array[t].tid = t;
|
|
|
|
calc_array[t].pure_ram = pure_ram;
|
|
|
|
calc_array[t].pure_cam = pure_cam;
|
|
|
|
calc_array[t].data_res = new min_values_t();
|
|
|
|
calc_array[t].tag_res = new min_values_t();
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
bool is_tag;
|
|
|
|
uint32_t ram_cell_tech_type;
|
|
|
|
|
|
|
|
// If it's a cache, first calculate the area, delay and power for all tag array partitions.
|
|
|
|
if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache
|
|
|
|
is_tag = true;
|
|
|
|
ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type;
|
|
|
|
is_dram = ((ram_cell_tech_type == lp_dram) ||
|
|
|
|
(ram_cell_tech_type == comm_dram));
|
|
|
|
init_tech_params(g_ip->F_sz_um, is_tag);
|
|
|
|
|
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
calc_array[t].is_tag = is_tag;
|
|
|
|
calc_array[t].is_main_mem = false;
|
|
|
|
calc_array[t].Nspd_min = 0.125;
|
|
|
|
#ifndef DEBUG
|
|
|
|
pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
|
|
|
|
(void *)(&(calc_array[t])));
|
|
|
|
#else
|
|
|
|
calc_time_mt_wrapper((void *)(&(calc_array[t])));
|
|
|
|
#endif
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
#ifndef DEBUG
|
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
pthread_join(threads[t], NULL);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
calc_array[t].data_arr.sort(mem_array::lt);
|
|
|
|
data_arr.merge(calc_array[t].data_arr, mem_array::lt);
|
|
|
|
calc_array[t].tag_arr.sort(mem_array::lt);
|
|
|
|
tag_arr.merge(calc_array[t].tag_arr, mem_array::lt);
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
// calculate the area, delay and power for all data array partitions (for cache or plain RAM).
|
|
|
|
// in the new cacti, cam, fully_associative cache are processed as single array in the data portion
|
2014-04-01 18:44:30 +02:00
|
|
|
is_tag = false;
|
|
|
|
ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type;
|
|
|
|
is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram));
|
|
|
|
init_tech_params(g_ip->F_sz_um, is_tag);
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
calc_array[t].is_tag = is_tag;
|
|
|
|
calc_array[t].is_main_mem = g_ip->is_main_mem;
|
|
|
|
if (!(pure_cam || g_ip->fully_assoc)) {
|
|
|
|
calc_array[t].Nspd_min = (double)(g_ip->out_w) /
|
|
|
|
(double)(g_ip->block_sz * 8);
|
|
|
|
} else {
|
|
|
|
calc_array[t].Nspd_min = 1;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
#ifndef DEBUG
|
|
|
|
pthread_create(&threads[t], NULL, calc_time_mt_wrapper,
|
|
|
|
(void *)(&(calc_array[t])));
|
|
|
|
#else
|
|
|
|
calc_time_mt_wrapper((void *)(&(calc_array[t])));
|
|
|
|
#endif
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
#ifndef DEBUG
|
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
pthread_join(threads[t], NULL);
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
#endif
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
data_arr.clear();
|
2014-06-03 22:32:59 +02:00
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
calc_array[t].data_arr.sort(mem_array::lt);
|
|
|
|
data_arr.merge(calc_array[t].data_arr, mem_array::lt);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
min_values_t * d_min = new min_values_t();
|
|
|
|
min_values_t * t_min = new min_values_t();
|
|
|
|
min_values_t * cache_min = new min_values_t();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
d_min->update_min_values(calc_array[t].data_res);
|
|
|
|
t_min->update_min_values(calc_array[t].tag_res);
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
|
|
|
|
(*miter)->arr_min = d_min;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
filter_data_arr(data_arr);
|
|
|
|
if (!(pure_ram || pure_cam || g_ip->fully_assoc)) {
|
|
|
|
filter_tag_arr(t_min, tag_arr);
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if (pure_ram || pure_cam || g_ip->fully_assoc) {
|
|
|
|
for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
|
|
|
|
uca_org_t & curr_org = sol_list.back();
|
|
|
|
curr_org.tag_array2 = NULL;
|
|
|
|
curr_org.data_array2 = (*miter);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
curr_org.find_delay();
|
|
|
|
curr_org.find_energy();
|
|
|
|
curr_org.find_area();
|
|
|
|
curr_org.find_cyc();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
//update min values for the entire cache
|
|
|
|
cache_min->update_min_values(curr_org);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
sol_list.push_back(uca_org_t());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
while (tag_arr.empty() != true) {
|
|
|
|
mem_array * arr_temp = (tag_arr.back());
|
|
|
|
tag_arr.pop_back();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (miter = data_arr.begin(); miter != data_arr.end(); miter++) {
|
|
|
|
uca_org_t & curr_org = sol_list.back();
|
|
|
|
curr_org.tag_array2 = arr_temp;
|
|
|
|
curr_org.data_array2 = (*miter);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
curr_org.find_delay();
|
|
|
|
curr_org.find_energy();
|
|
|
|
curr_org.find_area();
|
|
|
|
curr_org.find_cyc();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
//update min values for the entire cache
|
|
|
|
cache_min->update_min_values(curr_org);
|
|
|
|
|
|
|
|
sol_list.push_back(uca_org_t());
|
|
|
|
}
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
sol_list.pop_back();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
find_optimal_uca(fin_res, cache_min, sol_list);
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
sol_list.clear();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) {
|
|
|
|
if (*miter != fin_res->data_array2) {
|
|
|
|
delete *miter;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
data_arr.clear();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
for (uint32_t t = 0; t < nthreads; t++) {
|
|
|
|
delete calc_array[t].data_res;
|
|
|
|
delete calc_array[t].tag_res;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
delete [] calc_array;
|
|
|
|
delete cache_min;
|
|
|
|
delete d_min;
|
|
|
|
delete t_min;
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Recompute the stored power of an already-selected organization.
 *
 * For the tag array (only when fin_res->tag_array2 is non-null) and then for
 * the data array, this function:
 *   1. re-initializes the technology parameters for that array kind,
 *   2. rebuilds a DynamicParameter from the partitioning recorded in the
 *      result (Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2),
 *   3. constructs a temporary UCA from it and copies that UCA's power into
 *      the stored array.
 * It then calls fin_res->find_energy().
 *
 * If a rebuilt DynamicParameter reports itself invalid, an error message is
 * printed to stdout and the process exits with status 1.
 *
 * @param fin_res  Result to update in place. fin_res->data_array2 is
 *                 dereferenced unconditionally and therefore must be
 *                 non-null; fin_res->tag_array2 may be null.
 */
void update(uca_org_t *fin_res)
{
    if (fin_res->tag_array2) {
        // Technology parameters are global state; select the tag-array set
        // before building the tag DynamicParameter.
        init_tech_params(g_ip->F_sz_um, true);
        DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam,
                                       fin_res->tag_array2->Nspd,
                                       fin_res->tag_array2->Ndwl,
                                       fin_res->tag_array2->Ndbl,
                                       fin_res->tag_array2->Ndcm,
                                       fin_res->tag_array2->Ndsam_lev_1,
                                       fin_res->tag_array2->Ndsam_lev_2,
                                       g_ip->is_main_mem);
        if (tag_arr_dyn_p.is_valid) {
            UCA * tag_arr = new UCA(tag_arr_dyn_p);
            fin_res->tag_array2->power = tag_arr->power;
            // The UCA is only needed to obtain the power figures; release it
            // once they have been copied so repeated calls do not leak.
            delete tag_arr;
        } else {
            cout << "ERROR: Cannot retrieve array structure for leakage feedback"
                 << endl;
            exit(1);
        }
    }

    // Switch the global technology parameters to the data-array set.
    init_tech_params(g_ip->F_sz_um, false);
    DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam,
                                    fin_res->data_array2->Nspd,
                                    fin_res->data_array2->Ndwl,
                                    fin_res->data_array2->Ndbl,
                                    fin_res->data_array2->Ndcm,
                                    fin_res->data_array2->Ndsam_lev_1,
                                    fin_res->data_array2->Ndsam_lev_2,
                                    g_ip->is_main_mem);
    if (data_arr_dyn_p.is_valid) {
        UCA * data_arr = new UCA(data_arr_dyn_p);
        fin_res->data_array2->power = data_arr->power;
        // Same as for the tag array: free the temporary after copying.
        delete data_arr;
    } else {
        cout << "ERROR: Cannot retrieve array structure for leakage feedback"
             << endl;
        exit(1);
    }

    // Re-run the result's energy computation now that the per-array power
    // values have been replaced.
    fin_res->find_energy();
}
|
|
|
|
|