2014-04-01 18:44:30 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
* McPAT
|
|
|
|
* SOFTWARE LICENSE AGREEMENT
|
|
|
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
2014-06-03 22:32:59 +02:00
|
|
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
2014-04-01 18:44:30 +02:00
|
|
|
* All Rights Reserved
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are
|
|
|
|
* met: redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer;
|
|
|
|
* redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution;
|
|
|
|
* neither the name of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
|
|
|
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
2014-06-03 22:32:59 +02:00
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
2014-04-01 18:44:30 +02:00
|
|
|
*
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#include <iostream>
|
2014-06-03 22:32:59 +02:00
|
|
|
#include <math.h>
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
#include "area.h"
|
|
|
|
#include "array.h"
|
2014-06-03 22:32:59 +02:00
|
|
|
#include "common.h"
|
2014-04-01 18:44:30 +02:00
|
|
|
#include "decoder.h"
|
|
|
|
#include "parameter.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
// Class-wide defaults for the array search. computeEnergy() copies the
// weights and deviations below into the per-array input parameters (l_ip)
// before it re-runs the search under timing constraints.

// Area-efficiency bound compared against data_array2->area_efficiency in
// computeEnergy() when l_ip.assoc == 0.
double ArrayST::area_efficiency_threshold = 20.0;
// Value copied into l_ip.ed before the timing-driven search.
int ArrayST::ed = 0;

// Objective weights.
// The timing weights are fixed so that the timing constraints can be met.
int ArrayST::delay_wt = 100;
int ArrayST::cycle_time_wt = 1000;
// Fixed weight; used for the exhaustive search of individual components.
int ArrayST::area_wt = 10;
// Fixed weights; used for the exhaustive search of individual components.
int ArrayST::dynamic_power_wt = 10;
int ArrayST::leakage_power_wt = 10;

// Allowed deviations.
// The timing deviations are fixed so that the timing constraints can be met.
int ArrayST::delay_dev = 1000000;
int ArrayST::cycle_time_dev = 100;
// Fixed deviation; used for the exhaustive search of individual components.
int ArrayST::area_dev = 1000000;
// Fixed deviations; used for the exhaustive search of individual components.
int ArrayST::dynamic_power_dev = 1000000;
int ArrayST::leakage_power_dev = 1000000;

// computeEnergy() lowers l_ip.cycle_time_dev by this amount after every
// search pass and stops iterating once the deviation is no longer above it.
int ArrayST::cycle_time_dev_threshold = 10;
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Construct an array component and evaluate it immediately.
 *
 * The input parameters are copied into l_ip, so later adjustments made by
 * this object do not touch the caller's structure.
 *
 * @param _xml_data            Node forwarded to the McPATComponent base.
 * @param configure_interface  Array input parameters; dereferenced
 *                             unconditionally, so it must not be null.
 * @param _name                Component name; also passed to the parameter
 *                             check and used in warnings.
 * @param device_ty_           Device type stored for the leakage scaling.
 * @param _clockRate           Clock rate used for the peak dynamic power.
 * @param opt_local_           Enables the timing-driven search in
 *                             computeEnergy() (together with opt_for_clk).
 * @param core_ty_             Core type stored for the leakage scaling.
 * @param _is_default          Stored in is_default.
 *
 * Terminates the process with exit(1) when the parameter check fails.
 */
ArrayST::ArrayST(XMLNode* _xml_data,
                 const InputParameter *configure_interface,
                 string _name,
                 enum Device_ty device_ty_,
                 double _clockRate,
                 bool opt_local_,
                 enum Core_type core_ty_,
                 bool _is_default)
    : McPATComponent(_xml_data),
      l_ip(*configure_interface),
      device_ty(device_ty_),
      opt_local(opt_local_),
      core_ty(core_ty_),
      is_default(_is_default) {
    name = _name;
    clockRate = _clockRate;

    // Never model an array smaller than the minimum buffer size.
    if (l_ip.cache_sz < MIN_BUFFER_SIZE) {
        l_ip.cache_sz = MIN_BUFFER_SIZE;
    }

    // An invalid configuration is fatal.
    const bool params_ok = l_ip.error_checking(name);
    if (!params_ok) {
        exit(1);
    }

    output_data.reset();

    // Energy first: computeArea() reads the result computeEnergy() produces.
    computeEnergy();
    computeArea();
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/**
 * Evaluate the array with the current input parameters.
 *
 * Overwrites local_result with whatever cacti_interface() returns for l_ip.
 * The previous contents of local_result are not cleaned up here; callers
 * that need that must do it before calling.
 */
void ArrayST::compute_base_power() {
    local_result = cacti_interface(&l_ip);
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/**
 * Publish the area of the current result.
 *
 * Stores local_result.area in the component's area object and writes the
 * same value divided by 1e6 to output_data.area.
 * NOTE(review): the 1e6 divisor looks like a um^2 -> mm^2 conversion;
 * confirm against the units cacti_interface() reports.
 */
void ArrayST::computeArea() {
    // Unscaled value for the component's own bookkeeping.
    area.set_area(local_result.area);

    // Scaled value for the reported output.
    output_data.area = local_result.area / 1e6;
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
void ArrayST::computeEnergy() {
|
|
|
|
list<uca_org_t > candidate_solutions(0);
|
|
|
|
list<uca_org_t >::iterator candidate_iter, min_dynamic_energy_iter;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
uca_org_t* temp_res = NULL;
|
|
|
|
local_result.valid = false;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
double throughput = l_ip.throughput;
|
|
|
|
double latency = l_ip.latency;
|
|
|
|
bool throughput_overflow = true;
|
|
|
|
bool latency_overflow = true;
|
|
|
|
compute_base_power();
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if ((local_result.cycle_time - throughput) <= 1e-10 )
|
|
|
|
throughput_overflow = false;
|
|
|
|
if ((local_result.access_time - latency) <= 1e-10)
|
|
|
|
latency_overflow = false;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if (opt_for_clk && opt_local) {
|
|
|
|
if (throughput_overflow || latency_overflow) {
|
|
|
|
l_ip.ed = ed;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
l_ip.delay_wt = delay_wt;
|
|
|
|
l_ip.cycle_time_wt = cycle_time_wt;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
l_ip.area_wt = area_wt;
|
|
|
|
l_ip.dynamic_power_wt = dynamic_power_wt;
|
|
|
|
l_ip.leakage_power_wt = leakage_power_wt;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
l_ip.delay_dev = delay_dev;
|
|
|
|
l_ip.cycle_time_dev = cycle_time_dev;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
l_ip.area_dev = area_dev;
|
|
|
|
l_ip.dynamic_power_dev = dynamic_power_dev;
|
|
|
|
l_ip.leakage_power_dev = leakage_power_dev;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
//Reset overflow flag before start optimization iterations
|
|
|
|
throughput_overflow = true;
|
|
|
|
latency_overflow = true;
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
//Clean up the result for optimized for ED^2P
|
|
|
|
temp_res = &local_result;
|
|
|
|
temp_res->cleanup();
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
while ((throughput_overflow || latency_overflow) &&
|
|
|
|
l_ip.cycle_time_dev > cycle_time_dev_threshold) {
|
|
|
|
compute_base_power();
|
|
|
|
|
|
|
|
//This is the time_dev to be used for next iteration
|
|
|
|
l_ip.cycle_time_dev -= cycle_time_dev_threshold;
|
|
|
|
|
|
|
|
// from best area to worst area -->worst timing to best timing
|
|
|
|
if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
|
|
|
|
(local_result.access_time - latency) <= 1e-10) ||
|
|
|
|
(local_result.data_array2->area_efficiency <
|
|
|
|
area_efficiency_threshold && l_ip.assoc == 0)) {
|
|
|
|
//if no satisfiable solution is found,the most aggressive one
|
|
|
|
//is left
|
|
|
|
candidate_solutions.push_back(local_result);
|
|
|
|
if (((local_result.cycle_time - throughput) <= 1e-10) &&
|
|
|
|
((local_result.access_time - latency) <= 1e-10)) {
|
|
|
|
//ensure stop opt not because of cam
|
|
|
|
throughput_overflow = false;
|
|
|
|
latency_overflow = false;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
} else {
|
|
|
|
if ((local_result.cycle_time - throughput) <= 1e-10)
|
|
|
|
throughput_overflow = false;
|
|
|
|
if ((local_result.access_time - latency) <= 1e-10)
|
|
|
|
latency_overflow = false;
|
|
|
|
|
|
|
|
//if not >10 local_result is the last result, it cannot be
|
|
|
|
//cleaned up
|
|
|
|
if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
|
|
|
|
//Only solutions not saved in the list need to be
|
|
|
|
//cleaned up
|
|
|
|
temp_res = &local_result;
|
|
|
|
temp_res->cleanup();
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
}
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
if (l_ip.assoc > 0) {
|
|
|
|
//For array structures except CAM and FA, Give warning but still
|
|
|
|
//provide a result with best timing found
|
|
|
|
if (throughput_overflow == true)
|
|
|
|
cout << "Warning: " << name
|
|
|
|
<< " array structure cannot satisfy throughput constraint."
|
|
|
|
<< endl;
|
|
|
|
if (latency_overflow == true)
|
|
|
|
cout << "Warning: " << name
|
|
|
|
<< " array structure cannot satisfy latency constraint."
|
|
|
|
<< endl;
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
double min_dynamic_energy = BIGNUM;
|
|
|
|
if (candidate_solutions.empty() == false) {
|
|
|
|
local_result.valid = true;
|
|
|
|
for (candidate_iter = candidate_solutions.begin();
|
|
|
|
candidate_iter != candidate_solutions.end();
|
|
|
|
++candidate_iter) {
|
|
|
|
if (min_dynamic_energy >
|
|
|
|
(candidate_iter)->power.readOp.dynamic) {
|
|
|
|
min_dynamic_energy =
|
|
|
|
(candidate_iter)->power.readOp.dynamic;
|
|
|
|
min_dynamic_energy_iter = candidate_iter;
|
|
|
|
local_result = *(min_dynamic_energy_iter);
|
|
|
|
} else {
|
|
|
|
candidate_iter->cleanup() ;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
|
|
|
|
|
|
|
}
|
2014-06-03 22:32:59 +02:00
|
|
|
candidate_solutions.clear();
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
double long_channel_device_reduction =
|
|
|
|
longer_channel_device_reduction(device_ty, core_ty);
|
|
|
|
|
|
|
|
double macro_layout_overhead = g_tp.macro_layout_overhead;
|
|
|
|
double chip_PR_overhead = g_tp.chip_layout_overhead;
|
|
|
|
double total_overhead = macro_layout_overhead * chip_PR_overhead;
|
|
|
|
local_result.area *= total_overhead;
|
|
|
|
|
|
|
|
//maintain constant power density
|
|
|
|
double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
|
|
|
|
|
|
|
|
double sckRation = g_tp.sckt_co_eff;
|
|
|
|
local_result.power.readOp.dynamic *= sckRation;
|
|
|
|
local_result.power.writeOp.dynamic *= sckRation;
|
|
|
|
local_result.power.searchOp.dynamic *= sckRation;
|
|
|
|
local_result.power.readOp.leakage *= l_ip.nbanks;
|
|
|
|
local_result.power.readOp.longer_channel_leakage =
|
|
|
|
local_result.power.readOp.leakage * long_channel_device_reduction;
|
|
|
|
local_result.power = local_result.power * pppm_t;
|
|
|
|
|
|
|
|
local_result.data_array2->power.readOp.dynamic *= sckRation;
|
|
|
|
local_result.data_array2->power.writeOp.dynamic *= sckRation;
|
|
|
|
local_result.data_array2->power.searchOp.dynamic *= sckRation;
|
|
|
|
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
|
|
|
|
local_result.data_array2->power.readOp.longer_channel_leakage =
|
|
|
|
local_result.data_array2->power.readOp.leakage *
|
|
|
|
long_channel_device_reduction;
|
|
|
|
local_result.data_array2->power = local_result.data_array2->power * pppm_t;
|
|
|
|
|
|
|
|
|
|
|
|
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
|
|
|
|
local_result.tag_array2->power.readOp.dynamic *= sckRation;
|
|
|
|
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
|
|
|
|
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
|
|
|
|
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
|
|
|
|
local_result.tag_array2->power.readOp.longer_channel_leakage =
|
|
|
|
local_result.tag_array2->power.readOp.leakage *
|
|
|
|
long_channel_device_reduction;
|
|
|
|
local_result.tag_array2->power =
|
|
|
|
local_result.tag_array2->power * pppm_t;
|
|
|
|
}
|
2014-04-01 18:44:30 +02:00
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
power = local_result.power;
|
|
|
|
|
|
|
|
output_data.peak_dynamic_power = power.readOp.dynamic * clockRate;
|
|
|
|
output_data.subthreshold_leakage_power = power.readOp.leakage;
|
|
|
|
output_data.gate_leakage_power = power.readOp.gate_leakage;
|
2014-04-01 18:44:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
void ArrayST::leakage_feedback(double temperature)
|
|
|
|
{
|
|
|
|
// Update the temperature. l_ip is already set and error-checked in the creator function.
|
|
|
|
l_ip.temp = (unsigned int)round(temperature/10.0)*10;
|
|
|
|
|
|
|
|
// This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
|
|
|
|
reconfigure(&l_ip,&local_result);
|
|
|
|
|
|
|
|
// Scale the power values. This is part of ArrayST::optimize_array().
|
|
|
|
double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty);
|
|
|
|
|
|
|
|
double macro_layout_overhead = g_tp.macro_layout_overhead;
|
|
|
|
double chip_PR_overhead = g_tp.chip_layout_overhead;
|
|
|
|
double total_overhead = macro_layout_overhead*chip_PR_overhead;
|
|
|
|
|
|
|
|
double pppm_t[4] = {total_overhead,1,1,total_overhead};
|
|
|
|
|
|
|
|
double sckRation = g_tp.sckt_co_eff;
|
|
|
|
local_result.power.readOp.dynamic *= sckRation;
|
|
|
|
local_result.power.writeOp.dynamic *= sckRation;
|
|
|
|
local_result.power.searchOp.dynamic *= sckRation;
|
|
|
|
local_result.power.readOp.leakage *= l_ip.nbanks;
|
|
|
|
local_result.power.readOp.longer_channel_leakage = local_result.power.readOp.leakage*long_channel_device_reduction;
|
|
|
|
local_result.power = local_result.power* pppm_t;
|
|
|
|
|
|
|
|
local_result.data_array2->power.readOp.dynamic *= sckRation;
|
|
|
|
local_result.data_array2->power.writeOp.dynamic *= sckRation;
|
|
|
|
local_result.data_array2->power.searchOp.dynamic *= sckRation;
|
|
|
|
local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
|
|
|
|
local_result.data_array2->power.readOp.longer_channel_leakage = local_result.data_array2->power.readOp.leakage*long_channel_device_reduction;
|
|
|
|
local_result.data_array2->power = local_result.data_array2->power* pppm_t;
|
|
|
|
|
|
|
|
if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache)
|
|
|
|
{
|
|
|
|
local_result.tag_array2->power.readOp.dynamic *= sckRation;
|
|
|
|
local_result.tag_array2->power.writeOp.dynamic *= sckRation;
|
|
|
|
local_result.tag_array2->power.searchOp.dynamic *= sckRation;
|
|
|
|
local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
|
|
|
|
local_result.tag_array2->power.readOp.longer_channel_leakage = local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction;
|
|
|
|
local_result.tag_array2->power = local_result.tag_array2->power* pppm_t;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-06-03 22:32:59 +02:00
|
|
|
/**
 * Release what the currently held result owns by calling cleanup() on
 * local_result.
 */
ArrayST::~ArrayST() {
    local_result.cleanup();
}
|