gem5/ext/mcpat/noc.cc
Anthony Gutierrez e553a7bfa7 ext: add McPAT source
this patch adds the source for mcpat, a power, area, and timing modeling
framework.
2014-04-01 12:44:30 -04:00

356 lines
19 KiB
C++

/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <string>
#include "XML_Parse.h"
#include "basic_circuit.h"
#include "const.h"
#include "io.h"
#include "noc.h"
#include "parameter.h"
/**
 * Construct the on-chip network model for one entry of the XML description.
 *
 * @param XML_interface      parsed XML configuration; the pointer is stored in XML
 * @param ithNoC_            index of this network inside XML->sys.NoC
 * @param interface_ip_      input parameters used to initialise interface_ip
 * @param M_traffic_pattern_ traffic-pattern factor stored in M_traffic_pattern
 * @param link_len_          link length handed to init_link_bus(); only used
 *                           here when nocdynp.type is false (bus case)
 */
NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_)
    : XML(XML_interface),
      ithNoC(ithNoC_),
      interface_ip(*interface_ip_),
      router(0),
      link_bus(0),
      link_bus_exist(false),
      router_exist(false),
      M_traffic_pattern(M_traffic_pattern_)
{
    // Wire selection: embedded designs use Global_30 wires with material
    // types 0/1, everything else uses Global wires with material types 2/2.
    const bool embedded = XML->sys.Embedded;
    interface_ip.wt = embedded ? Global_30 : Global;
    interface_ip.wire_is_mat_type = embedded ? 0 : 2;
    interface_ip.wire_os_mat_type = embedded ? 1 : 2;

    // Read the XML parameters first: everything below depends on nocdynp.
    set_noc_param();
    local_result = init_interface(&interface_ip);
    scktRatio = g_tp.sckt_co_eff;

    if (!nocdynp.type)
    {
        // Bus: the bus wires are modelled right away.
        init_link_bus(link_len_);
    }
    else
    {
        // NoC: only the router is built here. The router links have to be
        // computed separately through an external call to init_link_bus(),
        // because the total chip area must be known first.
        init_router();
    }

    // Clock-network modelling is disabled; original code kept for reference.
    //  //clock power
    //  clockNetwork.init_wire_external(is_default, &interface_ip);
    //  clockNetwork.clk_area           =area*1.1;//10% of placement overhead. rule of thumb
    //  clockNetwork.end_wiring_level   =5;//toplevel metal
    //  clockNetwork.start_wiring_level =5;//toplevel metal
    //  clockNetwork.num_regs           = corepipe.tot_stage_vector;
    //  clockNetwork.optimize_wire();
}
void NoC::init_router()
{
router = new Router(nocdynp.flit_size,
nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc,
nocdynp.virtual_channel_per_port, &(g_tp.peri_global),
nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern);
//router->print_router();
area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes);
double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device);
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction;
router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction;
router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction;
router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction;
router_exist = true;
}
/**
 * Build the interconnect model used for the router links (NoC) or for the
 * bus wires, and add its area to this component.
 *
 * @param link_len_ overall length to distribute over the links; must be > 0.
 *
 * The per-link length is link_len_ divided by the average number of nodes
 * per dimension, and halved again when there is more than one node.
 * Sets link_bus_exist once the interconnect is available.
 */
void NoC ::init_link_bus(double link_len_)
{
    //  if (nocdynp.min_ports==1 )
    link_name = nocdynp.type ? "Links" : "Bus";

    link_len = link_len_;
    assert(link_len > 0);

    interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate;
    interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate;

    // Average node count per dimension. Dividing by 2.0 forces a
    // floating-point average: with integral node counts (their declaration
    // lives outside this file) "/2" would truncate, e.g. a 1x2 mesh would
    // divide by 1 instead of 1.5, and a sum below 2 would divide by zero.
    const double avg_nodes_per_dim =
        (nocdynp.horizontal_nodes + nocdynp.vertical_nodes)/2.0;
    assert(avg_nodes_per_dim > 0);
    link_len /= avg_nodes_per_dim;

    if (nocdynp.total_nodes > 1)
        link_len /= 2; //All links are shared by neighbors

    link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size,
                                link_len, &interface_ip, 3, true/*pipelinable*/,
                                nocdynp.route_over_perc);

    // Per-router link area: one interconnect per globally linked port.
    link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()
                                          + link_bus->area.get_area()*nocdynp.global_linked_ports);
    // Total area: per-router link area for every node.
    area.set_area(area.get_area()
                  + link_bus_tot_per_Router.area.get_area()*nocdynp.total_nodes);

    link_bus_exist = true;
}
void NoC::computeEnergy(bool is_tdp)
{
//power_point_product_masks
double pppm_t[4] = {1,1,1,1};
double M=nocdynp.duty_cycle;
if (is_tdp)
{
//init stats for TDP
stats_t.readAc.access = M;
tdp_stats = stats_t;
if (router_exist)
{
set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern
router->power = router->power*pppm_t;
set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes);
power = power + router->power*pppm_t;
}
if (link_bus_exist)
{
if (nocdynp.type)
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports,
nocdynp.global_linked_ports, nocdynp.global_linked_ports);
//reset traffic pattern; local port do not have router links
else
set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports,
nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern
link_bus_tot_per_Router.power = link_bus->power*pppm_t;
set_pppm(pppm_t, nocdynp.total_nodes,
nocdynp.total_nodes,
nocdynp.total_nodes,
nocdynp.total_nodes);
power = power + link_bus_tot_per_Router.power*pppm_t;
}
}
else
{
//init stats for runtime power (RTP)
stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses;
rtp_stats = stats_t;
set_pppm(pppm_t, 1, 0 , 0, 0);
if (router_exist)
{
router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ;
router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ;
router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ;
router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t +
router->power*pppm_lkg;//TDP power must be calculated first!
rt_power = rt_power + router->rt_power;
}
if (link_bus_exist)
{
set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access);
link_bus->rt_power = link_bus->power * pppm_t;
rt_power = rt_power + link_bus->rt_power;
}
}
}
void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
double M =M_traffic_pattern*nocdynp.duty_cycle;
/*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc;
* When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to
* be applied together with McPAT's extra traffic pattern.
* */
if (is_tdp)
{
cout << name << endl;
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
cout << indent_str << "Subthreshold Leakage = "
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout<<endl;
if (router_exist)
{
cout << indent_str << "Router: " << endl;
cout << indent_str_next << "Area = " << router->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next<< "Peak Dynamic = " << router->power.readOp.dynamic*nocdynp.clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->power.readOp.longer_channel_leakage:router->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << router->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next<< "Runtime Dynamic = " << router->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout<<endl;
if (plevel >2){
cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl;
cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl;
cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic)
*nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl;
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl;
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl;
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< indent_str<< "Crossbar:" << endl;
cout << indent_str<< indent_str_next << "Area = " << router->crossbar.area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->crossbar.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->crossbar.power.readOp.longer_channel_leakage:router->crossbar.power.readOp.leakage) << " W" << endl;
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->crossbar.power.readOp.gate_leakage << " W" << endl;
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->crossbar.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< indent_str<< "Arbiter:" << endl;
cout << indent_str<< indent_str_next << "Peak Dynamic = " << router->arbiter.power.readOp.dynamic*nocdynp.clockRate*nocdynp.min_ports*M << " W" << endl;
cout << indent_str<< indent_str_next << "Subthreshold Leakage = "
<< (long_channel? router->arbiter.power.readOp.longer_channel_leakage:router->arbiter.power.readOp.leakage) << " W" << endl;
cout << indent_str<< indent_str_next << "Gate Leakage = " << router->arbiter.power.readOp.gate_leakage << " W" << endl;
cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->arbiter.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout <<endl;
}
}
if (link_bus_exist)
{
cout << indent_str << (nocdynp.type? "Per Router ":"") << link_name<<": " << endl;
cout << indent_str_next << "Area = " << link_bus_tot_per_Router.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next<< "Peak Dynamic = " << link_bus_tot_per_Router.power.readOp.dynamic*
nocdynp.clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? link_bus_tot_per_Router.power.readOp.longer_channel_leakage:link_bus_tot_per_Router.power.readOp.leakage)
<<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << link_bus_tot_per_Router.power.readOp.gate_leakage
<< " W" << endl;
cout << indent_str_next<< "Runtime Dynamic = " << link_bus->rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl;
cout<<endl;
}
}
else
{
// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
}
}
/**
 * Copy the parameters of network number ithNoC from the XML description
 * into nocdynp, derive the dependent values (port counts, total node count,
 * execution time) and choose the display name of the component.
 */
void NoC::set_noc_param()
{
    nocdynp.type = XML->sys.NoC[ithNoC].type;

    // Timing. Clock rates from the XML are scaled by 1e6 (presumably MHz
    // to Hz; the unit is not stated in this file).
    nocdynp.clockRate = XML->sys.NoC[ithNoC].clockrate * 1e6;
    nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6);

    nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits;

    if (nocdynp.type)
    {
        name = "NOC";
        nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports;
        nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1
        /*
         * Except for the local i/o ports, every port needs a link
         * (global_linked_ports). However, only min_ports can be fully
         * active simultaneously, since the smaller of the input and output
         * port counts is the bottleneck.
         */
        nocdynp.min_ports = (nocdynp.output_ports < nocdynp.input_ports)
            ? nocdynp.output_ports
            : nocdynp.input_ports;
        nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1);
    }
    else
    {
        // A bus is modelled with a single port in every category.
        name = "BUSES";
        nocdynp.input_ports = 1;
        nocdynp.output_ports = 1;
        nocdynp.min_ports = 1;
        nocdynp.global_linked_ports = 1;
    }

    // Buffering
    nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port;
    nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc;

    // Topology
    nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes;
    nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes;
    nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes;

    // Activity and links
    nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle;
    nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link;
    nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput;
    nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency;

    // Both values are fractions and must not exceed 1.
    nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage;
    assert (nocdynp.chip_coverage <=1);
    nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc;
    assert (nocdynp.route_over_perc <=1);
}
/**
 * Release the router and link/bus models created by init_router() and
 * init_link_bus(). Either pointer may still be null; deleting a null
 * pointer is a no-op.
 */
NoC ::~NoC(){
    delete router;
    router = 0;

    delete link_bus;
    link_bus = 0;
}