gem5/ext/mcpat/core.cc
Anthony Gutierrez e553a7bfa7 ext: add McPAT source
this patch adds the source for mcpat, a power, area, and timing modeling
framework.
2014-04-01 12:44:30 -04:00

4135 lines
250 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
*
***************************************************************************/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <string>
#include "XML_Parse.h"
#include "basic_circuit.h"
#include "const.h"
#include "core.h"
#include "io.h"
#include "parameter.h"
//#include "globalvar.h"
/*
 * InstFetchU constructor.
 *
 * Builds the array models that make up the instruction-fetch unit of core
 * `ithCore_` and accumulates their areas into this component's `area`:
 *   - the instruction cache plus its miss, fill and prefetch buffers,
 *   - the instruction buffer (IB),
 *   - the branch target buffer (BTB) and branch predictor (BPT), only when
 *     coredynp.predictionW > 0,
 *   - three instruction decoders (opcode, operand, misc/prefix).
 *
 * XML_interface : parsed configuration; all sizes are read from
 *                 XML->sys.core[ithCore_].
 * ithCore_      : index of the core being modelled.
 * interface_ip_ : template array parameters; copied into the member
 *                 `interface_ip`, which is then rewritten before each array
 *                 is built. Fields not rewritten in a stanza keep the value
 *                 left by the previous stanza, so statement order matters.
 * dyn_p_        : derived per-core parameters (copied into `coredynp`).
 * exist_        : when false nothing is built.
 */
InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 IB (0),
 BTB (0),
 ID_inst (0),
 ID_operand (0),
 ID_misc (0),
 exist(exist_)
{
    // BPT has no entry in the initializer list and is only assigned below
    // when coredynp.predictionW > 0; give it a defined null value so it is
    // never left indeterminate.
    BPT = 0;

    if (!exist) return;

    int idx, tag, data, size, line, assoc;
    // debug is a compile-time switch: when true the hard-coded values in the
    // ternaries below replace the XML configuration.
    bool debug= false, is_default = true;

    clockRate = coredynp.clockRate;
    executionTime = coredynp.executionTime;
    cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7];

    //Assuming all L1 caches are virtually indexed physically tagged.
    //cache
    // icache_config[0..3] = capacity, line size, associativity, banks;
    // [4] and [5] are throughput and latency, divided by clockRate below.
    size = (int)XML->sys.core[ithCore].icache.icache_config[0];
    line = (int)XML->sys.core[ithCore].icache.icache_config[1];
    assoc = (int)XML->sys.core[ithCore].icache.icache_config[2];
    idx = debug?9:int(ceil(log2(size/line/assoc)));
    tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0];
    interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1];
    interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2];
    interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3];
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5];
    interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
    interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
    interface_ip.is_cache = true;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = false;
    // The obj_func_* weights are deliberately left as received from
    // interface_ip_ for the cache itself:
    // interface_ip.obj_func_dyn_energy = 0;
    // interface_ip.obj_func_dyn_power = 0;
    // interface_ip.obj_func_leak_power = 0;
    // interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty);
    scktRatio = g_tp.sckt_co_eff;
    chip_PR_overhead = g_tp.chip_layout_overhead;
    macro_PR_overhead = g_tp.macro_layout_overhead;
    icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area);
    area.set_area(area.get_area()+ icache.caches->local_result.area);
    //output_data_csv(icache.caches.local_result);

    /*
     * iCache controllers
     * miss buffer: each MSHR contains enough state
     * to handle one or more accesses of any type to a single memory line.
     * Due to the generality of the MSHR mechanism,
     * the amount of state involved is non-trivial:
     * including the address, pointers to the cache entry and destination register,
     * written data, and various other pieces of state.
     */
    interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
    tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
    // Entry payload in bits: address + cache-entry pointer + one cache line.
    data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8;
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
    interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz;
    interface_ip.assoc = 0;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 0;
    interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time
    interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
    icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area);
    area.set_area(area.get_area()+ icache.missb->local_result.area);
    //output_data_csv(icache.missb.local_result);

    //fill buffer: one cache line per entry, buffer_sizes[1] entries
    tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;
    data = icache.caches->l_ip.line_sz;
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
    interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1];
    interface_ip.assoc = 0;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 0;
    interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
    interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
    icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area);
    area.set_area(area.get_area()+ icache.ifb->local_result.area);
    //output_data_csv(icache.ifb.local_result);

    //prefetch buffer: one cache line per entry, buffer_sizes[2] entries
    tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide whether to merge.
    data = icache.caches->l_ip.line_sz;//separate queue to prevent cache pollution.
    interface_ip.specific_tag = 1;
    interface_ip.tag_w = tag;
    interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data))));
    interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz;
    interface_ip.assoc = 0;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 0;
    interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
    interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports;
    icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area);
    area.set_area(area.get_area()+ icache.prefetchb->local_result.area);
    //output_data_csv(icache.prefetchb.local_result);

    //Instruction buffer
    // One entry holds peak_issue_width instructions of instruction_length bits.
    data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads time-sharing the instruction buffer.
    interface_ip.is_cache = false;
    interface_ip.pure_ram = true;
    interface_ip.pure_cam = false;
    interface_ip.line_sz = int(ceil(data/8.0));
    // Capacity is threads * entries * line size, but never below 64.
    interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64?
        XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64;
    interface_ip.assoc = 1;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 0;
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions.
    interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    IB->area.set_area(IB->area.get_area()+ IB->local_result.area);
    area.set_area(area.get_area()+ IB->local_result.area);
    //output_data_csv(IB.IB.local_result);
    // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width;
    // inst_decoder.init_decoder(is_default, &interface_ip);
    // inst_decoder.full_decoder_power();

    if (coredynp.predictionW>0)
    {
        /*
         * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged
         * It is only a cache without all the buffers in the cache controller since it is more like a
         * look up table than a cache with cache controller. When access miss, no load from other places
         * such as main memory (not actively fill the misses), it is passively updated under the
         * following circumstances:
         * 1) when BPT@ID stage finds out current is a taken branch while BTB missed
         * 2) When BPT@ID stage predicts differently than BTB
         * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid)
         * 4) when EXEU find out wrong target has been provided from BTB.
         *
         */
        size = XML->sys.core[ithCore].BTB.BTB_config[0];
        line = XML->sys.core[ithCore].BTB.BTB_config[1];
        assoc = XML->sys.core[ithCore].BTB.BTB_config[2];
        int banks = XML->sys.core[ithCore].BTB.BTB_config[3];
        // The BTB tag is the full virtual address plus thread-id and extra
        // bits, so no index width is needed. The former unused
        // log2(size/line/assoc) computation was removed: it divided by the
        // associativity and would trap for assoc == 0.
        // tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
        tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS;
        interface_ip.is_cache = true;
        interface_ip.pure_ram = false;
        interface_ip.pure_cam = false;
        interface_ip.specific_tag = 1;
        interface_ip.tag_w = tag;
        interface_ip.cache_sz = debug?32768:size;
        interface_ip.line_sz = debug?64:line;
        interface_ip.assoc = debug?8:assoc;
        interface_ip.nbanks = debug?1:banks;
        interface_ip.out_w = interface_ip.line_sz*8;
        interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
        interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate;
        interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 1;
        interface_ip.num_rd_ports = coredynp.predictionW;
        interface_ip.num_wr_ports = coredynp.predictionW;
        interface_ip.num_se_rd_ports = 0;
        BTB = new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty);
        BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area);
        area.set_area(area.get_area()+ BTB->local_result.area);
        ///cout<<"area="<<area<<endl;

        // The predictor receives interface_ip as configured for the BTB.
        BPT = new BranchPredictor(XML, ithCore, &interface_ip,coredynp);
        area.set_area(area.get_area()+ BPT->area.get_area());
    }

    // Three decoders: opcode field, operand (register specifier) field and
    // an 8-bit miscellaneous/prefix field.
    ID_inst = new inst_decoder(is_default, &interface_ip,
            coredynp.opcode_length, 1/*Decoder should not know how many by itself*/,
            coredynp.x86,
            Core_device, coredynp.core_ty);
    ID_operand = new inst_decoder(is_default, &interface_ip,
            coredynp.arch_ireg_width, 1,
            coredynp.x86,
            Core_device, coredynp.core_ty);
    ID_misc = new inst_decoder(is_default, &interface_ip,
            8/* Prefix field etc upto 14B*/, 1,
            coredynp.x86,
            Core_device, coredynp.core_ty);
    //TODO: X86 decoder should decode the inst in cyclic mode under the control of sequencer.
    //So the dynamic power should be multiplied by a few times.
    // Decoder area is replicated once per decode slot (coredynp.decodeW).
    area.set_area(area.get_area()+ (ID_inst->area.get_area()
            +ID_operand->area.get_area()
            +ID_misc->area.get_area())*coredynp.decodeW);
}
/*
 * BranchPredictor constructor.
 *
 * Builds five arrays for core `ithCore_` and accumulates their areas into
 * this component's `area`: global predictor, level-1 and level-2 local
 * predictors, chooser, and return address stack (RAS).
 *
 * XML_interface : parsed configuration; table widths and entry counts come
 *                 from XML->sys.core[ithCore_].predictor and RAS_size.
 * ithCore_      : index of the core being modelled.
 * interface_ip_ : template array parameters, copied into `interface_ip`.
 *                 Fields not rewritten here (for example tag settings in the
 *                 single-threaded case) keep the caller's values.
 * dyn_p_        : derived per-core parameters (copied into `coredynp`).
 * exist_        : when false nothing is built.
 */
BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 globalBPT(0),
 localBPT(0),
 L1_localBPT(0),
 L2_localBPT(0),
 chooser(0),
 RAS(0),
 exist(exist_)
{
    /*
     * Branch Predictor, accessed during ID stage.
     * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264,
     * including global predictor, local two level predictor, and Chooser.
     * The Branch predictor also includes a RAS (return address stack) for function calls
     * Branch predictors are tagged by thread ID and modeled as 1-way associative $
     * However RAS return address stacks are duplicated for each thread.
     * TODO:Data Width need to be computed more precisely *
     */
    if (!exist) return;

    int tag, data;
    clockRate = coredynp.clockRate;
    executionTime = coredynp.executionTime;

    interface_ip.assoc = 1;
    interface_ip.pure_cam = false;
    if (coredynp.multithreaded)
    {
        // Thread-ID tag: round the bit count up so thread counts that are
        // not a power of two still get enough bits (the previous truncating
        // form was one bit short in that case). This matches how the BTB tag
        // is sized in InstFetchU.
        tag = int(ceil(log2(coredynp.num_hthreads))) + EXTRA_TAG_BITS;
        interface_ip.specific_tag = 1;
        interface_ip.tag_w = tag;
        interface_ip.is_cache = true;
        interface_ip.pure_ram = false;
    }
    else
    {
        // Single-threaded: the predictor tables are modelled as plain RAM.
        interface_ip.is_cache = false;
        interface_ip.pure_ram = true;
    }

    //Global predictor
    // Entry width is given in bits and rounded up to whole bytes.
    data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0));
    interface_ip.line_sz = data;
    interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 2;
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.predictionW;
    interface_ip.num_wr_ports = coredynp.predictionW;
    interface_ip.num_se_rd_ports = 0;
    globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
    globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area);
    area.set_area(area.get_area()+ globalBPT->local_result.area);

    //Local BPT (Level 1)
    data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0));
    interface_ip.line_sz = data;
    interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 2;
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.predictionW;
    interface_ip.num_wr_ports = coredynp.predictionW;
    interface_ip.num_se_rd_ports = 0;
    L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
    L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area);
    area.set_area(area.get_area()+ L1_localBPT->local_result.area);

    //Local BPT (Level 2)
    // Uses local_predictor_size[1] for the entry width but the same
    // local_predictor_entries count as level 1.
    data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0));
    interface_ip.line_sz = data;
    interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 2;
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.predictionW;
    interface_ip.num_wr_ports = coredynp.predictionW;
    interface_ip.num_se_rd_ports = 0;
    L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty);
    L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area);
    area.set_area(area.get_area()+ L2_localBPT->local_result.area);

    //Chooser
    data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0));
    interface_ip.line_sz = data;
    interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 2;
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.predictionW;
    interface_ip.num_wr_ports = coredynp.predictionW;
    interface_ip.num_se_rd_ports = 0;
    chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty);
    chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area);
    area.set_area(area.get_area()+ chooser->local_result.area);

    //RAS return address stacks are Duplicated for each thread.
    // Always a plain RAM, one PC (rounded up to bytes) per entry.
    interface_ip.is_cache = false;
    interface_ip.pure_ram = true;
    data = int(ceil(coredynp.pc_width/8.0));
    interface_ip.line_sz = data;
    interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size;
    interface_ip.assoc = 1;
    interface_ip.nbanks = 1;
    interface_ip.out_w = interface_ip.line_sz*8;
    interface_ip.access_mode = 2;
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = coredynp.predictionW;
    interface_ip.num_wr_ports = coredynp.predictionW;
    interface_ip.num_se_rd_ports = 0;
    RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty);
    // One RAS is modelled; its area is counted once per hardware thread.
    RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
    area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads);
}
/*
 * SchedulerU constructor.
 *
 * Builds the instruction-scheduling structures of core `ithCore_` and
 * accumulates their areas into this component's `area`:
 *   - in-order multithreaded cores: a unified instruction queue plus
 *     selection logic;
 *   - out-of-order cores: integer and FP instruction windows (issue queues
 *     or reservation stations depending on coredynp.scheu_ty), an optional
 *     reorder buffer when ROB_size > 0, and selection logic.
 * Nothing is built for an in-order core that is not multithreaded.
 *
 * XML_interface : parsed configuration, read via XML->sys.core[ithCore_].
 * ithCore_      : index of the core being modelled.
 * interface_ip_ : template array parameters, copied into `interface_ip` and
 *                 rewritten before each array is built.
 * dyn_p_        : derived per-core parameters (copied into `coredynp`).
 * exist_        : when false nothing is built.
 */
SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 int_inst_window(0),
 fp_inst_window(0),
 ROB(0),
 instruction_selection(0),
 exist(exist_)
{
    if (!exist) return;

    int tag, data;
    bool is_default=true;
    string tmp_name;

    clockRate = coredynp.clockRate;
    executionTime = coredynp.executionTime;

    if ((coredynp.core_ty==Inorder && coredynp.multithreaded))
    {
        //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors
        // NOTE(review): this truncates rather than rounds up, unlike other
        // thread-id widths in this file - confirm whether that is intended.
        tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design
        data = XML->sys.core[ithCore].instruction_length;
        //NOTE: x86 inst can be very lengthy, up to 15B. Source: Intel 64 and IA-32 Architectures
        //Software Developer's Manual
        interface_ip.is_cache = true;
        interface_ip.pure_cam = false;
        interface_ip.pure_ram = false;
        interface_ip.line_sz = int(ceil(data/8.0));
        interface_ip.specific_tag = 1;
        interface_ip.tag_w = tag;
        // Capacity is window entries * line size, but never below 64.
        interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64;
        interface_ip.assoc = 0;
        interface_ip.nbanks = 1;
        interface_ip.out_w = interface_ip.line_sz*8;
        interface_ip.access_mode = 1;
        interface_ip.throughput = 1.0/clockRate;
        interface_ip.latency = 1.0/clockRate;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = coredynp.peak_issueW;
        interface_ip.num_wr_ports = coredynp.peak_issueW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = coredynp.peak_issueW;
        int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
        int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
        area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
        //output_data_csv(iRS.RS.local_result);
        Iw_height =int_inst_window->local_result.cache_ht;

        /*
         * selection logic
         * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up
         * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who
         * at the issue stage.
         */
        instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
                coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads,
                &interface_ip, Core_device, coredynp.core_ty);
    }

    if (coredynp.core_ty==OOO)
    {
        /*
         * CAM based instruction window
         * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored
         * For RS based OOO it is the Reservation station, where both tags and values of phy regs are stored
         * It is written once and read twice(two operands) before an instruction can be issued.
         * X86 instruction can be very long up to 15B. add instruction length in XML
         */
        if(coredynp.scheu_ty==PhysicalRegFile)
        {
            tag = coredynp.phy_ireg_width;
            // Each time only half of the tag is compared, but two tag should be stored.
            // This underestimate the search power
            // Half of the entry width, converted to bytes and rounded UP.
            // The earlier form applied ceil() before dividing by 8 and then
            // truncated, which under-sized the entry (and could yield 0
            // bytes); this now matches the reservation-station branch below.
            data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0));
            //Data width being divided by 2 means only after both operands available the whole data will be read out.
            //This is modeled using two equivalent readouts with half of the data width
            tmp_name = "InstIssueQueue";
        }
        else
        {
            tag = coredynp.phy_ireg_width;
            // Each time only half of the tag is compared, but two tag should be stored.
            // This underestimate the search power
            data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+
                    2*coredynp.int_data_width)/2.0)/8.0));
            //Data width being divided by 2 means only after both operands available the whole data will be read out.
            //This is modeled using two equivalent readouts with half of the data width
            tmp_name = "IntReservationStation";
        }
        interface_ip.is_cache = true;
        interface_ip.pure_cam = false;
        interface_ip.pure_ram = false;
        interface_ip.line_sz = data;
        interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size;
        interface_ip.assoc = 0;
        interface_ip.nbanks = 1;
        interface_ip.out_w = interface_ip.line_sz*8;
        interface_ip.specific_tag = 1;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = 0;
        // Two half-width readouts per issue, hence two cycles.
        interface_ip.throughput = 2*1.0/clockRate;
        interface_ip.latency = 2*1.0/clockRate;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = coredynp.peak_issueW;
        interface_ip.num_wr_ports = coredynp.peak_issueW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = coredynp.peak_issueW;
        int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
        int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
        area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines);
        Iw_height =int_inst_window->local_result.cache_ht;

        //FU inst window
        if(coredynp.scheu_ty==PhysicalRegFile)
        {
            tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared
            data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0));
            tmp_name = "FPIssueQueue";
        }
        else
        {
            // NOTE(review): uses the integer register tag width for the FP
            // reservation station - confirm whether phy_freg_width was meant.
            tag = 2*coredynp.phy_ireg_width;
            data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+
                    2*coredynp.fp_data_width)/8.0));
            tmp_name = "FPReservationStation";
        }
        interface_ip.is_cache = true;
        interface_ip.pure_cam = false;
        interface_ip.pure_ram = false;
        interface_ip.line_sz = data;
        interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size;
        interface_ip.assoc = 0;
        interface_ip.nbanks = 1;
        interface_ip.out_w = interface_ip.line_sz*8;
        interface_ip.specific_tag = 1;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = 0;
        interface_ip.throughput = 1.0/clockRate;
        interface_ip.latency = 1.0/clockRate;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = coredynp.fp_issueW;
        interface_ip.num_wr_ports = coredynp.fp_issueW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = coredynp.fp_issueW;
        fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty);
        fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
        area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines);
        fp_Iw_height =fp_inst_window->local_result.cache_ht;

        if (XML->sys.core[ithCore].ROB_size >0)
        {
            /*
             * if ROB_size = 0, then the target processor does not support hardware-based
             * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which
             * means branch must be resolved before instruction issued into instruction window, since
             * there is no chance to flush mispredicted branch path after instructions are issued in this situation.
             *
             * ROB.ROB size = inflight inst. ROB is unified for int and fp inst.
             * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7.
             * However, this approach is abandoned due to its high power and poor scalability.
             * McPAT uses current implementation of ROB as circular buffer.
             * ROB is written once when instruction is issued and read once when the instruction is committed. *
             */
            int robExtra = int(ceil(5 + log2(coredynp.num_hthreads)));
            //5 bits are: busy, Issued, Finished, speculative, valid
            if(coredynp.scheu_ty==PhysicalRegFile)
            {
                //PC is to id the instruction for recover exception.
                //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update
                // data = int(ceil((robExtra+coredynp.pc_width +
                // coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0));
                data = int(ceil((robExtra+coredynp.pc_width +
                        coredynp.phy_ireg_width)/8.0));
            }
            else
            {
                //in RS based OOO, ROB also contains value of destination reg
                // data = int(ceil((robExtra+coredynp.pc_width +
                // coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
                data = int(ceil((robExtra + coredynp.pc_width +
                        coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0));
            }
            interface_ip.is_cache = false;
            interface_ip.pure_cam = false;
            interface_ip.pure_ram = true;
            interface_ip.line_sz = data;
            interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads
            interface_ip.assoc = 1;
            interface_ip.nbanks = 1;
            interface_ip.out_w = interface_ip.line_sz*8;
            interface_ip.access_mode = 1;
            interface_ip.throughput = 1.0/clockRate;
            interface_ip.latency = 1.0/clockRate;
            interface_ip.obj_func_dyn_energy = 0;
            interface_ip.obj_func_dyn_power = 0;
            interface_ip.obj_func_leak_power = 0;
            interface_ip.obj_func_cycle_t = 1;
            interface_ip.num_rw_ports = 0;
            interface_ip.num_rd_ports = coredynp.peak_commitW;
            interface_ip.num_wr_ports = coredynp.peak_issueW;
            interface_ip.num_se_rd_ports = 0;
            interface_ip.num_search_ports = 0;
            ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
            ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
            area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines);
            ROB_height =ROB->local_result.cache_ht;
        }

        instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size,
                coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty);
    }
}
/*
 * LoadStoreU constructor: sizes the data cache, its controller buffers
 * (miss, fill, prefetch and, for write-back caches, the write-back buffer)
 * and the load/store queue(s) of one core, accumulating their areas into
 * dcache.area and into this unit's area.
 *
 * XML_interface  parsed configuration; per-core settings are read from
 *                XML->sys.core[ithCore_].
 * ithCore_       index of the core being modelled.
 * interface_ip_  array parameter template; dereferenced to initialise this
 *                unit's interface_ip member, whose fields are then rewritten
 *                before each array is built.
 * dyn_p_         per-core dynamic parameters (clock rate, core type, ...).
 * exist_         when false the constructor returns right away and no array
 *                is built.
 *
 * The order of the interface_ip assignments matters: every ArrayST is built
 * from whatever interface_ip holds at that point, and fields that a section
 * does not touch keep the value left by the previous section.
 */
LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 LSQ(0),
 LoadQ(0),//only allocated below for OOO cores with a load buffer; must not be left indeterminate otherwise
 exist(exist_)
{
    if (!exist) return;

    int idx, tag, data, size, line, assoc;
    bool debug = false;//flip to true to model the hard-coded debug configuration instead of the XML one
    int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16;

    clockRate     = coredynp.clockRate;
    executionTime = coredynp.executionTime;
    cache_p       = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7];

    interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;
    interface_ip.is_cache         = true;
    interface_ip.pure_cam         = false;
    interface_ip.pure_ram         = false;

    //Dcache
    size  = (int)XML->sys.core[ithCore].dcache.dcache_config[0];
    line  = (int)XML->sys.core[ithCore].dcache.dcache_config[1];
    assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2];
    //index bits come from the number of sets; the tag is what is left of the physical address
    idx = debug?9:int(ceil(log2(size/line/assoc)));
    tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS;
    interface_ip.specific_tag        = 1;
    interface_ip.tag_w               = tag;
    interface_ip.cache_sz            = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0];
    interface_ip.line_sz             = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1];
    interface_ip.assoc               = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2];
    interface_ip.nbanks              = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3];
    interface_ip.out_w               = interface_ip.line_sz*8;
    interface_ip.access_mode         = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5];
    interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
    interface_ip.latency             = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    interface_ip.num_rw_ports        = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least.
    interface_ip.num_rd_ports        = 0;
    interface_ip.num_wr_ports        = 0;
    interface_ip.num_se_rd_ports     = 0;
    dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty);
    dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area);
    area.set_area(area.get_area()+ dcache.caches->local_result.area);
    //output_data_csv(dcache.caches.local_result);

    //dCache controllers
    //miss buffer: each entry holds an address, a line index and one full cache line
    tag  = XML->sys.physical_address_width + EXTRA_TAG_BITS;
    data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8;
    interface_ip.specific_tag        = 1;
    interface_ip.tag_w               = tag;
    interface_ip.line_sz             = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
    interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz;
    interface_ip.assoc               = 0;
    interface_ip.nbanks              = 1;
    interface_ip.out_w               = interface_ip.line_sz*8;
    interface_ip.access_mode         = 2;
    interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
    interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    interface_ip.num_rw_ports        = debug?1:XML->sys.core[ithCore].memory_ports;
    interface_ip.num_rd_ports        = 0;
    interface_ip.num_wr_ports        = 0;
    interface_ip.num_se_rd_ports     = 0;
    dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area);
    area.set_area(area.get_area()+ dcache.missb->local_result.area);
    //output_data_csv(dcache.missb.local_result);

    //fill buffer: one cache line per entry
    tag  = XML->sys.physical_address_width + EXTRA_TAG_BITS;
    data = dcache.caches->l_ip.line_sz;
    interface_ip.specific_tag        = 1;
    interface_ip.tag_w               = tag;
    interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
    interface_ip.cache_sz            = data*XML->sys.core[ithCore].dcache.buffer_sizes[1];
    interface_ip.assoc               = 0;
    interface_ip.nbanks              = 1;
    interface_ip.out_w               = interface_ip.line_sz*8;
    interface_ip.access_mode         = 2;
    interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
    interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    interface_ip.num_rw_ports        = debug?1:XML->sys.core[ithCore].memory_ports;
    interface_ip.num_rd_ports        = 0;
    interface_ip.num_wr_ports        = 0;
    interface_ip.num_se_rd_ports     = 0;
    dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area);
    area.set_area(area.get_area()+ dcache.ifb->local_result.area);
    //output_data_csv(dcache.ifb.local_result);

    //prefetch buffer
    tag  = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide whether to merge.
    data = dcache.caches->l_ip.line_sz;//separate queue to prevent cache pollution.
    interface_ip.specific_tag        = 1;
    interface_ip.tag_w               = tag;
    interface_ip.line_sz             = data;//int(pow(2.0,ceil(log2(data))));
    interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz;
    interface_ip.assoc               = 0;
    interface_ip.nbanks              = 1;
    interface_ip.out_w               = interface_ip.line_sz*8;
    interface_ip.access_mode         = 2;
    interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
    interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    interface_ip.num_rw_ports        = debug?1:XML->sys.core[ithCore].memory_ports;
    interface_ip.num_rd_ports        = 0;
    interface_ip.num_wr_ports        = 0;
    interface_ip.num_se_rd_ports     = 0;
    dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty);
    dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area);
    area.set_area(area.get_area()+ dcache.prefetchb->local_result.area);
    //output_data_csv(dcache.prefetchb.local_result);

    //WBB: only built when the cache policy read from dcache_config[7] is write-back
    if (cache_p==Write_back)
    {
        tag  = XML->sys.physical_address_width + EXTRA_TAG_BITS;
        data = dcache.caches->l_ip.line_sz;
        interface_ip.specific_tag        = 1;
        interface_ip.tag_w               = tag;
        interface_ip.line_sz             = data;
        interface_ip.cache_sz            = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz;
        interface_ip.assoc               = 0;
        interface_ip.nbanks              = 1;
        interface_ip.out_w               = interface_ip.line_sz*8;
        interface_ip.access_mode         = 2;
        interface_ip.throughput          = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
        interface_ip.latency             = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power  = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t    = 1;
        interface_ip.num_rw_ports        = XML->sys.core[ithCore].memory_ports;
        interface_ip.num_rd_ports        = 0;
        interface_ip.num_wr_ports        = 0;
        interface_ip.num_se_rd_ports     = 0;
        dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty);
        dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area);
        area.set_area(area.get_area()+ dcache.wbb->local_result.area);
        //output_data_csv(dcache.wbb.local_result);
    }

    /*
     * LSU--in-order processors do not have separate load queue: unified lsq
     * partitioned among threads
     * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ
     */
    tag  = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS;
    data = XML->sys.machine_bits;
    interface_ip.is_cache            = true;
    interface_ip.line_sz             = int(ceil(data/32.0))*4;//round the machine word up to a multiple of 4 bytes
    interface_ip.specific_tag        = 1;
    interface_ip.tag_w               = tag;
    interface_ip.cache_sz            = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
    interface_ip.assoc               = 0;
    interface_ip.nbanks              = 1;
    interface_ip.out_w               = interface_ip.line_sz*8;
    interface_ip.access_mode         = 1;
    interface_ip.throughput          = 1.0/clockRate;
    interface_ip.latency             = 1.0/clockRate;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    interface_ip.num_rw_ports        = 0;
    interface_ip.num_rd_ports        = XML->sys.core[ithCore].memory_ports;
    interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
    interface_ip.num_se_rd_ports     = 0;
    interface_ip.num_search_ports    = XML->sys.core[ithCore].memory_ports;
    LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty);
    LSQ->area.set_area(LSQ->area.get_area()+ LSQ->local_result.area);
    area.set_area(area.get_area()+ LSQ->local_result.area);
    //the whole unit area accumulated so far (caches and buffers included) is scaled by cdb_overhead
    area.set_area(area.get_area()*cdb_overhead);
    //output_data_csv(LSQ.LSQ.local_result);
    lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/

    //OOO cores with a non-empty load buffer get a separate load queue, built with the same tag and word width as the LSQ
    if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0))
    {
        interface_ip.line_sz             = int(ceil(data/32.0))*4;
        interface_ip.specific_tag        = 1;
        interface_ip.tag_w               = tag;
        interface_ip.cache_sz            = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads;
        interface_ip.assoc               = 0;
        interface_ip.nbanks              = 1;
        interface_ip.out_w               = interface_ip.line_sz*8;
        interface_ip.access_mode         = 1;
        interface_ip.throughput          = 1.0/clockRate;
        interface_ip.latency             = 1.0/clockRate;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power  = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t    = 1;
        interface_ip.num_rw_ports        = 0;
        interface_ip.num_rd_ports        = XML->sys.core[ithCore].memory_ports;
        interface_ip.num_wr_ports        = XML->sys.core[ithCore].memory_ports;
        interface_ip.num_se_rd_ports     = 0;
        interface_ip.num_search_ports    = XML->sys.core[ithCore].memory_ports;
        LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty);
        LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area);
        area.set_area(area.get_area()+ LoadQ->local_result.area);
        //NOTE(review): the total area was already scaled by cdb_overhead after the LSQ above,
        //so on this path everything except the LoadQ is scaled twice -- confirm this is intended.
        area.set_area(area.get_area()*cdb_overhead);
        //output_data_csv(LoadQ.LoadQ.local_result);
        lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/
    }
}
/*
 * MemManU constructor: builds the instruction and data TLB arrays of one
 * core and adds their areas to this unit's area.
 *
 * XML_interface  parsed configuration; per-core settings are read from
 *                XML->sys.core[ithCore_].
 * ithCore_       index of the core being modelled.
 * interface_ip_  array parameter template; dereferenced to initialise this
 *                unit's interface_ip member.
 * dyn_p_         per-core dynamic parameters (clock rate, core type, ...).
 * exist_         when false nothing is built and itlb/dtlb stay null.
 */
MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 itlb(0),
 dtlb(0),
 exist(exist_)
{
    if (!exist)
    {
        return;
    }

    const bool use_debug_cfg = false;//selects the hard-coded debug timing/port values when true

    clockRate     = coredynp.clockRate;
    executionTime = coredynp.executionTime;

    //TLBs are partitioned among threads according to Niagara and Nehalem.
    //Both TLBs share the same entry layout: the tag is the virtual page number plus a
    //hardware-thread id, the payload is the physical page number.
    const int page_offset_bits = int(floor(log2(XML->sys.virtual_memory_page_size)));
    const int thread_id_bits   = int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)));
    const int tlb_tag_bits     = XML->sys.virtual_address_width - page_offset_bits + thread_id_bits + EXTRA_TAG_BITS;
    const int tlb_data_bits    = XML->sys.physical_address_width - page_offset_bits;

    interface_ip.is_cache     = true;
    interface_ip.pure_cam     = false;
    interface_ip.pure_ram     = false;
    interface_ip.specific_tag = 1;

    //---------------------------------- ITLB ----------------------------------
    //geometry
    interface_ip.tag_w       = tlb_tag_bits;
    interface_ip.line_sz     = int(ceil(tlb_data_bits/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
    interface_ip.cache_sz    = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
    interface_ip.out_w       = interface_ip.line_sz*8;
    interface_ip.assoc       = 0;
    interface_ip.nbanks      = 1;
    interface_ip.access_mode = 0;
    //timing follows the instruction cache configuration
    interface_ip.throughput  = use_debug_cfg ? 1.0/clockRate : XML->sys.core[ithCore].icache.icache_config[4]/clockRate;
    interface_ip.latency     = use_debug_cfg ? 1.0/clockRate : XML->sys.core[ithCore].icache.icache_config[5]/clockRate;
    //optimisation objective weights
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    //ports: one write and one search port per instruction fetch port
    interface_ip.num_rw_ports     = 0;
    interface_ip.num_rd_ports     = 0;
    interface_ip.num_se_rd_ports  = 0;
    interface_ip.num_wr_ports     = use_debug_cfg ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports;
    interface_ip.num_search_ports = use_debug_cfg ? 1 : XML->sys.core[ithCore].number_instruction_fetch_ports;

    itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty);
    itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area);
    area.set_area(area.get_area()+ itlb->local_result.area);

    //---------------------------------- DTLB ----------------------------------
    //geometry
    interface_ip.specific_tag = 1;
    interface_ip.tag_w        = tlb_tag_bits;
    interface_ip.line_sz      = int(ceil(tlb_data_bits/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0));
    interface_ip.cache_sz     = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads;
    interface_ip.out_w        = interface_ip.line_sz*8;
    interface_ip.assoc        = 0;
    interface_ip.nbanks       = 1;
    interface_ip.access_mode  = 0;
    //timing follows the data cache configuration
    interface_ip.throughput   = use_debug_cfg ? 1.0/clockRate : XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate;
    interface_ip.latency      = use_debug_cfg ? 1.0/clockRate : XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate;
    //optimisation objective weights
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    //ports: one write and one search port per memory port
    interface_ip.num_rw_ports     = 0;
    interface_ip.num_rd_ports     = 0;
    interface_ip.num_se_rd_ports  = 0;
    interface_ip.num_wr_ports     = XML->sys.core[ithCore].memory_ports;
    interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports;

    dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty);
    dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area);
    area.set_area(area.get_area()+ dtlb->local_result.area);
}
/*
 * RegFU constructor: builds the integer and floating-point register file
 * arrays of one core and, when coredynp.regWindowing is set, the register
 * window array; their areas are added to this unit's area.
 *
 * Processors have separate architectural register files for each thread,
 * therefore the bypass buses need to travel across all the register files.
 *
 * XML_interface  parsed configuration; per-core settings are read from
 *                XML->sys.core[ithCore_].
 * ithCore_       index of the core being modelled.
 * interface_ip_  array parameter template; dereferenced to initialise this
 *                unit's interface_ip member.
 * dyn_p_         per-core dynamic parameters (widths, entry counts, ...).
 * exist_         when false nothing is built and IRF/FRF/RFWIN stay null.
 */
RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 coredynp(dyn_p_),
 IRF (0),
 FRF (0),
 RFWIN (0),
 exist(exist_)
{
    if (!exist)
    {
        return;
    }

    int reg_bits;//width in bits of one register of the array currently being configured

    clockRate     = coredynp.clockRate;
    executionTime = coredynp.executionTime;

    //=============================== integer RF ===============================
    reg_bits = coredynp.int_data_width;
    //plain RAM array
    interface_ip.is_cache = false;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = true;
    //geometry: register width rounded up to a multiple of 4 bytes
    interface_ip.line_sz     = int(ceil(reg_bits/32.0))*4;
    interface_ip.cache_sz    = coredynp.num_IRF_entry*interface_ip.line_sz;
    interface_ip.out_w       = interface_ip.line_sz*8;
    interface_ip.assoc       = 1;
    interface_ip.nbanks      = 1;
    interface_ip.access_mode = 1;
    //single-cycle timing
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency    = 1.0/clockRate;
    //optimisation objective weights
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    //ports: two reads and one write per peak-issue slot
    interface_ip.num_rw_ports    = 1;//this is the transfer port for saving/restoring states when exceptions happen.
    interface_ip.num_rd_ports    = 2*coredynp.peak_issueW;
    interface_ip.num_wr_ports    = coredynp.peak_issueW;
    interface_ip.num_se_rd_ports = 0;

    IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
    //one copy per hardware thread and per pipeline, inflated by cdb_overhead
    IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);
    area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead);

    //============================ floating-point RF ===========================
    reg_bits = coredynp.fp_data_width;
    //plain RAM array
    interface_ip.is_cache = false;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = true;
    //geometry: register width rounded up to a multiple of 4 bytes
    interface_ip.line_sz     = int(ceil(reg_bits/32.0))*4;
    interface_ip.cache_sz    = coredynp.num_FRF_entry*interface_ip.line_sz;
    interface_ip.out_w       = interface_ip.line_sz*8;
    interface_ip.assoc       = 1;
    interface_ip.nbanks      = 1;
    interface_ip.access_mode = 1;
    //single-cycle timing
    interface_ip.throughput = 1.0/clockRate;
    interface_ip.latency    = 1.0/clockRate;
    //optimisation objective weights
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    //ports: here the count comes from the XML issue_width rather than coredynp.peak_issueW
    interface_ip.num_rw_ports    = 1;//this is the transfer port for saving/restoring states when exceptions happen.
    interface_ip.num_rd_ports    = 2*XML->sys.core[ithCore].issue_width;
    interface_ip.num_wr_ports    = XML->sys.core[ithCore].issue_width;
    interface_ip.num_se_rd_ports = 0;

    FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty);
    //one copy per hardware thread and per FP pipeline, inflated by cdb_overhead
    FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);
    area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead);

    //heights of the per-thread register file stacks, used by the owner to size bypass wires.
    //since a EXU is associated with each pipeline, the cdb should not have longer length.
    int_regfile_height = IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);
    fp_regfile_height  = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead);

    if (!coredynp.regWindowing)
    {
        return;
    }

    //============================= register windows ===========================
    reg_bits = coredynp.int_data_width;//ECC, and usually 2 regs are transferred together during window shifting. Niagara Mega cell
    //plain RAM array
    interface_ip.is_cache = false;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = true;
    //geometry: capacity is windows x IRF capacity x hardware threads
    interface_ip.line_sz     = int(ceil(reg_bits/8.0));
    interface_ip.cache_sz    = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads;
    interface_ip.out_w       = interface_ip.line_sz*8;
    interface_ip.assoc       = 1;
    interface_ip.nbanks      = 1;
    interface_ip.access_mode = 1;
    //four-cycle timing
    interface_ip.throughput = 4.0/clockRate;
    interface_ip.latency    = 4.0/clockRate;
    //optimisation objective weights
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power  = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t    = 1;
    //a single read/write port only
    interface_ip.num_rw_ports    = 1;//this is the transfer port for saving/restoring states when exceptions happen.
    interface_ip.num_rd_ports    = 0;
    interface_ip.num_wr_ports    = 0;
    interface_ip.num_se_rd_ports = 0;

    RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty);
    RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
    area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines);
}
/*
 * EXECU constructor: builds the execution cluster of one core -- register
 * files (rfu), scheduler (scheu), ALUs (exeu), optional FPUs (fp_u) and
 * multipliers (mul) -- plus the result bypass/broadcast interconnects, and
 * sums all of their areas into this unit's area.
 *
 * XML_interface  parsed configuration; per-core settings are read from
 *                XML->sys.core[ithCore_].
 * ithCore_       index of the core being modelled.
 * interface_ip_  parameter template; dereferenced to initialise this unit's
 *                interface_ip member, which is handed to every sub-unit.
 * lsq_height_    height of the load/store queue; it is part of the wire
 *                length of the bypass buses that span the LSQ.
 * dyn_p_         per-core dynamic parameters (core type, widths, ...).
 * exist_         when false nothing is built and all sub-unit pointers stay
 *                null.
 */
EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_)
:XML(XML_interface),
 ithCore(ithCore_),
 interface_ip(*interface_ip_),
 lsq_height(lsq_height_),
 coredynp(dyn_p_),
 rfu(0),
 scheu(0),
 fp_u(0),
 exeu(0),
 mul(0),
 int_bypass(0),
 intTagBypass(0),
 int_mul_bypass(0),
 intTag_mul_Bypass(0),
 fp_bypass(0),
 fpTagBypass(0),
 exist(exist_)
{
    if (!exist) return;

    clockRate     = coredynp.clockRate;
    executionTime = coredynp.executionTime;

    //mandatory sub-units
    rfu   = new RegFU(XML, ithCore, &interface_ip,coredynp);
    scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp);
    exeu  = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU);
    area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() );

    //optional sub-units
    if (coredynp.num_fpus >0)
    {
        fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU);
        area.set_area(area.get_area()+ fp_u->area.get_area());
    }
    if (coredynp.num_muls >0)
    {
        mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL);
        area.set_area(area.get_area()+ mul->area.get_area());
    }

    /*
     * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast
     * integer by pass has two paths and fp has 3 paths.
     * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus
     */
    if (XML->sys.Embedded)
    {
        interface_ip.wt               = Global_30;
        interface_ip.wire_is_mat_type = 0;
        interface_ip.wire_os_mat_type = 0;
        interface_ip.throughput       = 1.0/clockRate;
        interface_ip.latency          = 1.0/clockRate;
    }
    else
    {
        interface_ip.wt               = Global;
        interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used
        interface_ip.wire_os_mat_type = 2;
        interface_ip.throughput       = 10.0/clockRate; //Do not care
        interface_ip.latency          = 10.0/clockRate;
    }

    //Every interconnect built below has its area added to bypass.area; the total is
    //folded into this unit's area once, at the very end.
    if (coredynp.core_ty==Inorder)
    {
        int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32),
                rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
                false, 1.0, coredynp.opt_local, coredynp.core_ty);
        bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area());
        intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
                rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
                false, 1.0, coredynp.opt_local, coredynp.core_ty);
        bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
        if (coredynp.num_muls>0)
        {
            //NOTE(review): these two buses span fp_regfile_height, whereas the OOO multiplier
            //bypasses below span int_regfile_height -- confirm this difference is intended.
            int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
                    rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
            intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
                    rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
        }
        if (coredynp.num_fpus>0)
        {
            fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5),
                    rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
            fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState,
                    rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
        }
    }
    else
    {//OOO
        if (coredynp.scheu_ty==PhysicalRegFile)
        {
            /* For physical register based OOO,
             * data broadcast interconnects cover across functional units, lsq, inst windows and register files,
             * while tag broadcast interconnects also cover across ROB
             */
            int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
                    rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
            intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
                    rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            //The integer tag bus was the only bypass interconnect whose area was never
            //accumulated; count it like every other bus in this constructor.
            bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
            if (coredynp.num_muls>0)
            {
                int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
                        rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
                        rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
                bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
            }
            if (coredynp.num_fpus>0)
            {
                fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
                        rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
                        rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
                bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
            }
        }
        else
        {
            /*
             * In RS based processor both data and tag are broadcast together,
             * covering functional units, lsq, inst windows, register files, and ROBs
             */
            int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
                    rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
                    rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
                    false, 1.0, coredynp.opt_local, coredynp.core_ty);
            bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area());
            bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area());
            if (coredynp.num_muls>0)
            {
                int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)),
                        rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width,
                        rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area());
                bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area());
            }
            if (coredynp.num_fpus>0)
            {
                fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)),
                        rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width,
                        rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3,
                        false, 1.0, coredynp.opt_local, coredynp.core_ty);
                bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area());
                bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area());
            }
        }
    }
    area.set_area(area.get_area()+ bypass.area.get_area());
}
RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_)
:XML(XML_interface),
ithCore(ithCore_),
interface_ip(*interface_ip_),
coredynp(dyn_p_),
iFRAT(0),
fFRAT(0),
iRRAT(0),
fRRAT(0),
ifreeL(0),
ffreeL(0),
idcl(0),
fdcl(0),
RAHT(0),
exist(exist_)
{
/*
* Although renaming logic maybe be used in in-order processors,
* McPAT assumes no renaming logic is used since the performance gain is very limited and
* the only major inorder processor with renaming logic is Itainium
* that is a VLIW processor and different from current McPAT's model.
* physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT;
* i,f prefix mean int and fp
* RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires.
* FRAT will be read twice and written once per instruction;
* RRAT will be write once per instruction when committing and reads out all when context switch
* checkpointing is implicit
* Renaming logic is duplicated for each different hardware threads
*
* No Dual-RAT is needed in RS-based OOO processors,
* however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
* to make sure all the renamings associated with the ROB to be released are updated at the same time.
* RAM scheme has # ARchi Reg entry with each entry hold phy reg tag,
* CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag,
*
* Both RAM and CAM have same DCL
*/
if (!exist) return;
int tag, data, out_w;
// interface_ip.wire_is_mat_type = 0;
// interface_ip.wire_os_mat_type = 0;
// interface_ip.wt = Global_30;
clockRate = coredynp.clockRate;
executionTime = coredynp.executionTime;
if (coredynp.core_ty==OOO)
{
//integer pipeline
if (coredynp.scheu_ty==PhysicalRegFile)
{
if (coredynp.rm_ty ==RAMbased)
{ //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions
data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
// data = int(ceil(coredynp.phy_ireg_width/8.0));
out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//the extra one port is for GCs
interface_ip.num_rd_ports = 2*coredynp.decodeW;
interface_ip.num_wr_ports = coredynp.decodeW;
interface_ip.num_se_rd_ports = 0;
iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ iFRAT->area.get_area());
// //RAHT According to Intel, combine GC with FRAT is very costly.
// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry);
// out_w = data;
// interface_ip.is_cache = false;
// interface_ip.pure_cam = false;
// interface_ip.pure_ram = true;
// interface_ip.line_sz = data;
// interface_ip.cache_sz = data*coredynp.globalCheckpoint;
// interface_ip.assoc = 1;
// interface_ip.nbanks = 1;
// interface_ip.out_w = out_w*8;
// interface_ip.access_mode = 0;
// interface_ip.throughput = 1.0/clockRate;
// interface_ip.latency = 1.0/clockRate;
// interface_ip.obj_func_dyn_energy = 0;
// interface_ip.obj_func_dyn_power = 0;
// interface_ip.obj_func_leak_power = 0;
// interface_ip.obj_func_cycle_t = 1;
// interface_ip.num_rw_ports = 1;//the extra one port is for GCs
// interface_ip.num_rd_ports = 2*coredynp.decodeW;
// interface_ip.num_wr_ports = coredynp.decodeW;
// interface_ip.num_se_rd_ports = 0;
// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT");
// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
// area.set_area(area.get_area()+ iFRAT->area.get_area());
//FRAT floating point
data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
out_w = int(ceil(coredynp.phy_freg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//the extra one port is for GCs
interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
interface_ip.num_wr_ports = coredynp.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ fFRAT->area.get_area());
}
else if ((coredynp.rm_ty ==CAMbased))
{
//FRAT
tag = coredynp.arch_ireg_width;
data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
out_w = int(ceil (coredynp.arch_ireg_width/8.0));
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
interface_ip.assoc = 0;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.specific_tag = 1;
interface_ip.tag_w = tag;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//for GCs
interface_ip.num_rd_ports = coredynp.decodeW;
interface_ip.num_wr_ports = coredynp.decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports= 2*coredynp.decodeW;
iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ iFRAT->area.get_area());
//FRAT for FP
tag = coredynp.arch_freg_width;
data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out
out_w = int(ceil (coredynp.arch_freg_width/8.0));
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
interface_ip.assoc = 0;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.specific_tag = 1;
interface_ip.tag_w = tag;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//for GCs
interface_ip.num_rd_ports = coredynp.fp_decodeW;
interface_ip.num_wr_ports = coredynp.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ fFRAT->area.get_area());
}
//RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping
data = int(ceil(coredynp.phy_ireg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = interface_ip.line_sz*8;
interface_ip.access_mode = 1;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 0;
interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width;
interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width;
interface_ip.num_se_rd_ports = 0;
iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ iRRAT->area.get_area());
//RRAT for FP
data = int(ceil(coredynp.phy_freg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = interface_ip.line_sz*8;
interface_ip.access_mode = 1;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 0;
interface_ip.num_rd_ports = coredynp.fp_decodeW;
interface_ip.num_wr_ports = coredynp.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ fRRAT->area.get_area());
//Freelist of renaming unit always RAM based
//Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist
// 2)When instruction commits the Phyregisters/ROB needed to be recycled.
//therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
data = int(ceil(coredynp.phy_ireg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = interface_ip.line_sz*8;
interface_ip.access_mode = 1;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//TODO
interface_ip.num_rd_ports = coredynp.decodeW;
interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width;
//every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers
interface_ip.num_se_rd_ports = 0;
ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ ifreeL->area.get_area());
//freelist for FP
data = int(ceil(coredynp.phy_freg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*coredynp.num_ffreelist_entries;
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = interface_ip.line_sz*8;
interface_ip.access_mode = 1;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;
interface_ip.num_rd_ports = coredynp.fp_decodeW;
interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width;
interface_ip.num_se_rd_ports = 0;
ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ ffreeL->area.get_area());
idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
}
else if (coredynp.scheu_ty==ReservationStation){
if (coredynp.rm_ty ==RAMbased){
/*
* however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry,
* to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time.
* RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than
* CAM based RAT so that it is more scalable as number of ROB/physical regs increases.
*/
tag = coredynp.phy_ireg_width;
data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0));
out_w = int(ceil(coredynp.phy_ireg_width/8.0));
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size;
interface_ip.assoc = 0;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//the extra one port is for GCs
interface_ip.num_rd_ports = 2*coredynp.decodeW;
interface_ip.num_wr_ports = coredynp.decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports= coredynp.commitW;//TODO
iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
iFRAT->local_result.adjust_area();
iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ iFRAT->area.get_area());
//FP
tag = coredynp.phy_freg_width;
data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0));
out_w = int(ceil(coredynp.phy_freg_width/8.0));
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size;
interface_ip.assoc = 0;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//the extra one port is for GCs
interface_ip.num_rd_ports = 2*coredynp.fp_decodeW;
interface_ip.num_wr_ports = coredynp.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width
fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
fFRAT->local_result.adjust_area();
fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ fFRAT->area.get_area());
}
else if ((coredynp.rm_ty ==CAMbased))
{
//FRAT
tag = coredynp.arch_ireg_width;
data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
out_w = int(ceil (coredynp.arch_ireg_width/8.0));
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size;
interface_ip.assoc = 0;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.specific_tag = 1;
interface_ip.tag_w = tag;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//for GCs
interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO
interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width;
iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ iFRAT->area.get_area());
//FRAT
tag = coredynp.arch_freg_width;
data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out
out_w = int(ceil (coredynp.arch_freg_width/8.0));
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size;
interface_ip.assoc = 0;
interface_ip.nbanks = 1;
interface_ip.out_w = out_w*8;
interface_ip.specific_tag = 1;
interface_ip.tag_w = tag;
interface_ip.access_mode = 2;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//for GCs
interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO;
interface_ip.num_wr_ports = coredynp.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports= 2*coredynp.fp_decodeW;
fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty);
fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ fFRAT->area.get_area());
}
//No RRAT for RS based OOO
//Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
data = int(ceil(coredynp.phy_ireg_width/8.0));
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.line_sz = data;
interface_ip.cache_sz = data*coredynp.num_ifreelist_entries;
interface_ip.assoc = 1;
interface_ip.nbanks = 1;
interface_ip.out_w = interface_ip.line_sz*8;
interface_ip.access_mode = 1;
interface_ip.throughput = 1.0/clockRate;
interface_ip.latency = 1.0/clockRate;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = 1;//TODO
interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;
interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width;
interface_ip.num_se_rd_ports = 0;
ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty);
ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads);
area.set_area(area.get_area()+ ifreeL->area.get_area());
idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
}
}
if (coredynp.core_ty==Inorder&& coredynp.issueW>1)
{
/* Dependency check logic will only present when decode(issue) width>1.
* Multiple issue in order processor can do without renaming, but dcl is a must.
*/
idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR
fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width);
}
}
/*
 * Build one core and total up its area.
 *
 * XML_interface : parsed configuration; sys.Private_L2 and sys.core[ithCore_]
 *                 are consulted by this constructor and the units it creates.
 * ithCore_      : index of this core.
 * interface_ip_ : input parameters; copied by value into the member
 *                 interface_ip, whose address is then handed to every unit.
 *
 * All unit pointers start out as 0 and are filled in below. rnu is only
 * created for an OOO core and l2cache only when sys.Private_L2 is set.
 */
Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_)
        : XML(XML_interface),
          ithCore(ithCore_),
          interface_ip(*interface_ip_),
          ifu(0),
          lsu(0),
          mmu(0),
          exu(0),
          rnu(0),
          corepipe(0),
          undiffCore(0),
          l2cache(0)
{
    // Share of the pipeline area that is charged to each functional unit.
    double unit_pipeline_share;

    // The private L2 is constructed before the core parameters are derived.
    if (XML->sys.Private_L2)
    {
        l2cache = new SharedCache(XML, ithCore, &interface_ip);
    }

    //  interface_ip.wire_is_mat_type = 2;
    //  interface_ip.wire_os_mat_type = 2;
    //  interface_ip.wt               = Global_30;

    // Populate coredynp; everything below reads from it.
    set_core_param();
    clockRate     = coredynp.clockRate;
    executionTime = coredynp.executionTime;

    // Functional units. exu additionally receives the LSQ height from lsu,
    // so lsu has to exist before exu is created.
    ifu        = new InstFetchU(XML, ithCore, &interface_ip, coredynp);
    lsu        = new LoadStoreU(XML, ithCore, &interface_ip, coredynp);
    mmu        = new MemManU(XML, ithCore, &interface_ip, coredynp);
    exu        = new EXECU(XML, ithCore, &interface_ip, lsu->lsq_height, coredynp);
    undiffCore = new UndiffCore(XML, ithCore, &interface_ip, coredynp);
    if (coredynp.core_ty == OOO)
    {
        rnu = new RENAMINGU(XML, ithCore, &interface_ip, coredynp);
    }
    corepipe = new Pipeline(&interface_ip, coredynp);

    // The pipeline area is not added to the core as one lump. It is divided
    // into equal shares: four for a non-OOO core (ifu, lsu, exu, mmu) and
    // five for an OOO core (the same four plus rnu). A share is only added
    // to a unit whose 'exist' flag is set.
    const double all_pipelines_area = corepipe->area.get_area()*coredynp.num_pipelines;
    if (coredynp.core_ty != OOO)
    {
        unit_pipeline_share = all_pipelines_area/4.0;
    }
    else
    {
        unit_pipeline_share = all_pipelines_area/5.0;
        if (rnu->exist)
        {
            rnu->area.set_area(rnu->area.get_area() + unit_pipeline_share);
        }
    }
    //area.set_area(area.get_area()+ corepipe->area.get_area());

    // Charge each existing unit its pipeline share, then fold the unit into
    // the core area. The accumulation order is ifu, lsu, exu, mmu, rnu,
    // undiffCore, l2cache.
    if (ifu->exist)
    {
        ifu->area.set_area(ifu->area.get_area() + unit_pipeline_share);
        area.set_area(area.get_area() + ifu->area.get_area());
    }
    if (lsu->exist)
    {
        lsu->area.set_area(lsu->area.get_area() + unit_pipeline_share);
        area.set_area(area.get_area() + lsu->area.get_area());
    }
    if (exu->exist)
    {
        exu->area.set_area(exu->area.get_area() + unit_pipeline_share);
        area.set_area(area.get_area() + exu->area.get_area());
    }
    if (mmu->exist)
    {
        mmu->area.set_area(mmu->area.get_area() + unit_pipeline_share);
        area.set_area(area.get_area() + mmu->area.get_area());
    }
    // rnu already received its pipeline share above; rnu is only
    // dereferenced for an OOO core, where it has been allocated.
    if (coredynp.core_ty == OOO && rnu->exist)
    {
        area.set_area(area.get_area() + rnu->area.get_area());
    }
    if (undiffCore->exist)
    {
        area.set_area(area.get_area() + undiffCore->area.get_area());
    }
    if (XML->sys.Private_L2)
    {
        area.set_area(area.get_area() + l2cache->area.get_area());
    }

    //  //clock power
    //  clockNetwork.init_wire_external(is_default, &interface_ip);
    //  clockNetwork.clk_area           = area*1.1;//10% of placement overhead. rule of thumb
    //  clockNetwork.end_wiring_level   = 5;//toplevel metal
    //  clockNetwork.start_wiring_level = 5;//toplevel metal
    //  clockNetwork.num_regs           = corepipe.tot_stage_vector;
    //  clockNetwork.optimize_wire();
}
/*
 * Set the access counts of the five predictor arrays (globalBPT,
 * L1_localBPT, L2_localBPT, chooser, RAS) and derive their energy.
 *
 * is_tdp == true : peak numbers. Reads are predictionW scaled by the branch
 *                  duty cycle, writes are zero. Counts are stored in
 *                  tdp_stats and results go to each array's 'power' and to
 *                  this unit's 'power'.
 * is_tdp == false: runtime numbers taken from the XML statistics of this
 *                  core. Counts are stored in rtp_stats and results go to
 *                  each array's 'rt_power' and to this unit's 'rt_power'.
 *
 * Does nothing when the predictor does not exist.
 *
 * Note: the unit totals are accumulated onto the current value of
 * power / rt_power, they are not reset here.
 */
void BranchPredictor::computeEnergy(bool is_tdp)
{
    if (!exist) return;

    double r_access;
    double w_access;

    if (is_tdp)
    {
        r_access = coredynp.predictionW*coredynp.BR_duty_cycle;
        w_access = 0*coredynp.BR_duty_cycle;

        globalBPT->stats_t.readAc.access  = r_access;
        globalBPT->stats_t.writeAc.access = w_access;
        globalBPT->tdp_stats = globalBPT->stats_t;

        L1_localBPT->stats_t.readAc.access  = r_access;
        L1_localBPT->stats_t.writeAc.access = w_access;
        L1_localBPT->tdp_stats = L1_localBPT->stats_t;

        L2_localBPT->stats_t.readAc.access  = r_access;
        L2_localBPT->stats_t.writeAc.access = w_access;
        L2_localBPT->tdp_stats = L2_localBPT->stats_t;

        chooser->stats_t.readAc.access  = r_access;
        chooser->stats_t.writeAc.access = w_access;
        chooser->tdp_stats = chooser->stats_t;

        RAS->stats_t.readAc.access  = r_access;
        RAS->stats_t.writeAc.access = w_access;
        RAS->tdp_stats = RAS->stats_t;
    }
    else
    {
        //The resolution of BPT accesses is coarse, but this is
        //because most simulators cannot track finer grained details
        r_access = XML->sys.core[ithCore].branch_instructions;
        //every misprediction writes, plus 10% of BR will flip internal bits
        w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;

        globalBPT->stats_t.readAc.access  = r_access;
        globalBPT->stats_t.writeAc.access = w_access;
        globalBPT->rtp_stats = globalBPT->stats_t;

        L1_localBPT->stats_t.readAc.access  = r_access;
        L1_localBPT->stats_t.writeAc.access = w_access;
        L1_localBPT->rtp_stats = L1_localBPT->stats_t;

        L2_localBPT->stats_t.readAc.access  = r_access;
        L2_localBPT->stats_t.writeAc.access = w_access;
        L2_localBPT->rtp_stats = L2_localBPT->stats_t;

        chooser->stats_t.readAc.access  = r_access;
        chooser->stats_t.writeAc.access = w_access;
        chooser->rtp_stats = chooser->stats_t;

        //the RAS is driven by function calls, not by the branch counts
        RAS->stats_t.readAc.access  = XML->sys.core[ithCore].function_calls;
        RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls;
        RAS->rtp_stats = RAS->stats_t;
    }

    //start every array from a clean scratch accumulator
    globalBPT->power_t.reset();
    L1_localBPT->power_t.reset();
    L2_localBPT->power_t.reset();
    chooser->power_t.reset();
    RAS->power_t.reset();

    //dynamic part = reads * per-read dynamic + writes * per-write dynamic
    globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access +
            globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic;
    L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access +
            L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic;
    L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access +
            L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic;
    chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access +
            chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic;
    RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access +
            RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic;

    //Per-array result = scratch accumulator + local_result.power scaled by
    //pppm_lkg (the RAS uses coredynp.pppm_lkg_multhread instead).
    //NOTE(review): the pppm_* factors are defined outside this function;
    //they presumably select the leakage components - confirm.
    if (is_tdp)
    {
        globalBPT->power   = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
        L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
        L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
        chooser->power     = chooser->power_t + chooser->local_result.power*pppm_lkg;
        RAS->power         = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;

        //L2_localBPT used to be left out of this total although its power
        //is computed above and reported by displayEnergy.
        power = power + globalBPT->power + L1_localBPT->power + L2_localBPT->power + chooser->power + RAS->power;
    }
    else
    {
        globalBPT->rt_power   = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg;
        L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg;
        L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg;
        chooser->rt_power     = chooser->power_t + chooser->local_result.power*pppm_lkg;
        RAS->rt_power         = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread;

        //same as above: include the second-level local table in the total
        rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + L2_localBPT->rt_power + chooser->rt_power + RAS->rt_power;
    }
}
void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
if (!exist) return;
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
cout << indent_str<< "Global Predictor:" << endl;
cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str << "Local Predictor:" << endl;
cout << indent_str << "L1_Local Predictor:" << endl;
cout << indent_str_next << "Area = " << L1_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << L1_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? L1_localBPT->power.readOp.longer_channel_leakage:L1_localBPT->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << L1_localBPT->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << L1_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str << "L2_Local Predictor:" << endl;
cout << indent_str_next << "Area = " << L2_localBPT->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << L2_localBPT->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? L2_localBPT->power.readOp.longer_channel_leakage:L2_localBPT->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << L2_localBPT->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << L2_localBPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str << "Chooser:" << endl;
cout << indent_str_next << "Area = " << chooser->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << chooser->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? chooser->power.readOp.longer_channel_leakage:chooser->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << chooser->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << chooser->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str << "RAS:" << endl;
cout << indent_str_next << "Area = " << RAS->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << RAS->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? RAS->power.readOp.longer_channel_leakage:RAS->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << RAS->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << RAS->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
}
else
{
// cout << indent_str_next << "Global Predictor Peak Dynamic = " << globalBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Global Predictor Subthreshold Leakage = " << globalBPT->rt_power.readOp.leakage <<" W" << endl;
// cout << indent_str_next << "Global Predictor Gate Leakage = " << globalBPT->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Local Predictor Peak Dynamic = " << L1_localBPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Local Predictor Subthreshold Leakage = " << L1_localBPT->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Local Predictor Gate Leakage = " << L1_localBPT->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Chooser Peak Dynamic = " << chooser->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Chooser Subthreshold Leakage = " << chooser->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Chooser Gate Leakage = " << chooser->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "RAS Peak Dynamic = " << RAS->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "RAS Subthreshold Leakage = " << RAS->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "RAS Gate Leakage = " << RAS->rt_power.readOp.gate_leakage << " W" << endl;
}
}
void InstFetchU::computeEnergy(bool is_tdp)
{
if (!exist) return;
if (is_tdp)
{
//init stats for Peak
icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle;
icache.caches->stats_t.readAc.miss = 0;
icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
icache.caches->tdp_stats = icache.caches->stats_t;
icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports;
icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports;
icache.missb->tdp_stats = icache.missb->stats_t;
icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports;
icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
icache.ifb->tdp_stats = icache.ifb->stats_t;
icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports;
icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports;
icache.prefetchb->tdp_stats = icache.prefetchb->stats_t;
IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width;
IB->tdp_stats = IB->stats_t;
if (coredynp.predictionW>0)
{
BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses;
BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses;
}
ID_inst->stats_t.readAc.access = coredynp.decodeW;
ID_operand->stats_t.readAc.access = coredynp.decodeW;
ID_misc->stats_t.readAc.access = coredynp.decodeW;
ID_inst->tdp_stats = ID_inst->stats_t;
ID_operand->tdp_stats = ID_operand->stats_t;
ID_misc->tdp_stats = ID_misc->stats_t;
}
else
{
//init stats for Runtime Dynamic (RTP)
icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses;
icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses;
icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss;
icache.caches->rtp_stats = icache.caches->stats_t;
icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
icache.missb->rtp_stats = icache.missb->stats_t;
icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
icache.ifb->rtp_stats = icache.ifb->stats_t;
icache.prefetchb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss;
icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss;
icache.prefetchb->rtp_stats = icache.prefetchb->stats_t;
IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions;
IB->rtp_stats = IB->stats_t;
if (coredynp.predictionW>0)
{
BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions;
BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions;
BTB->rtp_stats = BTB->stats_t;
}
ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions;
ID_inst->rtp_stats = ID_inst->stats_t;
ID_operand->rtp_stats = ID_operand->stats_t;
ID_misc->rtp_stats = ID_misc->stats_t;
}
icache.power_t.reset();
IB->power_t.reset();
// ID_inst->power_t.reset();
// ID_operand->power_t.reset();
// ID_misc->power_t.reset();
if (coredynp.predictionW>0)
{
BTB->power_t.reset();
}
icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+
//icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+
icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel
icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache
icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic +
icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write
icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic +
icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic;
icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic +
icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic;
IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access +
IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic;
if (coredynp.predictionW>0)
{
BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access +
BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic;
BPT->computeEnergy(is_tdp);
}
if (is_tdp)
{
// icache.power = icache.power_t +
// (icache.caches->local_result.power)*pppm_lkg +
// (icache.missb->local_result.power +
// icache.ifb->local_result.power +
// icache.prefetchb->local_result.power)*pppm_Isub;
icache.power = icache.power_t +
(icache.caches->local_result.power +
icache.missb->local_result.power +
icache.ifb->local_result.power +
icache.prefetchb->local_result.power)*pppm_lkg;
IB->power = IB->power_t + IB->local_result.power*pppm_lkg;
power = power + icache.power + IB->power;
if (coredynp.predictionW>0)
{
BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg;
power = power + BTB->power + BPT->power;
}
ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic;
ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic;
ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic;
ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access;
ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access;
ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access;
power = power + (ID_inst->power +
ID_operand->power +
ID_misc->power);
}
else
{
// icache.rt_power = icache.power_t +
// (icache.caches->local_result.power)*pppm_lkg +
// (icache.missb->local_result.power +
// icache.ifb->local_result.power +
// icache.prefetchb->local_result.power)*pppm_Isub;
icache.rt_power = icache.power_t +
(icache.caches->local_result.power +
icache.missb->local_result.power +
icache.ifb->local_result.power +
icache.prefetchb->local_result.power)*pppm_lkg;
IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg;
rt_power = rt_power + icache.rt_power + IB->rt_power;
if (coredynp.predictionW>0)
{
BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg;
rt_power = rt_power + BTB->rt_power + BPT->rt_power;
}
ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access;
ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access;
ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access;
rt_power = rt_power + (ID_inst->rt_power +
ID_operand->rt_power +
ID_misc->rt_power);
}
}
void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
if (!exist) return;
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
cout << indent_str<< "Instruction Cache:" << endl;
cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (coredynp.predictionW>0)
{
cout << indent_str<< "Branch Target Buffer:" << endl;
cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (BPT->exist)
{
cout << indent_str<< "Branch Predictor:" << endl;
cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (plevel>3)
{
BPT->displayEnergy(indent+4, plevel, is_tdp);
}
}
}
cout << indent_str<< "Instruction Buffer:" << endl;
cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< "Instruction Decoder:" << endl;
cout << indent_str_next << "Area = " << (ID_inst->area.get_area() +
ID_operand->area.get_area() +
ID_misc->area.get_area())*coredynp.decodeW*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << (ID_inst->power.readOp.dynamic +
ID_operand->power.readOp.dynamic +
ID_misc->power.readOp.dynamic)*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? (ID_inst->power.readOp.longer_channel_leakage +
ID_operand->power.readOp.longer_channel_leakage +
ID_misc->power.readOp.longer_channel_leakage):
(ID_inst->power.readOp.leakage +
ID_operand->power.readOp.leakage +
ID_misc->power.readOp.leakage)) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << (ID_inst->power.readOp.gate_leakage +
ID_operand->power.readOp.gate_leakage +
ID_misc->power.readOp.gate_leakage) << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << (ID_inst->rt_power.readOp.dynamic +
ID_operand->rt_power.readOp.dynamic +
ID_misc->rt_power.readOp.dynamic)/executionTime << " W" << endl;
cout <<endl;
}
else
{
// cout << indent_str_next << "Instruction Cache Peak Dynamic = " << icache.rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Instruction Cache Subthreshold Leakage = " << icache.rt_power.readOp.leakage <<" W" << endl;
// cout << indent_str_next << "Instruction Cache Gate Leakage = " << icache.rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Instruction Buffer Peak Dynamic = " << IB->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Instruction Buffer Subthreshold Leakage = " << IB->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Instruction Buffer Gate Leakage = " << IB->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Branch Target Buffer Peak Dynamic = " << BTB->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Branch Target Buffer Subthreshold Leakage = " << BTB->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Branch Target Buffer Gate Leakage = " << BTB->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Branch Predictor Peak Dynamic = " << BPT->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Branch Predictor Subthreshold Leakage = " << BPT->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Branch Predictor Gate Leakage = " << BPT->rt_power.readOp.gate_leakage << " W" << endl;
}
}
/*
 * Compute the energy of the renaming unit and add it to this unit's totals.
 *
 *   is_tdp == true  : access counts come from the structures' port counts and
 *                     the decode width; they are saved in tdp_stats and the
 *                     results are written to the components' `power` members
 *                     and accumulated into `power`.
 *   is_tdp == false : access counts come from the XML statistics of core
 *                     `ithCore`; they are saved in rtp_stats and the results
 *                     are written to the components' `rt_power` members and
 *                     accumulated into `rt_power`.
 *
 * The function runs in three phases:
 *   1. fill stats_t of every structure that exists for the configured core
 *      type / scheduler type / rename-map type,
 *   2. convert those counts into power_t (count * per-access energy),
 *   3. combine power_t with local_result.power scaled by
 *      coredynp.pppm_lkg_multhread and add everything to the unit total.
 *
 * Which structures are touched:
 *   OOO + PhysicalRegFile    : iFRAT, fFRAT, iRRAT, fRRAT, ifreeL, ffreeL
 *   OOO + ReservationStation : iFRAT, fFRAT, ifreeL (unified free list)
 *   otherwise (non-OOO)      : idcl, fdcl, and only when issueW > 1
 */
void RENAMINGU::computeEnergy(bool is_tdp)
{
    if (!exist) return;
    double pppm_t[4] = {1,1,1,1};

    /* ------------------------- Phase 1: access counts ------------------------- */
    if (is_tdp)
    {//init stats for Peak
        if (coredynp.core_ty==OOO)
        {
            if (coredynp.scheu_ty==PhysicalRegFile)
            {
                if (coredynp.rm_ty ==RAMbased)
                {
                    iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
                    iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
                    iFRAT->tdp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
                    fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
                    fFRAT->tdp_stats = fFRAT->stats_t;
                }
                else if (coredynp.rm_ty ==CAMbased)
                {
                    // For the CAM-based map the search ports supply the read count.
                    iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
                    iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
                    iFRAT->tdp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
                    fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
                    fFRAT->tdp_stats = fFRAT->stats_t;
                }
                iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports;
                iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports;
                iRRAT->tdp_stats = iRRAT->stats_t;
                fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports;
                fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports;
                fRRAT->tdp_stats = fRRAT->stats_t;
                // Free lists use the decode width rather than their port counts.
                ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
                ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
                ifreeL->tdp_stats = ifreeL->stats_t;
                ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports;
                ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports;
                ffreeL->tdp_stats = ffreeL->stats_t;
            }
            else if (coredynp.scheu_ty==ReservationStation)
            {
                if (coredynp.rm_ty ==RAMbased)
                {
                    iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports;
                    iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
                    iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports;
                    iFRAT->tdp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports;
                    fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
                    fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports;
                    fFRAT->tdp_stats = fFRAT->stats_t;
                }
                else if (coredynp.rm_ty ==CAMbased)
                {
                    iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports;
                    iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports;
                    iFRAT->tdp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports;
                    fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports;
                    fFRAT->tdp_stats = fFRAT->stats_t;
                }
                //Unified free list for both int and fp
                ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;
                ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports;
                ifreeL->tdp_stats = ifreeL->stats_t;
            }
            idcl->stats_t.readAc.access = coredynp.decodeW;
            fdcl->stats_t.readAc.access = coredynp.decodeW;
            idcl->tdp_stats = idcl->stats_t;
            fdcl->tdp_stats = fdcl->stats_t;
        }
        else
        {
            if (coredynp.issueW>1)
            {
                idcl->stats_t.readAc.access = coredynp.decodeW;
                fdcl->stats_t.readAc.access = coredynp.decodeW;
                idcl->tdp_stats = idcl->stats_t;
                fdcl->tdp_stats = fdcl->stats_t;
            }
        }
    }
    else
    {//init stats for Runtime Dynamic (RTP)
        if (coredynp.core_ty==OOO)
        {
            if (coredynp.scheu_ty==PhysicalRegFile)
            {
                if (coredynp.rm_ty ==RAMbased)
                {
                    iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
                    iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
                    iFRAT->rtp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
                    fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
                    fFRAT->rtp_stats = fFRAT->stats_t;
                }
                else if (coredynp.rm_ty ==CAMbased)
                {
                    iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
                    iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
                    iFRAT->rtp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
                    fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
                    fFRAT->rtp_stats = fFRAT->stats_t;
                }
                iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16
                iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
                iRRAT->rtp_stats = iRRAT->stats_t;
                fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16
                fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
                fRRAT->rtp_stats = fRRAT->stats_t;
                ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
                ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes;
                ifreeL->rtp_stats = ifreeL->stats_t;
                ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
                ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes;
                ffreeL->rtp_stats = ffreeL->stats_t;
            }
            else if (coredynp.scheu_ty==ReservationStation)
            {
                if (coredynp.rm_ty ==RAMbased)
                {
                    iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
                    iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
                    iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs.
                    iFRAT->rtp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
                    fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
                    fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions;
                    fFRAT->rtp_stats = fFRAT->stats_t;
                }
                else if (coredynp.rm_ty ==CAMbased)
                {
                    iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads;
                    iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes;
                    iFRAT->rtp_stats = iFRAT->stats_t;
                    fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads;
                    fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes;
                    fFRAT->rtp_stats = fFRAT->stats_t;
                }
                //Unified free list for both int and fp since the ROB acts as physical registers
                ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads +
                                                XML->sys.core[ithCore].fp_rename_reads;
                //HACK: 2-> since some of the renaming in the same group is terminated early
                ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes +
                                                    XML->sys.core[ithCore].fp_rename_writes);
                ifreeL->rtp_stats = ifreeL->stats_t;
            }
            // NOTE(review): idcl is driven by rename_reads while fdcl is driven by
            // fp_rename_writes -- confirm that this asymmetry is intended.
            idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads;
            fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes;
            idcl->rtp_stats = idcl->stats_t;
            fdcl->rtp_stats = fdcl->stats_t;
        }
        else
        {
            if (coredynp.issueW>1)
            {
                idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions;
                fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions;
                idcl->rtp_stats = idcl->stats_t;
                fdcl->rtp_stats = fdcl->stats_t;
            }
        }
    }

    /* --------------------- Phase 2: counts -> power_t ------------------------- */
    /* Compute engine */
    if (coredynp.core_ty==OOO)
    {
        if (coredynp.scheu_ty==PhysicalRegFile)
        {
            if (coredynp.rm_ty ==RAMbased)
            {
                iFRAT->power_t.reset();
                fFRAT->power_t.reset();
                // Every front-end RAT read is charged the RAT read energy plus
                // the corresponding dcl read energy.
                iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
                        *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
                        +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
                fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
                        *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
                        +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
            }
            else if (coredynp.rm_ty ==CAMbased)
            {
                iFRAT->power_t.reset();
                fFRAT->power_t.reset();
                // Same as above, but the per-read energy is the search energy.
                iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
                        *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
                        +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
                fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
                        *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
                        +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
            }
            iRRAT->power_t.reset();
            fRRAT->power_t.reset();
            ifreeL->power_t.reset();
            ffreeL->power_t.reset();
            iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic
                    +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic);
            fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic
                    +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic);
            ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
                    +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
            ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic
                    +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic);
        }
        else if (coredynp.scheu_ty==ReservationStation)
        {
            if (coredynp.rm_ty ==RAMbased)
            {
                iFRAT->power_t.reset();
                fFRAT->power_t.reset();
                // In this configuration the RAM-based RAT also pays for searches.
                iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
                        *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic)
                        +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic
                        +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic);
                fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
                        *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic)
                        +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic
                        +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic);
            }
            else if (coredynp.rm_ty ==CAMbased)
            {
                iFRAT->power_t.reset();
                fFRAT->power_t.reset();
                iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access
                        *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic)
                        +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic);
                fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access
                        *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic)
                        +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic);
            }
            ifreeL->power_t.reset();
            ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic
                    +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic);
        }
    }
    else
    {
        if (coredynp.issueW>1)
        {
            idcl->power_t.reset();
            fdcl->power_t.reset();
            set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access);
            idcl->power_t = idcl->power * pppm_t;
            // fdcl is scaled by its own access count in both the first and the
            // last slot, mirroring the idcl call above.
            set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, fdcl->stats_t.readAc.access);
            fdcl->power_t = fdcl->power * pppm_t;
        }
    }

    /* -------------- Phase 3: assign to power (TDP) or rt_power (RTP) ---------- */
    if (is_tdp)
    {
        if (coredynp.core_ty==OOO)
        {
            if (coredynp.scheu_ty==PhysicalRegFile)
            {
                iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
                fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
                iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
                fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
                ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
                ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
                power = power + (iFRAT->power + fFRAT->power)
                              + (iRRAT->power + fRRAT->power)
                              + (ifreeL->power + ffreeL->power);
            }
            else if (coredynp.scheu_ty==ReservationStation)
            {
                iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
                fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
                ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
                power = power + (iFRAT->power + fFRAT->power)
                              + ifreeL->power;
            }
        }
        else
        {
            power = power + idcl->power_t + fdcl->power_t;
        }
    }
    else
    {
        if (coredynp.core_ty==OOO)
        {
            if (coredynp.scheu_ty==PhysicalRegFile)
            {
                iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
                fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
                iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread;
                fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread;
                ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
                ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread;
                rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
                                    + (iRRAT->rt_power + fRRAT->rt_power)
                                    + (ifreeL->rt_power + ffreeL->rt_power);
            }
            else if (coredynp.scheu_ty==ReservationStation)
            {
                iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t;
                fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t;
                ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread;
                rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power)
                                    + ifreeL->rt_power;
            }
        }
        else
        {
            rt_power = rt_power + idcl->power_t + fdcl->power_t;
        }
    }
}
/*
 * Print the area/power report of the renaming unit to stdout.
 *
 *   indent - number of blanks placed in front of every section title; the
 *            individual figures get two more blanks.
 *   plevel - verbosity level; not used by this function.
 *   is_tdp - true : one titled section per structure (area, peak dynamic,
 *                   subthreshold leakage, gate leakage, runtime dynamic);
 *            false: one flat line per figure, read from the rt_power members.
 *
 * For OOO cores the front-end RATs and the free list are reported, plus the
 * retire RATs and the FP free list when the scheduler is PhysicalRegFile.
 * For all other cores only the integer and FP dcl blocks are reported.
 */
void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
    if (!exist) return;
    string indent_str(indent, ' ');
    string indent_str_next(indent+2, ' ');
    // Selects which leakage figure is shown as "Subthreshold Leakage" below.
    bool long_channel = XML->sys.longer_channel_device;

    if (is_tdp)
    {
        if (coredynp.core_ty==OOO)
        {
            cout << indent_str << "Int Front End RAT:" << endl;
            cout << indent_str_next << "Area = " << iFRAT->area.get_area()*1e-6 << " mm^2" << endl;
            cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                 << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
            cout << endl;

            cout << indent_str << "FP Front End RAT:" << endl;
            cout << indent_str_next << "Area = " << fFRAT->area.get_area()*1e-6 << " mm^2" << endl;
            cout << indent_str_next << "Peak Dynamic = " << fFRAT->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                 << (long_channel? fFRAT->power.readOp.longer_channel_leakage:fFRAT->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << fFRAT->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << fFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
            cout << endl;

            cout << indent_str << "Free List:" << endl;
            cout << indent_str_next << "Area = " << ifreeL->area.get_area()*1e-6 << " mm^2" << endl;
            cout << indent_str_next << "Peak Dynamic = " << ifreeL->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                 << (long_channel? ifreeL->power.readOp.longer_channel_leakage:ifreeL->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << ifreeL->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << ifreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
            cout << endl;

            if (coredynp.scheu_ty==PhysicalRegFile)
            {
                cout << indent_str << "Int Retire RAT: " << endl;
                cout << indent_str_next << "Area = " << iRRAT->area.get_area()*1e-6 << " mm^2" << endl;
                cout << indent_str_next << "Peak Dynamic = " << iRRAT->power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "Subthreshold Leakage = "
                     << (long_channel? iRRAT->power.readOp.longer_channel_leakage:iRRAT->power.readOp.leakage) << " W" << endl;
                cout << indent_str_next << "Gate Leakage = " << iRRAT->power.readOp.gate_leakage << " W" << endl;
                cout << indent_str_next << "Runtime Dynamic = " << iRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
                cout << endl;

                cout << indent_str << "FP Retire RAT:" << endl;
                cout << indent_str_next << "Area = " << fRRAT->area.get_area()*1e-6 << " mm^2" << endl;
                cout << indent_str_next << "Peak Dynamic = " << fRRAT->power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "Subthreshold Leakage = "
                     << (long_channel? fRRAT->power.readOp.longer_channel_leakage:fRRAT->power.readOp.leakage) << " W" << endl;
                cout << indent_str_next << "Gate Leakage = " << fRRAT->power.readOp.gate_leakage << " W" << endl;
                cout << indent_str_next << "Runtime Dynamic = " << fRRAT->rt_power.readOp.dynamic/executionTime << " W" << endl;
                cout << endl;

                cout << indent_str << "FP Free List:" << endl;
                cout << indent_str_next << "Area = " << ffreeL->area.get_area()*1e-6 << " mm^2" << endl;
                cout << indent_str_next << "Peak Dynamic = " << ffreeL->power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "Subthreshold Leakage = "
                     << (long_channel? ffreeL->power.readOp.longer_channel_leakage:ffreeL->power.readOp.leakage) << " W" << endl;
                cout << indent_str_next << "Gate Leakage = " << ffreeL->power.readOp.gate_leakage << " W" << endl;
                cout << indent_str_next << "Runtime Dynamic = " << ffreeL->rt_power.readOp.dynamic/executionTime << " W" << endl;
                cout << endl;
            }
        }
        else
        {
            // No area line is printed for the dcl blocks.
            cout << indent_str << "Int DCL:" << endl;
            cout << indent_str_next << "Peak Dynamic = " << idcl->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                 << (long_channel? idcl->power.readOp.longer_channel_leakage:idcl->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << idcl->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << idcl->rt_power.readOp.dynamic/executionTime << " W" << endl;

            cout << indent_str << "FP DCL:" << endl;
            cout << indent_str_next << "Peak Dynamic = " << fdcl->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                 << (long_channel? fdcl->power.readOp.longer_channel_leakage:fdcl->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << fdcl->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << fdcl->rt_power.readOp.dynamic/executionTime << " W" << endl;
        }
    }
    else
    {
        if (coredynp.core_ty==OOO)
        {
            cout << indent_str_next << "Int Front End RAT Peak Dynamic = " << iFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Int Front End RAT Subthreshold Leakage = " << iFRAT->rt_power.readOp.leakage << " W" << endl;
            cout << indent_str_next << "Int Front End RAT Gate Leakage = " << iFRAT->rt_power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "FP Front End RAT Peak Dynamic = " << fFRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "FP Front End RAT Subthreshold Leakage = " << fFRAT->rt_power.readOp.leakage << " W" << endl;
            cout << indent_str_next << "FP Front End RAT Gate Leakage = " << fFRAT->rt_power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Free List Peak Dynamic = " << ifreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Free List Subthreshold Leakage = " << ifreeL->rt_power.readOp.leakage << " W" << endl;
            // The free list reports its own gate leakage, like its two lines above.
            cout << indent_str_next << "Free List Gate Leakage = " << ifreeL->rt_power.readOp.gate_leakage << " W" << endl;
            if (coredynp.scheu_ty==PhysicalRegFile)
            {
                cout << indent_str_next << "Int Retire RAT Peak Dynamic = " << iRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "Int Retire RAT Subthreshold Leakage = " << iRRAT->rt_power.readOp.leakage << " W" << endl;
                cout << indent_str_next << "Int Retire RAT Gate Leakage = " << iRRAT->rt_power.readOp.gate_leakage << " W" << endl;
                cout << indent_str_next << "FP Retire RAT Peak Dynamic = " << fRRAT->rt_power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "FP Retire RAT Subthreshold Leakage = " << fRRAT->rt_power.readOp.leakage << " W" << endl;
                cout << indent_str_next << "FP Retire RAT Gate Leakage = " << fRRAT->rt_power.readOp.gate_leakage << " W" << endl;
                cout << indent_str_next << "FP Free List Peak Dynamic = " << ffreeL->rt_power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "FP Free List Subthreshold Leakage = " << ffreeL->rt_power.readOp.leakage << " W" << endl;
                // The FP free list reports its own gate leakage as well.
                cout << indent_str_next << "FP Free List Gate Leakage = " << ffreeL->rt_power.readOp.gate_leakage << " W" << endl;
            }
        }
        else
        {
            cout << indent_str_next << "Int DCL Peak Dynamic = " << idcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Int DCL Subthreshold Leakage = " << idcl->rt_power.readOp.leakage << " W" << endl;
            cout << indent_str_next << "Int DCL Gate Leakage = " << idcl->rt_power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "FP DCL Peak Dynamic = " << fdcl->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "FP DCL Subthreshold Leakage = " << fdcl->rt_power.readOp.leakage << " W" << endl;
            cout << indent_str_next << "FP DCL Gate Leakage = " << fdcl->rt_power.readOp.gate_leakage << " W" << endl;
        }
    }
}
void SchedulerU::computeEnergy(bool is_tdp)
{
if (!exist) return;
double ROB_duty_cycle;
// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? (coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0
// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1;
ROB_duty_cycle = 1;
//init stats
if (is_tdp)
{
if (coredynp.core_ty==OOO)
{
int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
int_inst_window->tdp_stats = int_inst_window->stats_t;
fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines;
fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines;
fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines;
fp_inst_window->tdp_stats = fp_inst_window->stats_t;
if (XML->sys.core[ithCore].ROB_size >0)
{
ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle;
ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle;
ROB->tdp_stats = ROB->stats_t;
/*
* When inst commits, ROB must be read.
* Because for Physcial register based cores, physical register tag in ROB
* need to be read out and write into RRAT/CAM based RAT.
* For RS based cores, register content that stored in ROB must be
* read out and stored in architectural registers.
*
* if no-register is involved, the ROB read out operation when instruction commits can be ignored.
* assuming 20% insts. belong this type.
* TODO: ROB duty_cycle need to be revisited
*/
}
}
else if (coredynp.multithreaded)
{
int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports;
int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports;
int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines;
int_inst_window->tdp_stats = int_inst_window->stats_t;
}
}
else
{//rtp
if (coredynp.core_ty==OOO)
{
int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads;
int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes;
int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses;
int_inst_window->rtp_stats = int_inst_window->stats_t;
fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads;
fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes;
fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses;
fp_inst_window->rtp_stats = fp_inst_window->stats_t;
if (XML->sys.core[ithCore].ROB_size >0)
{
ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads;
ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes;
/* ROB need to be updated in RS based OOO when new values are produced,
* this update may happen before the commit stage when ROB entry is released
* 1. ROB write at instruction inserted in
* 2. ROB write as results produced (for RS based OOO only)
* 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF
* For Physical reg based OOO, no data stored in ROB, but register tags need to be
* read out and used to set the RRAT and to recycle the register tag to free list buffer
*/
ROB->rtp_stats = ROB->stats_t;
}
}
else if (coredynp.multithreaded)
{
int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions;
int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions);
int_inst_window->rtp_stats = int_inst_window->stats_t;
}
}
//computation engine
if (coredynp.core_ty==OOO)
{
int_inst_window->power_t.reset();
fp_inst_window->power_t.reset();
/* each instruction needs to write to scheduler, read out when all resources and source operands are ready
* two search ops with one for each source operand
*
*/
int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
+ int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
+ int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
+ int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic;
fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access
+ fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access
+ fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access
+ fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
if (XML->sys.core[ithCore].ROB_size >0)
{
ROB->power_t.reset();
ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access +
ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic;
}
}
else if (coredynp.multithreaded)
{
int_inst_window->power_t.reset();
int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access
+ int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access
+ int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access
+ int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic;
}
//assign values
if (is_tdp)
{
if (coredynp.core_ty==OOO)
{
int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
power = power + int_inst_window->power + fp_inst_window->power;
if (XML->sys.core[ithCore].ROB_size >0)
{
ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg;
power = power + ROB->power;
}
}
else if (coredynp.multithreaded)
{
// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
power = power + int_inst_window->power;
}
}
else
{//rtp
if (coredynp.core_ty==OOO)
{
int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power;
if (XML->sys.core[ithCore].ROB_size >0)
{
ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg;
rt_power = rt_power + ROB->rt_power;
}
}
else if (coredynp.multithreaded)
{
// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg;
rt_power = rt_power + int_inst_window->rt_power;
}
}
// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1);
// cout<<"Scheduler power="<<power.readOp.dynamic<<"leakage="<<power.readOp.leakage<<endl;
// cout<<"IW="<<int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.readAc.access +
// + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access<<"leakage="<<int_inst_window->local_result.power.readOp.leakage<<endl;
// cout<<"selection"<<instruction_selection->power.readOp.dynamic<<"leakage"<<instruction_selection->power.readOp.leakage<<endl;
}
void SchedulerU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
if (!exist) return;
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
if (coredynp.core_ty==OOO)
{
cout << indent_str << "Instruction Window:" << endl;
cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str << "FP Instruction Window:" << endl;
cout << indent_str_next << "Area = " << fp_inst_window->area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << fp_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? fp_inst_window->power.readOp.longer_channel_leakage:fp_inst_window->power.readOp.leakage ) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << fp_inst_window->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << fp_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (XML->sys.core[ithCore].ROB_size >0)
{
cout << indent_str<<"ROB:" << endl;
cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
}
}
else if (coredynp.multithreaded)
{
cout << indent_str << "Instruction Window:" << endl;
cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
}
}
else
{
if (coredynp.core_ty==OOO)
{
cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "FP Instruction Window Peak Dynamic = " << fp_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "FP Instruction Window Subthreshold Leakage = " << fp_inst_window->rt_power.readOp.leakage << " W" << endl;
cout << indent_str_next << "FP Instruction Window Gate Leakage = " << fp_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
if (XML->sys.core[ithCore].ROB_size >0)
{
cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl;
cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl;
}
}
else if (coredynp.multithreaded)
{
cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl;
cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl;
}
}
}
/*
 * Energy model of the load/store unit: the data cache with its miss,
 * fill (ifb), prefetch and write-back buffers, the LSQ and, for OOO cores
 * with a non-empty load buffer, the separate LoadQ.
 *
 * The routine fills stats_t for every structure (peak port-based counts for
 * the TDP pass, XML counters for the runtime pass), converts the counts into
 * dynamic energy in power_t, and finally stores power_t plus the
 * local_result power scaled by pppm_lkg into power (TDP) or rt_power
 * (runtime), adding it to the unit-level total.
 *
 * is_tdp: true for the peak-power pass, false for the runtime pass.
 */
void LoadStoreU::computeEnergy(bool is_tdp)
{
    if (!exist) return;

    // The LoadQ is only accounted for on OOO cores with a non-empty load buffer.
    const bool has_loadq = (coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0);

    if (is_tdp)
    {
        //init stats for Peak
        // peak cache traffic: RW ports split 0.67 reads / 0.33 writes, scaled by the LSU duty cycle, no misses
        dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
        dcache.caches->stats_t.readAc.miss = 0;
        dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
        dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle;
        dcache.caches->stats_t.writeAc.miss = 0;
        dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
        dcache.caches->tdp_stats = dcache.caches->stats_t;

        // every buffer is charged one read and one write per search port of its own
        dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports;
        dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports;
        dcache.missb->tdp_stats = dcache.missb->stats_t;

        dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports;
        dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports;
        dcache.ifb->tdp_stats = dcache.ifb->stats_t;

        dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports;
        // use the prefetch buffer's own port count (not the fill buffer's), like every other buffer here
        dcache.prefetchb->stats_t.writeAc.access = dcache.prefetchb->l_ip.num_search_ports;
        dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t;

        if (cache_p==Write_back)
        {
            dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports;
            dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports;
            dcache.wbb->tdp_stats = dcache.wbb->stats_t;
        }

        LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
        LSQ->tdp_stats = LSQ->stats_t;
        if (has_loadq)
        {
            LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
            LoadQ->tdp_stats = LoadQ->stats_t;
        }
    }
    else
    {
        //init stats for Runtime Dynamic (RTP)
        dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses;
        dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses;
        dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss;
        dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses;
        dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses;
        dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss;
        dcache.caches->rtp_stats = dcache.caches->stats_t;

        if (cache_p==Write_back)
        {
            // write-back cache: buffers (including wbb) are charged once per write miss
            dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.missb->rtp_stats = dcache.missb->stats_t;
            dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.ifb->rtp_stats = dcache.ifb->stats_t;
            dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
            dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss;
            dcache.wbb->rtp_stats = dcache.wbb->stats_t;
        }
        else
        {
            // other write policies: buffers are charged once per read miss, no wbb
            dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
            dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
            dcache.missb->rtp_stats = dcache.missb->stats_t;
            dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
            dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
            dcache.ifb->rtp_stats = dcache.ifb->stats_t;
            dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss;
            dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss;
            dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t;
        }

        LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered
        LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;
        LSQ->rtp_stats = LSQ->stats_t;
        if (has_loadq)
        {
            LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
            LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions;
            LoadQ->rtp_stats = LoadQ->stats_t;
        }
    }

    //dynamic energy of the cache array itself
    dcache.power_t.reset();
    LSQ->power_t.reset();
    dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+
            dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+
            dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+
            dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic);
    if (cache_p==Write_back)
    {//write miss will generate a write later
        dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic;
    }

    //dynamic energy of the cache buffers; each access to a buffer involves a CAM search and a write
    dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic +
            dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;
    dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic +
            dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic;
    dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic +
            dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic;
    if (cache_p==Write_back)
    {
        dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic
            + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic;
    }

    //dynamic energy of the queues; every memory access involves at least two operations (search+read, write)
    if (has_loadq)
    {
        LoadQ->power_t.reset();
        LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+
                LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;
    }
    // the LSQ is charged the same way whether or not a separate LoadQ exists
    LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic)
            + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;

    //publish into the peak or the runtime totals; the wbb only contributes for write-back caches
    if (is_tdp)
    {
        dcache.power = dcache.power_t + (dcache.caches->local_result.power +
                dcache.missb->local_result.power +
                dcache.ifb->local_result.power +
                dcache.prefetchb->local_result.power) *pppm_lkg;
        if (cache_p==Write_back)
        {
            dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg;
        }
        LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
        power = power + dcache.power + LSQ->power;
        if (has_loadq)
        {
            LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
            power = power + LoadQ->power;
        }
    }
    else
    {
        dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power +
                dcache.missb->local_result.power +
                dcache.ifb->local_result.power +
                dcache.prefetchb->local_result.power )*pppm_lkg;
        if (cache_p==Write_back)
        {
            dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg;
        }
        LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg;
        rt_power = rt_power + dcache.rt_power + LSQ->rt_power;
        if (has_loadq)
        {
            LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg;
            rt_power = rt_power + LoadQ->rt_power;
        }
    }
}
/*
 * Prints the load/store unit report to stdout.
 *
 * indent : number of leading spaces for section titles; detail lines are
 *          indented two further spaces.
 * plevel : not used by this routine.
 * is_tdp : true  -> full report (area, peak dynamic = power * clockRate,
 *                   leakage, runtime dynamic = rt_power / executionTime);
 *          false -> compact report built from rt_power only.
 *
 * In-order cores report a single "Load/Store Queue"; other cores report the
 * LSQ as "StoreQ" and, only when load_buffer_size > 0, a separate "LoadQ".
 * The LoadQ guard is applied in both report modes, because computeEnergy
 * never fills in LoadQ results when the load buffer is empty.
 */
void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
    if (!exist) return;
    string indent_str(indent, ' ');
    string indent_str_next(indent+2, ' ');
    bool long_channel = XML->sys.longer_channel_device;
    const bool has_load_buffer = (XML->sys.core[ithCore].load_buffer_size >0);

    if (is_tdp)
    {
        cout << indent_str << "Data Cache:" << endl;
        cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl;
        cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Subthreshold Leakage = "
            << (long_channel? dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl;
        cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl;
        cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl;
        cout <<endl;
        if (coredynp.core_ty==Inorder)
        {
            cout << indent_str << "Load/Store Queue:" << endl;
            cout << indent_str_next << "Area = " << LSQ->area.get_area()*1e-6 << " mm^2" << endl;
            cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
            cout <<endl;
        }
        else
        {
            if (has_load_buffer)
            {
                cout << indent_str << "LoadQ:" << endl;
                cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl;
                cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "Subthreshold Leakage = "
                    << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl;
                cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl;
                cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl;
                cout <<endl;
            }
            cout << indent_str<< "StoreQ:" << endl;
            cout << indent_str_next << "Area = " << LSQ->area.get_area() *1e-6<< " mm^2" << endl;
            cout << indent_str_next << "Peak Dynamic = " << LSQ->power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Subthreshold Leakage = "
                << (long_channel? LSQ->power.readOp.longer_channel_leakage:LSQ->power.readOp.leakage) << " W" << endl;
            cout << indent_str_next << "Gate Leakage = " << LSQ->power.readOp.gate_leakage << " W" << endl;
            cout << indent_str_next << "Runtime Dynamic = " << LSQ->rt_power.readOp.dynamic/executionTime<< " W" << endl;
            cout <<endl;
        }
    }
    else
    {
        cout << indent_str_next << "Data Cache Peak Dynamic = " << dcache.rt_power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Data Cache Subthreshold Leakage = " << dcache.rt_power.readOp.leakage <<" W" << endl;
        cout << indent_str_next << "Data Cache Gate Leakage = " << dcache.rt_power.readOp.gate_leakage << " W" << endl;
        if (coredynp.core_ty==Inorder)
        {
            cout << indent_str_next << "Load/Store Queue Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "Load/Store Queue Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
            cout << indent_str_next << "Load/Store Queue Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
        }
        else
        {
            // Skip the LoadQ lines when no load buffer is configured: its
            // results are never computed in that case.
            if (has_load_buffer)
            {
                cout << indent_str_next << "LoadQ Peak Dynamic = " << LoadQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
                cout << indent_str_next << "LoadQ Subthreshold Leakage = " << LoadQ->rt_power.readOp.leakage << " W" << endl;
                cout << indent_str_next << "LoadQ Gate Leakage = " << LoadQ->rt_power.readOp.gate_leakage << " W" << endl;
            }
            cout << indent_str_next << "StoreQ Peak Dynamic = " << LSQ->rt_power.readOp.dynamic*clockRate << " W" << endl;
            cout << indent_str_next << "StoreQ Subthreshold Leakage = " << LSQ->rt_power.readOp.leakage << " W" << endl;
            cout << indent_str_next << "StoreQ Gate Leakage = " << LSQ->rt_power.readOp.gate_leakage << " W" << endl;
        }
    }
}
/*
 * Energy model of the memory management unit (instruction and data TLB).
 *
 * Access/miss counts are peak values for the TDP pass (search ports, with
 * the DTLB scaled by the LSU duty cycle, and no misses) or the XML counters
 * for the runtime pass. Dynamic energy charges one search per access plus
 * one write per miss. The result, plus the local_result power scaled by
 * pppm_lkg, goes into power (TDP) or rt_power (runtime) and is added to the
 * unit-level total.
 *
 * is_tdp: true for the peak-power pass, false for the runtime pass.
 */
void MemManU::computeEnergy(bool is_tdp)
{
    if (!exist) return;

    //---- access and miss counts ----
    if (is_tdp)
    {
        // peak: all search ports busy, nothing misses
        itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports;
        itlb->stats_t.readAc.miss = 0;
        dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle;
        dtlb->stats_t.readAc.miss = 0;
    }
    else
    {
        // runtime: counters supplied through the XML input
        itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses;
        itlb->stats_t.readAc.miss = XML->sys.core[ithCore].itlb.total_misses;
        dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses;
        dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses;
    }
    // hits are derived the same way in both passes
    itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss;
    dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss;

    //---- dynamic energy ----
    // A fully-associative structure spends most of its power in the tag
    // search, so the search is charged per access rather than per hit;
    // each miss additionally costs one write.
    itlb->power_t.reset();
    dtlb->power_t.reset();
    itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access * itlb->local_result.power.searchOp.dynamic
        + itlb->stats_t.readAc.miss * itlb->local_result.power.writeOp.dynamic;
    dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access * dtlb->local_result.power.searchOp.dynamic
        + dtlb->stats_t.readAc.miss * dtlb->local_result.power.writeOp.dynamic;

    //---- snapshot the stats and publish the result ----
    if (is_tdp)
    {
        itlb->tdp_stats = itlb->stats_t;
        dtlb->tdp_stats = dtlb->stats_t;
        itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg;
        dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
        power = power + itlb->power + dtlb->power;
    }
    else
    {
        itlb->rtp_stats = itlb->stats_t;
        dtlb->rtp_stats = dtlb->stats_t;
        itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg;
        dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg;
        rt_power = rt_power + itlb->rt_power + dtlb->rt_power;
    }
}
void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
if (!exist) return;
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
cout << indent_str << "Itlb:" << endl;
cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< "Dtlb:" << endl;
cout << indent_str_next << "Area = " << dtlb->area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << dtlb->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? dtlb->power.readOp.longer_channel_leakage:dtlb->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << dtlb->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << dtlb->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
}
else
{
cout << indent_str_next << "Itlb Peak Dynamic = " << itlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Itlb Subthreshold Leakage = " << itlb->rt_power.readOp.leakage <<" W" << endl;
cout << indent_str_next << "Itlb Gate Leakage = " << itlb->rt_power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Dtlb Peak Dynamic = " << dtlb->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Dtlb Subthreshold Leakage = " << dtlb->rt_power.readOp.leakage << " W" << endl;
cout << indent_str_next << "Dtlb Gate Leakage = " << dtlb->rt_power.readOp.gate_leakage << " W" << endl;
}
}
/*
 * Computes the energy of the integer RF (IRF), the floating-point RF (FRF)
 * and, when register windowing is enabled, the register-window RF (RFWIN).
 *
 * is_tdp == true : access counts are derived from the configured duty cycles
 *                  and the result is accumulated into the `power` members.
 * is_tdp == false: access counts are taken from the XML statistics and the
 *                  result is accumulated into the `rt_power` members.
 *
 * Architecture RF and physical RF cannot be present at the same time.
 * Therefore the RF stats can only refer to either ARF or PRF, and the same
 * stats are used for both.
 */
void RegFU::computeEnergy(bool is_tdp)
{
    if (!exist) return;

    const bool windowed = coredynp.regWindowing;

    if (is_tdp)
    {
        // Peak access counts.
        // Rule of thumb: about 10% of RF related instructions do not need to
        // access the ALUs (hence the 1.1 factor on the ALU duty cycle).
        IRF->stats_t.readAc.access  = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+
                (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
        IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+
                (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines;
        IRF->tdp_stats = IRF->stats_t;

        FRF->stats_t.readAc.access  = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
        FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines;
        FRF->tdp_stats = FRF->stats_t;

        if (windowed)
        {
            // The register-window RF contributes no peak accesses.
            RFWIN->stats_t.readAc.access  = 0;//0.5*RFWIN->l_ip.num_rw_ports;
            RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports;
            RFWIN->tdp_stats = RFWIN->stats_t;
        }
    }
    else
    {
        // Runtime access counts (RTP).
        // TODO: no diff on archi and phy
        if (windowed)
        {
            // Each function call is charged 16 accesses on the window RF and
            // on both regular RFs (presumably the registers moved per window
            // switch -- confirm).
            RFWIN->stats_t.readAc.access  = XML->sys.core[ithCore].function_calls*16;
            RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16;
            RFWIN->rtp_stats = RFWIN->stats_t;

            IRF->stats_t.readAc.access  = XML->sys.core[ithCore].int_regfile_reads +
                    XML->sys.core[ithCore].function_calls*16;
            IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes +
                    XML->sys.core[ithCore].function_calls*16;
            FRF->stats_t.readAc.access  = XML->sys.core[ithCore].float_regfile_reads +
                    XML->sys.core[ithCore].function_calls*16;
            FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+
                    XML->sys.core[ithCore].function_calls*16;
        }
        else
        {
            IRF->stats_t.readAc.access  = XML->sys.core[ithCore].int_regfile_reads;
            IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes;
            FRF->stats_t.readAc.access  = XML->sys.core[ithCore].float_regfile_reads;
            FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes;
        }
        IRF->rtp_stats = IRF->stats_t;
        FRF->rtp_stats = FRF->stats_t;
    }

    // Dynamic energy = reads * per-read energy + writes * per-write energy.
    IRF->power_t.reset();
    FRF->power_t.reset();
    IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic
            +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic);
    FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic
            +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic);
    if (windowed)
    {
        RFWIN->power_t.reset();
        RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic +
                RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic);
    }

    // Combine the dynamic part with the scaled array result and roll the
    // sub-arrays up into this unit's totals.
    if (is_tdp)
    {
        IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
        FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
        power = power + (IRF->power + FRF->power);
        if (windowed)
        {
            RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
            power = power + RFWIN->power;
        }
    }
    else
    {
        IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread;
        FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread;
        // NOTE(review): the unit total adds IRF/FRF power_t here, whereas the
        // RFWIN term below adds rt_power -- confirm the asymmetry is intended.
        rt_power = rt_power + (IRF->power_t + FRF->power_t);
        if (windowed)
        {
            RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg;
            rt_power = rt_power + RFWIN->rt_power;
        }
    }
}
void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
if (!exist) return;
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{ cout << indent_str << "Integer RF:" << endl;
cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
cout << indent_str<< "Floating Point RF:" << endl;
cout << indent_str_next << "Area = " << FRF->area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << FRF->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? FRF->power.readOp.longer_channel_leakage:FRF->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << FRF->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << FRF->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (coredynp.regWindowing)
{
cout << indent_str << "Register Windows:" << endl;
cout << indent_str_next << "Area = " << RFWIN->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << RFWIN->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? RFWIN->power.readOp.longer_channel_leakage:RFWIN->power.readOp.leakage) << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << RFWIN->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << RFWIN->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
}
}
else
{
cout << indent_str_next << "Integer RF Peak Dynamic = " << IRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Integer RF Subthreshold Leakage = " << IRF->rt_power.readOp.leakage <<" W" << endl;
cout << indent_str_next << "Integer RF Gate Leakage = " << IRF->rt_power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Floating Point RF Peak Dynamic = " << FRF->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Floating Point RF Subthreshold Leakage = " << FRF->rt_power.readOp.leakage << " W" << endl;
cout << indent_str_next << "Floating Point RF Gate Leakage = " << FRF->rt_power.readOp.gate_leakage << " W" << endl;
if (coredynp.regWindowing)
{
cout << indent_str_next << "Register Windows Peak Dynamic = " << RFWIN->rt_power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Register Windows Subthreshold Leakage = " << RFWIN->rt_power.readOp.leakage << " W" << endl;
cout << indent_str_next << "Register Windows Gate Leakage = " << RFWIN->rt_power.readOp.gate_leakage << " W" << endl;
}
}
}
/*
 * Computes the energy of the execution unit: register files, scheduler,
 * integer ALUs, optional FPUs and multipliers, and the bypass network.
 *
 * is_tdp == true : bypass activity is scaled by the configured CDB duty
 *                  cycles and results go into the `power` members.
 * is_tdp == false: bypass activity is scaled by the CDB access counts from
 *                  the XML statistics and results go into `rt_power`.
 */
void EXECU::computeEnergy(bool is_tdp)
{
    if (!exist) return;

    double scale[4] = {1,1,1,1};
    const bool has_fpu = coredynp.num_fpus >0;
    const bool has_mul = coredynp.num_muls >0;

    // Evaluate every sub-component first; their results are summed below.
    rfu->computeEnergy(is_tdp);
    scheu->computeEnergy(is_tdp);
    exeu->computeEnergy(is_tdp);
    if (has_fpu)
    {
        fp_u->computeEnergy(is_tdp);
    }
    if (has_mul)
    {
        mul->computeEnergy(is_tdp);
    }

    if (!is_tdp)
    {
        // Runtime: scale the bypass energy by the recorded CDB accesses.
        set_pppm(scale, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses);
        bypass.rt_power = bypass.rt_power + intTagBypass->power*scale + int_bypass->power*scale;
        if (has_mul)
        {
            //2 means two source operands needs to be passed for each int instruction.
            set_pppm(scale, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);
            bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*scale + int_mul_bypass->power*scale;
            rt_power = rt_power + mul->rt_power;
        }
        if (has_fpu)
        {
            set_pppm(scale, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses);
            bypass.rt_power = bypass.rt_power + fp_bypass->power*scale + fpTagBypass->power*scale;
            rt_power = rt_power + fp_u->rt_power;
        }
        rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power;
        return;
    }

    // Peak: scale the bypass energy by the CDB duty cycles.
    //2 means two source operands needs to be passed for each int instruction.
    set_pppm(scale, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);
    bypass.power = bypass.power + intTagBypass->power*scale + int_bypass->power*scale;
    if (has_mul)
    {
        //2 means two source operands needs to be passed for each int instruction.
        set_pppm(scale, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);
        bypass.power = bypass.power + intTag_mul_Bypass->power*scale + int_mul_bypass->power*scale;
        power = power + mul->power;
    }
    if (has_fpu)
    {
        //3 means three source operands needs to be passed for each fp instruction.
        set_pppm(scale, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);
        bypass.power = bypass.power + fp_bypass->power*scale + fpTagBypass->power*scale ;
        power = power + fp_u->power;
    }
    power = power + rfu->power + exeu->power + bypass.power + scheu->power;
}
/*
 * Prints the area/power report of the execution unit to stdout.
 *
 * indent : number of leading spaces for section headings; values are
 *          indented two further spaces.
 * plevel : print level; values above 3 additionally print the breakdown of
 *          the register files and of the scheduler.
 * is_tdp : true prints the per-component report; false prints a flat list
 *          built from the rt_power members.
 */
void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
    if (!exist) return;
    string indent_str(indent, ' ');
    string indent_str_next(indent+2, ' ');
    bool long_channel = XML->sys.longer_channel_device;

    if (is_tdp)
    {
        cout << indent_str << "Register Files:" << endl;
        cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl;
        cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Subthreshold Leakage = "
             << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl;
        cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl;
        cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl;
        cout <<endl;
        if (plevel>3){
            // Forward plevel explicitly. The former two-argument call put
            // is_tdp into the plevel slot of
            // RegFU::displayEnergy(uint32_t, int, bool) and relied on the
            // default value of the third parameter.
            rfu->displayEnergy(indent+4,plevel,is_tdp);
        }

        cout << indent_str << "Instruction Scheduler:" << endl;
        cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl;
        cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Subthreshold Leakage = "
             << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl;
        cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl;
        cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl;
        cout <<endl;
        if (plevel>3){
            // NOTE(review): if the scheduler's displayEnergy also takes
            // (indent, plevel, is_tdp), this call passes is_tdp as plevel
            // too -- confirm against its declaration before changing it.
            scheu->displayEnergy(indent+4,is_tdp);
        }

        // Functional units print their own section headings.
        exeu->displayEnergy(indent,is_tdp);
        if (coredynp.num_fpus>0)
        {
            fp_u->displayEnergy(indent,is_tdp);
        }
        if (coredynp.num_muls >0)
        {
            mul->displayEnergy(indent,is_tdp);
        }

        cout << indent_str << "Results Broadcast Bus:" << endl;
        cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl;
        cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Subthreshold Leakage = "
             << (long_channel? bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl;
        cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl;
        cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl;
        cout <<endl;
    }
    else
    {
        // The "Sheduler" spelling in the labels below is kept verbatim so
        // the emitted text does not change.
        cout << indent_str_next << "Register Files    Peak Dynamic = " << rfu->rt_power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Register Files    Subthreshold Leakage = " << rfu->rt_power.readOp.leakage <<" W" << endl;
        cout << indent_str_next << "Register Files    Gate Leakage = " << rfu->rt_power.readOp.gate_leakage << " W" << endl;
        cout << indent_str_next << "Instruction Sheduler   Peak Dynamic = " << scheu->rt_power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Instruction Sheduler   Subthreshold Leakage = " << scheu->rt_power.readOp.leakage << " W" << endl;
        cout << indent_str_next << "Instruction Sheduler   Gate Leakage = " << scheu->rt_power.readOp.gate_leakage << " W" << endl;
        cout << indent_str_next << "Results Broadcast Bus   Peak Dynamic = " << bypass.rt_power.readOp.dynamic*clockRate << " W" << endl;
        cout << indent_str_next << "Results Broadcast Bus   Subthreshold Leakage = " << bypass.rt_power.readOp.leakage << " W" << endl;
        cout << indent_str_next << "Results Broadcast Bus   Gate Leakage = " << bypass.rt_power.readOp.gate_leakage << " W" << endl;
    }
}
/*
 * Computes the energy of the whole core by evaluating every sub-unit and
 * distributing the pipeline (corepipe) energy evenly over them.
 *
 * The pipeline energy is split over 4 units (IFU, LSU, MMU, EXU), or over 5
 * when the core is out-of-order and therefore also has a renaming unit.
 *
 * is_tdp == true : results are accumulated into the `power` members.
 * is_tdp == false: results are accumulated into the `rt_power` members.
 */
void Core::computeEnergy(bool is_tdp)
{
    //power_point_product_masks
    double mask[4] = {1,1,1,1};
    double unit_count = 4.0;

    // These sub-units are evaluated first in both modes.
    ifu->computeEnergy(is_tdp);
    lsu->computeEnergy(is_tdp);
    mmu->computeEnergy(is_tdp);
    exu->computeEnergy(is_tdp);

    const bool out_of_order = (coredynp.core_ty==OOO);
    if (out_of_order)
    {
        unit_count = 5.0;
        rnu->computeEnergy(is_tdp);
    }

    if (is_tdp)
    {
        if (out_of_order)
        {
            set_pppm(mask, coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count,
                     coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
            if (rnu->exist)
            {
                rnu->power = rnu->power + corepipe->power*mask;
                power = power + rnu->power;
            }
        }
        // For the remaining units the dynamic share of the pipeline energy
        // is additionally weighted by the unit's duty cycle.
        if (ifu->exist)
        {
            set_pppm(mask, coredynp.num_pipelines/unit_count*coredynp.IFU_duty_cycle, coredynp.num_pipelines/unit_count,
                     coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
            ifu->power = ifu->power + corepipe->power*mask;
            power = power + ifu->power;
        }
        if (lsu->exist)
        {
            set_pppm(mask, coredynp.num_pipelines/unit_count*coredynp.LSU_duty_cycle, coredynp.num_pipelines/unit_count,
                     coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
            lsu->power = lsu->power + corepipe->power*mask;
            power = power + lsu->power;
        }
        if (exu->exist)
        {
            set_pppm(mask, coredynp.num_pipelines/unit_count*coredynp.ALU_duty_cycle, coredynp.num_pipelines/unit_count,
                     coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
            exu->power = exu->power + corepipe->power*mask;
            power = power + exu->power;
        }
        if (mmu->exist)
        {
            set_pppm(mask, coredynp.num_pipelines/unit_count*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/unit_count,
                     coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
            mmu->power = mmu->power + corepipe->power*mask;
            power = power + mmu->power;
        }

        power = power + undiffCore->power;

        if (XML->sys.Private_L2)
        {
            l2cache->computeEnergy(is_tdp);
            // Rescale the L2 dynamic term by the ratio of L2 clock to core clock.
            set_pppm(mask,l2cache->cachep.clockRate/clockRate, 1,1,1);
            power = power + l2cache->power*mask;
        }
        return;
    }

    // Runtime path.
    if (out_of_order)
    {
        // NOTE(review): on this path the mask is not scaled by a cycle
        // count, unlike the in-order path below, and it is reused for the
        // IFU/LSU/EXU/MMU terms -- confirm this is intended.
        set_pppm(mask, coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count,
                 coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
        if (rnu->exist)
        {
            rnu->rt_power = rnu->rt_power + corepipe->power*mask;
            rt_power = rt_power + rnu->rt_power;
        }
    }
    else
    {
        // Pipeline activity coefficient: duty cycle times simulated cycles.
        double rtp_pipeline_coe;
        if (XML->sys.homogeneous_cores==1)
        {
            rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores;
        }
        else
        {
            rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles;
        }
        set_pppm(mask, coredynp.num_pipelines*rtp_pipeline_coe/unit_count, coredynp.num_pipelines/unit_count,
                 coredynp.num_pipelines/unit_count, coredynp.num_pipelines/unit_count);
    }

    if (ifu->exist)
    {
        ifu->rt_power = ifu->rt_power + corepipe->power*mask;
        rt_power = rt_power + ifu->rt_power ;
    }
    if (lsu->exist)
    {
        lsu->rt_power = lsu->rt_power + corepipe->power*mask;
        rt_power = rt_power + lsu->rt_power;
    }
    if (exu->exist)
    {
        exu->rt_power = exu->rt_power + corepipe->power*mask;
        rt_power = rt_power + exu->rt_power;
    }
    if (mmu->exist)
    {
        mmu->rt_power = mmu->rt_power + corepipe->power*mask;
        rt_power = rt_power + mmu->rt_power ;
    }

    rt_power = rt_power + undiffCore->power;

    if (XML->sys.Private_L2)
    {
        // No clock-ratio rescaling is applied to the L2 term at runtime.
        l2cache->computeEnergy(is_tdp);
        rt_power = rt_power + l2cache->rt_power;
    }
}
void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp)
{
string indent_str(indent, ' ');
string indent_str_next(indent+2, ' ');
bool long_channel = XML->sys.longer_channel_device;
if (is_tdp)
{
cout << "Core:" << endl;
cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str << "Subthreshold Leakage = "
<< (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl;
//cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl;
cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl;
cout<<endl;
if (ifu->exist)
{
cout << indent_str << "Instruction Fetch Unit:" << endl;
cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl;
cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (plevel >2){
ifu->displayEnergy(indent+4,plevel,is_tdp);
}
}
if (coredynp.core_ty==OOO)
{
if (rnu->exist)
{
cout << indent_str<< "Renaming Unit:" << endl;
cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (plevel >2){
rnu->displayEnergy(indent+4,plevel,is_tdp);
}
}
}
if (lsu->exist)
{
cout << indent_str<< "Load Store Unit:" << endl;
cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (plevel >2){
lsu->displayEnergy(indent+4,plevel,is_tdp);
}
}
if (mmu->exist)
{
cout << indent_str<< "Memory Management Unit:" << endl;
cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (plevel >2){
mmu->displayEnergy(indent+4,plevel,is_tdp);
}
}
if (exu->exist)
{
cout << indent_str<< "Execution Unit:" << endl;
cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl;
cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl;
cout << indent_str_next << "Subthreshold Leakage = "
<< (long_channel? exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl;
//cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl;
cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl;
cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl;
cout <<endl;
if (plevel >2){
exu->displayEnergy(indent+4,plevel,is_tdp);
}
}
// if (plevel >2)
// {
// if (undiffCore->exist)
// {
// cout << indent_str << "Undifferentiated Core" << endl;
// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl;
// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl;
//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl;
// cout << indent_str_next << "Subthreshold Leakage = "
// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl;
// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl;
// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl;
// cout <<endl;
// }
// }
if (XML->sys.Private_L2)
{
l2cache->displayEnergy(4,is_tdp);
}
}
else
{
// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl;
// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << lsu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl;
// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl;
// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl;
// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl;
}
}
/*
 * Releases the sub-components held by the instruction fetch unit.
 * Nothing is released when the unit is marked as not existing.
 */
InstFetchU ::~InstFetchU(){
    if (!exist) return;

    // delete on a null pointer is a no-op, so no explicit null checks.
    delete IB;          IB = 0;
    delete ID_inst;     ID_inst = 0;
    delete ID_operand;  ID_operand = 0;
    delete ID_misc;     ID_misc = 0;

    // BTB and BPT are only released when branch prediction is enabled.
    if (coredynp.predictionW>0)
    {
        delete BTB;     BTB = 0;
        delete BPT;     BPT = 0;
    }
}
/*
 * Releases the predictor tables and the RAS held by the branch predictor.
 * Nothing is released when the unit is marked as not existing.
 */
BranchPredictor ::~BranchPredictor(){
    if (!exist) return;

    // delete on a null pointer is a no-op, so no explicit null checks.
    delete globalBPT;    globalBPT = 0;
    delete localBPT;     localBPT = 0;
    delete L1_localBPT;  L1_localBPT = 0;
    delete L2_localBPT;  L2_localBPT = 0;
    delete chooser;      chooser = 0;
    delete RAS;          RAS = 0;
}
/*
 * Releases the sub-components held by the renaming unit.
 * Nothing is released when the unit is marked as not existing.
 */
RENAMINGU ::~RENAMINGU(){
    if (!exist) return;
    if(iFRAT ) {delete iFRAT; iFRAT = 0;}
    if(fFRAT ) {delete fFRAT; fFRAT =0;}
    if(iRRAT) {delete iRRAT; iRRAT = 0;}
    // Previously this line released iFRAT a second time (a no-op, since it
    // was already null) and fRRAT was never released.
    if(fRRAT) {delete fRRAT; fRRAT = 0;}
    if(ifreeL) {delete ifreeL;ifreeL= 0;}
    if(ffreeL) {delete ffreeL;ffreeL= 0;}
    if(idcl) {delete idcl; idcl = 0;}
    if(fdcl) {delete fdcl; fdcl = 0;}
    if(RAHT) {delete RAHT; RAHT = 0;}
}
/*
 * Releases the LSQ held by the load/store unit.
 * Nothing is released when the unit is marked as not existing.
 */
LoadStoreU ::~LoadStoreU(){
    if (exist)
    {
        // delete on a null pointer is a no-op, so no explicit null check.
        delete LSQ;
        LSQ = 0;
    }
}
/*
 * Releases the instruction and data TLBs held by the memory management unit.
 * Nothing is released when the unit is marked as not existing.
 */
MemManU ::~MemManU(){
    if (exist)
    {
        // delete on a null pointer is a no-op, so no explicit null checks.
        delete itlb;
        itlb = 0;
        delete dtlb;
        dtlb = 0;
    }
}
/*
 * Releases the register-file arrays (IRF, FRF, RFWIN) held by this unit.
 * Nothing is released when the unit is marked as not existing.
 */
RegFU ::~RegFU(){
    if (exist)
    {
        // delete on a null pointer is a no-op, so no explicit null checks.
        delete IRF;
        IRF = 0;
        delete FRF;
        FRF = 0;
        delete RFWIN;
        RFWIN = 0;
    }
}
/*
 * Releases the sub-components held by the scheduler unit.
 * Nothing is released when the unit is marked as not existing.
 */
SchedulerU ::~SchedulerU(){
    if (!exist) return;
    if(int_inst_window) {delete int_inst_window; int_inst_window = 0;}
    // Previously this branch tested fp_inst_window but deleted
    // int_inst_window (already null at this point), so fp_inst_window was
    // never released.
    if(fp_inst_window) {delete fp_inst_window; fp_inst_window = 0;}
    if(ROB) {delete ROB; ROB = 0;}
    if(instruction_selection) {delete instruction_selection;instruction_selection = 0;}
}
/*
 * Releases the bypass structures, functional units, register-file unit and
 * scheduler held by the execution unit.
 * Nothing is released when the unit is marked as not existing.
 */
EXECU ::~EXECU(){
    if (!exist) return;

    // delete on a null pointer is a no-op, so no explicit null checks.

    // Bypass structures
    delete int_bypass;         int_bypass = 0;
    delete intTagBypass;       intTagBypass = 0;
    delete int_mul_bypass;     int_mul_bypass = 0;
    delete intTag_mul_Bypass;  intTag_mul_Bypass = 0;
    delete fp_bypass;          fp_bypass = 0;
    delete fpTagBypass;        fpTagBypass = 0;

    // Functional units, register files and scheduler
    delete fp_u;               fp_u = 0;
    delete exeu;               exeu = 0;
    delete mul;                mul = 0;
    delete rfu;                rfu = 0;
    delete scheu;              scheu = 0;
}
/*
 * Releases every unit held by the core, including the pipeline model, the
 * undifferentiated core and the private L2 cache.
 */
Core ::~Core(){
    // delete on a null pointer is a no-op, so no explicit null checks.
    delete ifu;         ifu = 0;
    delete lsu;         lsu = 0;
    delete rnu;         rnu = 0;
    delete mmu;         mmu = 0;
    delete exu;         exu = 0;
    delete corepipe;    corepipe = 0;
    delete undiffCore;  undiffCore = 0;
    delete l2cache;     l2cache = 0;
}
void Core::set_core_param()
{
coredynp.opt_local = XML->sys.core[ithCore].opt_local;
coredynp.x86 = XML->sys.core[ithCore].x86;
coredynp.Embedded = XML->sys.Embedded;
coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type;
coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme;
coredynp.fetchW = XML->sys.core[ithCore].fetch_width;
coredynp.decodeW = XML->sys.core[ithCore].decode_width;
coredynp.issueW = XML->sys.core[ithCore].issue_width;
coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width;
coredynp.commitW = XML->sys.core[ithCore].commit_width;
coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width;
coredynp.predictionW = XML->sys.core[ithCore].prediction_width;
coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width;
coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width;
coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core;
coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core;
coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core;
coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads;
coredynp.multithreaded = coredynp.num_hthreads>1? true:false;
coredynp.instruction_length = XML->sys.core[ithCore].instruction_length;
coredynp.pc_width = XML->sys.virtual_address_width;
coredynp.opcode_length = XML->sys.core[ithCore].opcode_width;
coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width;
coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0];
coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0];
coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1];
coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1];
coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32;
coredynp.fp_data_width = coredynp.int_data_width;
coredynp.v_address_width = XML->sys.virtual_address_width;
coredynp.p_address_width = XML->sys.physical_address_width;
coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme;
coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size)));
coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size)));
coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size;
coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size;
coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle;
coredynp.total_cycles = XML->sys.core[ithCore].total_cycles;
coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles;
coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles;
//Max power duty cycle for peak power estimation
// if (coredynp.core_ty==OOO)
// {
// coredynp.IFU_duty_cycle = 1;
// coredynp.LSU_duty_cycle = 1;
// coredynp.MemManU_I_duty_cycle =1;
// coredynp.MemManU_D_duty_cycle =1;
// coredynp.ALU_duty_cycle =1;
// coredynp.MUL_duty_cycle =1;
// coredynp.FPU_duty_cycle =1;
// coredynp.ALU_cdb_duty_cycle =1;
// coredynp.MUL_cdb_duty_cycle =1;
// coredynp.FPU_cdb_duty_cycle =1;
// }
// else
// {
coredynp.IFU_duty_cycle = XML->sys.core[ithCore].IFU_duty_cycle;
coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle;
coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle;
coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle;
coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle;
coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle;
coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle;
coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle;
coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle;
coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle;
coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle;
// }
if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder)))
{
cout<<"Invalid Core Type"<<endl;
exit(0);
}
// if (coredynp.core_ty==OOO)
// {
// cout<<"OOO processor models are being updated and will be available in next release"<<endl;
// exit(0);
// }
if (!((coredynp.scheu_ty==PhysicalRegFile)||(coredynp.scheu_ty==ReservationStation)))
{
cout<<"Invalid OOO Scheduler Type"<<endl;
exit(0);
}
if (!((coredynp.rm_ty ==RAMbased)||(coredynp.rm_ty ==CAMbased)))
{
cout<<"Invalid OOO Renaming Type"<<endl;
exit(0);
}
if (coredynp.core_ty==OOO)
{
if (coredynp.scheu_ty==PhysicalRegFile)
{
coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_IRF_size)));
coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size)));
coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size;
coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size;
}
else if (coredynp.scheu_ty==ReservationStation)
{//ROB serves as Phy RF in RS based OOO
coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size)));
coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size;
coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size;
}
}
coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See TR for reference.
coredynp.perThreadState = 8;
coredynp.instruction_length = 32;
coredynp.clockRate = XML->sys.core[ithCore].clock_rate;
coredynp.clockRate *= 1e6;
coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false;
coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate;
set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0);
}