gem5/ext/mcpat/core.cc
Yasuko Eckert 0deef376d9 ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes
Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known
issues/concerns we did not have a chance to address in this patch.

High-level changes in this patch include:
 1) Making XML parsing modular and hierarchical:
   - Shift parsing responsibility into the components
   - Read XML in a (mostly) context-free recursive manner so that McPAT input
     files can contain arbitrary component hierarchies
 2) Making power, energy, and area calculations a hierarchical and recursive
    process
   - Components track their subcomponents and recursively call compute
     functions in stages
   - Make C++ object hierarchy reflect inheritance of classes of components
     with similar structures
   - Simplify computeArea() and computeEnergy() functions to eliminate
     successive calls to calculate separate TDP vs. runtime energy
   - Remove Processor component (now unnecessary) and introduce a more abstract
     System component
 3) Standardizing McPAT output across all components
   - Use a single, common data structure for storing and printing McPAT output
   - Recursively call print functions through component hierarchy
 4) For caches, allow splitting data array and tag array reads and writes for
    better accuracy
 5) Improving the usability of CACTI by printing more helpful warning and error
    messages
 6) Minor: Impose more rigorous code style for clarity (more work still to be
    done)
Overall, these changes greatly reduce the amount of replicated code, and they
improve McPAT runtime and decrease memory footprint.
2014-06-03 13:32:59 -07:00

4010 lines
173 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*****************************************************************************
* McPAT
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
#include "basic_circuit.h"
#include "basic_components.h"
#include "common.h"
#include "const.h"
#include "core.h"
#include "io.h"
#include "parameter.h"
// Definition of the RegFU static member RFWIN_ACCESS_MULTIPLIER. Its use is
// not visible in this part of the file.
// NOTE(review): the name suggests a scaling factor for register-window
// accesses -- confirm against RegFU before relying on that reading.
int RegFU::RFWIN_ACCESS_MULTIPLIER = 16;
// Number of per-entry status bits kept in the reorder buffer.
// The five bits are: busy, Issued, Finished, speculative, valid.
// The SchedulerU constructor adds this to log2(num_hthreads) when it sizes
// a reorder-buffer entry.
int SchedulerU::ROB_STATUS_BITS = 5;
/**
 * Construct the instruction fetch unit (IFU) model.
 *
 * When exist_ is false the constructor returns immediately and every
 * sub-component pointer stays NULL. Otherwise it:
 *  - attaches a CacheUnit child for each XML "component" child of type
 *    "CacheUnit" whose name is "Instruction Cache" or "icache",
 *  - parses the branch target buffer parameters and statistics through
 *    set_params_stats(),
 *  - models the instruction buffer,
 *  - models the branch target buffer and the branch predictor, but only when
 *    core_params.predictionW > 0,
 *  - models the opcode, operand and microcode instruction decoders,
 * and accumulates the area of each modeled structure into this component.
 *
 * @param _xml_data      XML node that describes this component.
 * @param interface_ip_  Array-model input parameters used to initialize the
 *                       interface_ip member, which is then reconfigured for
 *                       every structure built here.
 * @param _core_params   Core-level parameters read throughout (widths,
 *                       sizes, port counts, clock rate).
 * @param _core_stats    Core-level statistics, forwarded to the branch
 *                       predictor.
 * @param exist_         Whether this unit is modeled at all.
 */
InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_,
                       const CoreParameters & _core_params,
                       const CoreStatistics & _core_stats, bool exist_)
    : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL),
      BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL),
      interface_ip(*interface_ip_),
      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    int tag, data, size, line, assoc, banks;
    bool is_default = true;

    clockRate = core_params.clockRate;
    name = "Instruction Fetch Unit";

    // Check if there is an icache child:
    int i;
    icache = NULL;
    for( i = 0; i < xml_data->nChildNode("component"); i++ ) {
        XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
        XMLCSTR type = childXML->getAttribute("type");

        if (!type)
            warnMissingComponentType(childXML->getAttribute("id"));

        STRCMP(type, "CacheUnit") {
            // getAttribute() yields NULL for a missing attribute (see the
            // type check above), and strcmp() must not be handed NULL, so a
            // nameless cache child is simply not treated as the icache.
            XMLCSTR cache_name = childXML->getAttribute("name");
            if (cache_name &&
                (strcmp(cache_name, "Instruction Cache") == 0 ||
                 strcmp(cache_name, "icache") == 0)) {
                icache = new CacheUnit(childXML, &interface_ip);
                children.push_back(icache);
            }
        }
    }

    // Fills inst_fetch_params / inst_fetch_stats from the XML.
    set_params_stats();

    // Instruction buffer: one line holds a full issue group, in bytes.
    data = core_params.instruction_length * core_params.peak_issueW;
    line = int(ceil(data / BITS_PER_BYTE));
    size = core_params.num_hthreads * core_params.instruction_buffer_size *
        line;
    if (size < MIN_BUFFER_SIZE) {
        size = MIN_BUFFER_SIZE;
    }

    interface_ip.cache_sz = size;
    interface_ip.line_sz = line;
    interface_ip.assoc = core_params.instruction_buffer_assoc;
    interface_ip.nbanks = core_params.instruction_buffer_nbanks;
    interface_ip.out_w = line * BITS_PER_BYTE;
    interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0;
    interface_ip.tag_w = core_params.instruction_buffer_tag_width;
    interface_ip.access_mode = Normal;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports =
        core_params.number_instruction_fetch_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    interface_ip.is_cache = false;
    interface_ip.pure_ram = true;
    interface_ip.pure_cam = false;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    IB = new ArrayST(xml_data, &interface_ip, "Instruction Buffer",
                     Core_device, clockRate, core_params.opt_local,
                     core_params.core_ty);
    IB->area.set_area(IB->area.get_area() + IB->local_result.area);
    area.set_area(area.get_area() + IB->local_result.area);

    if (core_params.predictionW > 0) {
        /*
         * BTB (branch target buffer), accessed during the IF stage.
         * Virtually indexed and virtually tagged. It is modeled as a bare
         * cache without the buffers of a cache controller, because it
         * behaves like a lookup table: on a miss nothing is loaded from
         * elsewhere (misses are not actively filled). It is updated
         * passively in these cases:
         *  1) the BPT at the ID stage finds a taken branch the BTB missed,
         *  2) the BPT at the ID stage predicts differently than the BTB,
         *  3) the ID stage finds that the instruction is not a branch
         *     although the BTB hit (the entry is marked invalid),
         *  4) the execution unit finds the BTB supplied a wrong target.
         */
        size = inst_fetch_params.btb_size;
        line = inst_fetch_params.btb_block_size;
        assoc = inst_fetch_params.btb_assoc;
        banks = inst_fetch_params.btb_num_banks;
        // Tag covers the virtual address, the thread ID and extra bits.
        tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads)))
            + EXTRA_TAG_BITS;

        interface_ip.cache_sz = size;
        interface_ip.line_sz = line;
        interface_ip.assoc = assoc;
        interface_ip.nbanks = banks;
        interface_ip.out_w = line * BITS_PER_BYTE;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = Normal;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 1;
        interface_ip.num_rd_ports = core_params.predictionW;
        interface_ip.num_wr_ports = core_params.predictionW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = 0;
        interface_ip.is_cache = true;
        interface_ip.pure_ram = false;
        interface_ip.pure_cam = false;
        interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate;
        interface_ip.latency = inst_fetch_params.btb_latency / clockRate;

        BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer",
                          Core_device, clockRate, core_params.opt_local,
                          core_params.core_ty);
        area.set_area(area.get_area() + BTB->local_result.area);

        BPT = new BranchPredictor(xml_data, &interface_ip,
                                  core_params, core_stats);
        area.set_area(area.get_area() + BPT->area.get_area());
    }

    // Three decoders that differ only in name and decoded field width.
    ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder",
                                     is_default, &interface_ip,
                                     core_params.opcode_width,
                                     core_params.decodeW,
                                     core_params.x86, clockRate,
                                     Core_device, core_params.core_ty);

    ID_operand = new InstructionDecoder(xml_data,
                                        "Instruction Operand Decoder",
                                        is_default, &interface_ip,
                                        core_params.arch_ireg_width,
                                        core_params.decodeW,
                                        core_params.x86, clockRate,
                                        Core_device, core_params.core_ty);

    ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder",
                                     is_default, &interface_ip,
                                     core_params.micro_opcode_length,
                                     core_params.decodeW,
                                     core_params.x86, clockRate,
                                     Core_device, core_params.core_ty);

    // The summed decoder area is scaled by the decode width.
    area.set_area(area.get_area()+ (ID_inst->area.get_area()
                                    + ID_operand->area.get_area()
                                    + ID_misc->area.get_area())
                  * core_params.decodeW);
}
// Parse the branch target buffer (BTB) parameters and statistics of this
// fetch unit from its XML node.
//
// inst_fetch_params is zeroed first. Every "component" child of xml_data whose
// type is "BranchTargetBuffer" then has its "param" children read into
// inst_fetch_params.btb_* and its "stat" children read into
// inst_fetch_stats.btb_*. After parsing, the BTB size, block size,
// associativity and bank count are each reported through
// errorNonPositiveParam() when they are <= 0.
//
// NOTE(review): STRCMP, ASSIGN_INT_IF and ASSIGN_FP_IF are macros defined
// outside this file section. They presumably compare against / assign from the
// locals named `node_name` and `value`, and they take part in the if/else
// chains below, so do not rename those locals or restructure the chains
// without checking the macro definitions.
void
InstFetchU::set_params_stats() {
int num_children = xml_data->nChildNode("component");
int i;
// Only the parameter struct is cleared here; inst_fetch_stats is not reset.
memset(&inst_fetch_params,0,sizeof(InstFetchParameters));
for (i = 0; i < num_children; i++) {
// The loop index is handed to getChildNodePtr() by address.
XMLNode* child = xml_data->getChildNodePtr("component", &i);
XMLCSTR type = child->getAttribute("type");
if (!type)
warnMissingComponentType(child->getAttribute("id"));
STRCMP(type, "BranchTargetBuffer") {
// Parameters of the BTB component.
int sub_num_children = child->nChildNode("param");
int j;
for (j = 0; j < sub_num_children; j++) {
XMLNode* paramNode = child->getChildNodePtr("param", &j);
XMLCSTR node_name = paramNode->getAttribute("name");
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_INT_IF("size", inst_fetch_params.btb_size);
ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size);
ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc);
ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks);
ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency);
ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput);
// rw_ports is parsed here, but the InstFetchU constructor sets the BTB's
// num_rw_ports to the constant 1.
ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports);
else {
warnUnrecognizedParam(node_name);
}
}
// Statistics of the BTB component.
sub_num_children = child->nChildNode("stat");
for (j = 0; j < sub_num_children; j++) {
XMLNode* statNode = child->getChildNodePtr("stat", &j);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("read_accesses",
inst_fetch_stats.btb_read_accesses);
ASSIGN_FP_IF("write_accesses",
inst_fetch_stats.btb_write_accesses);
else {
warnUnrecognizedStat(node_name);
}
}
}
}
// Parameter sanity check
// These checks run unconditionally, i.e. also when no "BranchTargetBuffer"
// child was found and the fields still hold the zeros written by memset.
if (inst_fetch_params.btb_size <= 0) {
errorNonPositiveParam("size");
}
if (inst_fetch_params.btb_block_size <= 0) {
errorNonPositiveParam("block_size");
}
if (inst_fetch_params.btb_assoc <= 0) {
errorNonPositiveParam("assoc");
}
if (inst_fetch_params.btb_num_banks <= 0) {
errorNonPositiveParam("num_banks");
}
}
/**
 * Construct the branch predictor model.
 *
 * When exist_ is false the constructor returns immediately and every table
 * pointer stays NULL. Otherwise it parses the predictor parameters through
 * set_params_stats() and models five storage arrays: the global predictor,
 * the level-1 and level-2 local predictors, the chooser and the return
 * address stack (RAS). Each array's area is added to this component's area;
 * the RAS area is multiplied by the number of hardware threads.
 *
 * The predictor tables are tagged (with the thread ID plus EXTRA_TAG_BITS)
 * only when the core is multithreaded; otherwise they are plain RAMs. The
 * RAS is always modeled as a plain RAM.
 *
 * @param _xml_data      XML node searched for "BranchPredictor" children.
 * @param interface_ip_  Array-model input parameters used to initialize the
 *                       interface_ip member, which is then reconfigured for
 *                       every table built here.
 * @param _core_params   Core-level parameters (prediction width, thread
 *                       count, PC width, RAS size, clock rate, ...).
 * @param _core_stats    Core-level statistics, stored for later use.
 * @param exist_         Whether this unit is modeled at all.
 */
BranchPredictor::BranchPredictor(XMLNode* _xml_data,
                                 InputParameter* interface_ip_,
                                 const CoreParameters & _core_params,
                                 const CoreStatistics & _core_stats,
                                 bool exist_)
    : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL),
      L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL),
      interface_ip(*interface_ip_),
      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    int tag;
    int data;
    int size;

    clockRate = core_params.clockRate;
    name = "Branch Predictor";

    // Common interface parameters for the branch predictor structures
    interface_ip.pure_cam = false;

    if (core_params.multithreaded) {
        // Round the thread-ID width up, as the BTB and store-queue tags do,
        // so a thread count that is not a power of two still gets enough
        // bits to tell every thread apart.
        tag = int(ceil(log2(core_params.num_hthreads)) + EXTRA_TAG_BITS);
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;
        interface_ip.is_cache = true;
        interface_ip.pure_ram = false;
    } else {
        interface_ip.specific_tag = 0;
        interface_ip.tag_w = 0;
        interface_ip.is_cache = false;
        interface_ip.pure_ram = true;
    }

    // Parse params and stats from XML
    set_params_stats();

    // Common interface parameters for the branch predictor structures
    interface_ip.assoc = branch_pred_params.assoc;
    interface_ip.nbanks = branch_pred_params.nbanks;

    // Global predictor
    data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE));
    size = data * branch_pred_params.global_predictor_entries;

    interface_ip.cache_sz = size;
    interface_ip.line_sz = data;
    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    interface_ip.access_mode = Fast;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.predictionW;
    interface_ip.num_wr_ports = core_params.predictionW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor",
                            Core_device, clockRate, core_params.opt_local,
                            core_params.core_ty);
    area.set_area(area.get_area() + globalBPT->local_result.area);

    // Local BPT (Level 1)
    data = int(ceil(branch_pred_params.local_l1_predictor_size /
                    BITS_PER_BYTE));
    size = data * branch_pred_params.local_predictor_entries;

    interface_ip.cache_sz = size;
    interface_ip.line_sz = data;
    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    interface_ip.access_mode = Fast;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.predictionW;
    interface_ip.num_wr_ports = core_params.predictionW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    L1_localBPT = new ArrayST(xml_data, &interface_ip,
                              "Local Predictor, Level 1",
                              Core_device, clockRate, core_params.opt_local,
                              core_params.core_ty);
    // NOTE(review): only this table also bumps its own area member; the
    // other predictor tables below and above do not -- confirm intent.
    L1_localBPT->area.set_area(L1_localBPT->area.get_area() +
                               L1_localBPT->local_result.area);
    area.set_area(area.get_area()+ L1_localBPT->local_result.area);

    // Local BPT (Level 2)
    data = int(ceil(branch_pred_params.local_l2_predictor_size /
                    BITS_PER_BYTE));
    size = data * branch_pred_params.local_predictor_entries;

    interface_ip.cache_sz = size;
    interface_ip.line_sz = data;
    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    interface_ip.access_mode = Fast;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.predictionW;
    interface_ip.num_wr_ports = core_params.predictionW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    L2_localBPT = new ArrayST(xml_data, &interface_ip,
                              "Local Predictor, Level 2",
                              Core_device, clockRate, core_params.opt_local,
                              core_params.core_ty);
    area.set_area(area.get_area() + L2_localBPT->local_result.area);

    // Chooser
    data = int(ceil(branch_pred_params.chooser_predictor_bits /
                    BITS_PER_BYTE));
    size = data * branch_pred_params.chooser_predictor_entries;

    interface_ip.cache_sz = size;
    interface_ip.line_sz = data;
    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    interface_ip.access_mode = Fast;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.predictionW;
    interface_ip.num_wr_ports = core_params.predictionW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser",
                          Core_device, clockRate, core_params.opt_local,
                          core_params.core_ty);
    area.set_area(area.get_area() + chooser->local_result.area);

    // RAS: return address stacks are duplicated for each thread, so the
    // array is modeled once and its area is scaled by the thread count.
    data = int(ceil(core_params.pc_width / BITS_PER_BYTE));
    size = data * core_params.RAS_size;

    interface_ip.cache_sz = size;
    interface_ip.line_sz = data;
    interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
    interface_ip.access_mode = Fast;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.predictionW;
    interface_ip.num_wr_ports = core_params.predictionW;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    // The RAS is never tagged-cache style, even on multithreaded cores.
    interface_ip.is_cache = false;
    interface_ip.pure_ram = true;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate,
                      core_params.opt_local, core_params.core_ty);
    RAS->output_data.area *= core_params.num_hthreads;
    area.set_area(area.get_area() + RAS->local_result.area *
                  core_params.num_hthreads);
}
// Parse the branch predictor parameters from the XML node.
//
// Every "component" child of xml_data whose type is "BranchPredictor" has its
// "param" children read into branch_pred_params. No statistics are parsed
// here (see the comment at the end of the loop body).
//
// NOTE(review): nothing in this function resets branch_pred_params before
// parsing, so a field that is missing from the XML keeps whatever value the
// struct was constructed with -- confirm that it has sane defaults.
// NOTE(review): STRCMP and ASSIGN_INT_IF are macros defined outside this file
// section. They presumably compare against / assign from the locals named
// `node_name` and `value`, and they take part in the if/else chain below, so
// do not rename those locals or restructure the chain without checking the
// macro definitions.
void
BranchPredictor::set_params_stats() {
int num_children = xml_data->nChildNode("component");
int i;
for (i = 0; i < num_children; i++) {
// The loop index is handed to getChildNodePtr() by address.
XMLNode* child = xml_data->getChildNodePtr("component", &i);
XMLCSTR type = child->getAttribute("type");
if (!type)
warnMissingComponentType(child->getAttribute("id"));
STRCMP(type, "BranchPredictor") {
int sub_num_children = child->nChildNode("param");
int j;
for (j = 0; j < sub_num_children; j++) {
XMLNode* paramNode = child->getChildNodePtr("param", &j);
XMLCSTR node_name = paramNode->getAttribute("name");
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
// Organization shared by all predictor tables.
ASSIGN_INT_IF("assoc", branch_pred_params.assoc);
ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks);
// Local predictor: level-1 / level-2 entry widths and the entry count.
ASSIGN_INT_IF("local_l1_predictor_size",
branch_pred_params.local_l1_predictor_size);
ASSIGN_INT_IF("local_l2_predictor_size",
branch_pred_params.local_l2_predictor_size);
ASSIGN_INT_IF("local_predictor_entries",
branch_pred_params.local_predictor_entries);
// Global predictor.
ASSIGN_INT_IF("global_predictor_entries",
branch_pred_params.global_predictor_entries);
ASSIGN_INT_IF("global_predictor_bits",
branch_pred_params.global_predictor_bits);
// Chooser.
ASSIGN_INT_IF("chooser_predictor_entries",
branch_pred_params.chooser_predictor_entries);
ASSIGN_INT_IF("chooser_predictor_bits",
branch_pred_params.chooser_predictor_bits);
else {
warnUnrecognizedParam(node_name);
}
}
// The core reads in the number of branches and the number of
// function calls and these values are passed through the
// core_stats variable, so we don't need to read them in here
}
}
}
/**
 * Construct the instruction scheduler model.
 *
 * When exist_ is false the constructor returns immediately and every
 * sub-component pointer stays NULL. Otherwise the structures built depend on
 * the core type:
 *  - In-order and multithreaded: a single instruction queue
 *    ("InstFetchQueue") plus integer selection logic, and FP selection logic
 *    when core_params.fp_instruction_window_size > 0.
 *  - Out-of-order (OOO): an integer and an FP instruction window (or
 *    reservation station, depending on core_params.scheu_ty), a reorder
 *    buffer when core_params.ROB_size > 0, and the selection logic as above.
 * An in-order core that is not multithreaded gets no structures here.
 *
 * Array areas are scaled by the matching pipeline count and added to this
 * component's area; Iw_height, fp_Iw_height and ROB_height record the
 * heights of the corresponding arrays.
 *
 * @param _xml_data      XML node that describes this component.
 * @param interface_ip_  Array-model input parameters used to initialize the
 *                       interface_ip member, which is then reconfigured for
 *                       every structure built here.
 * @param _core_params   Core-level parameters (core type, widths, window and
 *                       ROB sizes, port counts, clock rate, ...).
 * @param _core_stats    Core-level statistics; the window wakeup access
 *                       counts are forwarded to the selection logic.
 * @param exist_         Whether this unit is modeled at all.
 */
SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_,
                       const CoreParameters & _core_params,
                       const CoreStatistics & _core_stats, bool exist_)
    : McPATComponent(_xml_data), int_inst_window(NULL),
      fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL),
      fp_instruction_selection(NULL),
      interface_ip(*interface_ip_),
      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    int tag;
    int data;
    int size;
    int line;
    bool is_default = true;
    string tmp_name;

    clockRate = core_params.clockRate;
    name = "Instruction Scheduler";

    if ((core_params.core_ty == Inorder && core_params.multithreaded)) {
        // Instruction issue queue. In-order multi-issue or multithreaded
        // processors also have this structure; it is a unified window for
        // in-order processors.
        // The tag width is the normal thread state bits, based on the
        // Niagara design.
        tag = int(log2(core_params.num_hthreads) * core_params.perThreadState);
        data = core_params.instruction_length;
        line = int(ceil(data / BITS_PER_BYTE));
        size = core_params.instruction_window_size * line;
        if (size < MIN_BUFFER_SIZE) {
            size = MIN_BUFFER_SIZE;
        }

        // NOTE: x86 instructions can be very long, up to 15B.
        // Source: Intel 64 and IA-32 Architectures Software Developer's
        // Manual
        interface_ip.cache_sz = size;
        interface_ip.line_sz = line;
        interface_ip.assoc = core_params.scheduler_assoc;
        interface_ip.nbanks = core_params.scheduler_nbanks;
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = Sequential;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = core_params.peak_issueW;
        interface_ip.num_wr_ports = core_params.peak_issueW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = core_params.peak_issueW;
        interface_ip.is_cache = true;
        interface_ip.pure_cam = false;
        interface_ip.pure_ram = false;
        interface_ip.throughput = 1.0 / clockRate;
        interface_ip.latency = 1.0 / clockRate;
        int_inst_window = new ArrayST(xml_data, &interface_ip,
                                      "InstFetchQueue", Core_device, clockRate,
                                      core_params.opt_local,
                                      core_params.core_ty);
        int_inst_window->output_data.area *= core_params.num_pipelines;
        area.set_area(area.get_area() + int_inst_window->local_result.area *
                      core_params.num_pipelines);
        Iw_height = int_inst_window->local_result.cache_ht;

        /*
         * Selection logic.
         * In a single-issue in-order multithreaded processor like Niagara,
         * issue width = 1 * number_of_threads, since the processor needs to
         * pick instructions from multiple ready ones (although these ready
         * ones come from different threads). SMT processors, in contrast, do
         * not distinguish between threads at the issue stage.
         */
        int_instruction_selection =
            new selection_logic(xml_data, is_default,
                                core_params.instruction_window_size,
                                core_params.peak_issueW *
                                core_params.num_hthreads,
                                &interface_ip,
                                "Int Instruction Selection Logic",
                                core_stats.inst_window_wakeup_accesses,
                                clockRate, Core_device, core_params.core_ty);

        if (core_params.fp_instruction_window_size > 0) {
            fp_instruction_selection =
                new selection_logic(xml_data, is_default,
                                    core_params.fp_instruction_window_size,
                                    core_params.fp_issueW *
                                    core_params.num_hthreads,
                                    &interface_ip,
                                    "FP Instruction Selection Logic",
                                    core_stats.fp_inst_window_wakeup_accesses,
                                    clockRate, Core_device,
                                    core_params.core_ty);
        }
    }

    if (core_params.core_ty == OOO) {
        /*
         * CAM based instruction window.
         * For physical-register-file based OOO it is the instruction issue
         * queue, where only tags of physical registers are stored.
         * For reservation-station (RS) based OOO it is the reservation
         * station, where both tags and values of physical registers are
         * stored.
         * It is written once and read twice (two operands) before an
         * instruction can be issued.
         * x86 instructions can be very long, up to 15B; the instruction
         * length comes from the XML.
         */
        if (core_params.scheu_ty == PhysicalRegFile) {
            tag = core_params.phy_ireg_width;
            // Round up only after converting bits to bytes, as the
            // reservation-station branch below does; rounding before the
            // division lets int() truncate the byte width downwards.
            data = int(ceil(((core_params.instruction_length +
                              NUM_SOURCE_OPERANDS *
                              (core_params.phy_ireg_width -
                               core_params.arch_ireg_width)) /
                             (double)NUM_SOURCE_OPERANDS) /
                            BITS_PER_BYTE));
            tmp_name = "Integer Instruction Window";
        } else {
            tag = core_params.phy_ireg_width;
            data = int(ceil(((core_params.instruction_length +
                              NUM_SOURCE_OPERANDS *
                              (core_params.phy_ireg_width -
                               core_params.arch_ireg_width) +
                              2 * core_params.int_data_width) /
                             (double)NUM_SOURCE_OPERANDS) /
                            BITS_PER_BYTE));
            tmp_name = "Integer Reservation Station";
        }

        size = data * core_params.instruction_window_size;

        interface_ip.cache_sz = size;
        interface_ip.line_sz = data;
        interface_ip.assoc = core_params.scheduler_assoc;
        interface_ip.nbanks = core_params.scheduler_nbanks;
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = Normal;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = core_params.peak_issueW;
        interface_ip.num_wr_ports = core_params.peak_issueW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = core_params.peak_issueW;
        interface_ip.is_cache = true;
        interface_ip.pure_cam = false;
        interface_ip.pure_ram = false;
        interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
        interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate;
        int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name,
                                      Core_device, clockRate,
                                      core_params.opt_local,
                                      core_params.core_ty);
        int_inst_window->output_data.area *= core_params.num_pipelines;
        area.set_area(area.get_area() + int_inst_window->local_result.area *
                      core_params.num_pipelines);
        Iw_height = int_inst_window->local_result.cache_ht;

        // FP instruction window
        if (core_params.scheu_ty == PhysicalRegFile) {
            tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width;
            data = int(ceil((core_params.instruction_length +
                             NUM_SOURCE_OPERANDS *
                             (core_params.phy_freg_width -
                              core_params.arch_freg_width)) / BITS_PER_BYTE));
            tmp_name = "FP Instruction Window";
        } else {
            // FP entries are tagged with FP register IDs, matching the
            // PhysicalRegFile branch above and the data width below.
            tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width;
            data = int(ceil((core_params.instruction_length +
                             NUM_SOURCE_OPERANDS *
                             (core_params.phy_freg_width -
                              core_params.arch_freg_width) +
                             NUM_SOURCE_OPERANDS * core_params.fp_data_width) /
                            BITS_PER_BYTE));
            tmp_name = "FP Reservation Station";
        }

        size = data * core_params.fp_instruction_window_size;

        interface_ip.cache_sz = size;
        interface_ip.line_sz = data;
        interface_ip.assoc = core_params.scheduler_assoc;
        interface_ip.nbanks = core_params.scheduler_nbanks;
        interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = Normal;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = core_params.fp_issueW;
        interface_ip.num_wr_ports = core_params.fp_issueW;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = core_params.fp_issueW;
        interface_ip.is_cache = true;
        interface_ip.pure_cam = false;
        interface_ip.pure_ram = false;
        interface_ip.throughput = 1.0 / clockRate;
        interface_ip.latency = 1.0 / clockRate;
        fp_inst_window =
            new ArrayST(xml_data, &interface_ip, tmp_name, Core_device,
                        clockRate, core_params.opt_local, core_params.core_ty);
        fp_inst_window->output_data.area *= core_params.num_fp_pipelines;
        area.set_area(area.get_area() + fp_inst_window->local_result.area
                      *core_params.num_fp_pipelines);
        fp_Iw_height = fp_inst_window->local_result.cache_ht;

        if (core_params.ROB_size > 0) {
            /*
             * If ROB_size = 0, the target processor does not support
             * hardware-based speculation, i.e. the processor allows OOO
             * issue as well as OOO completion. Branches must then be
             * resolved before instructions are issued into the instruction
             * window, since there is no way to flush a mispredicted branch
             * path after instructions have been issued.
             *
             * ROB size = number of in-flight instructions. The ROB is
             * unified for int and fp instructions.
             * One old approach is to combine the RAT and the ROB into one
             * huge CAM structure, as in the AMD K7. That approach was
             * abandoned because of its high power and poor scalability.
             * McPAT models the ROB as a circular buffer: it is written once
             * when an instruction is issued and read once when the
             * instruction is committed.
             */
            // Per-entry overhead: status bits plus the thread ID.
            int robExtra = int(ceil(ROB_STATUS_BITS +
                                    log2(core_params.num_hthreads)));

            if (core_params.scheu_ty == PhysicalRegFile) {
                // The PC identifies the instruction for exception recovery.
                // The register tag maps the renamed destination register so
                // that the commit stage knows which reg/RRAT to update.
                data = int(ceil((robExtra + core_params.pc_width +
                                 core_params.phy_ireg_width) / BITS_PER_BYTE));
            } else {
                // In RS based OOO, the ROB also contains the value of the
                // destination register.
                data = int(ceil((robExtra + core_params.pc_width +
                                 core_params.phy_ireg_width +
                                 core_params.fp_data_width) / BITS_PER_BYTE));
            }

            interface_ip.cache_sz = data * core_params.ROB_size;
            interface_ip.line_sz = data;
            interface_ip.assoc = core_params.ROB_assoc;
            interface_ip.nbanks = core_params.ROB_nbanks;
            interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
            interface_ip.specific_tag = core_params.ROB_tag_width > 0;
            interface_ip.tag_w = core_params.ROB_tag_width;
            interface_ip.access_mode = Sequential;
            interface_ip.obj_func_dyn_energy = 0;
            interface_ip.obj_func_dyn_power = 0;
            interface_ip.obj_func_leak_power = 0;
            interface_ip.obj_func_cycle_t = 1;
            interface_ip.num_rw_ports = 0;
            interface_ip.num_rd_ports = core_params.peak_commitW;
            interface_ip.num_wr_ports = core_params.peak_issueW;
            interface_ip.num_se_rd_ports = 0;
            interface_ip.num_search_ports = 0;
            interface_ip.is_cache = false;
            interface_ip.pure_cam = false;
            interface_ip.pure_ram = true;
            interface_ip.throughput = 1.0 / clockRate;
            interface_ip.latency = 1.0 / clockRate;
            ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer",
                              Core_device, clockRate, core_params.opt_local,
                              core_params.core_ty);
            ROB->output_data.area *= core_params.num_pipelines;
            area.set_area(area.get_area() + ROB->local_result.area *
                          core_params.num_pipelines);
            ROB_height = ROB->local_result.cache_ht;
        }

        int_instruction_selection =
            new selection_logic(xml_data, is_default,
                                core_params.instruction_window_size,
                                core_params.peak_issueW, &interface_ip,
                                "Int Instruction Selection Logic",
                                core_stats.inst_window_wakeup_accesses,
                                clockRate, Core_device, core_params.core_ty);

        if (core_params.fp_instruction_window_size > 0) {
            fp_instruction_selection =
                new selection_logic(xml_data, is_default,
                                    core_params.fp_instruction_window_size,
                                    core_params.fp_issueW, &interface_ip,
                                    "FP Instruction Selection Logic",
                                    core_stats.fp_inst_window_wakeup_accesses,
                                    clockRate, Core_device,
                                    core_params.core_ty);
        }
    }
}
/*
 * Builds the load/store unit.
 *
 * The constructor
 *   1. scans the "component" children of the XML node for a CacheUnit named
 *      "Data Cache" or "dcache" and instantiates it as the data cache,
 *   2. always creates the "Store Queue" array (LSQ), and
 *   3. creates a separate "Load Queue" array only for OOO cores whose
 *      load_buffer_size is positive.
 *
 * _xml_data      XML node of this unit (also handed to the queue arrays).
 * interface_ip_  Array-model input used to initialise this unit's
 *                interface_ip, which is then overwritten per array.
 * _core_params   Core parameters (buffer sizes, port counts, core type, ...).
 * _core_stats    Core statistics, only stored here.
 * exist_         When false the constructor returns right away and no
 *                sub-component is created.
 */
LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_,
                       const CoreParameters & _core_params,
                       const CoreStatistics & _core_stats, bool exist_)
    : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL),
      interface_ip(*interface_ip_),
      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    int size;
    int ldst_opcode = core_params.opcode_width;

    clockRate = core_params.clockRate;
    name = "Load/Store Unit";

    // Check if there is a dcache child. The index is passed by pointer to
    // getChildNodePtr(), so it has to stay a plain, addressable int.
    int i;
    for (i = 0; i < xml_data->nChildNode("component"); i++) {
        XMLNode* childXML = xml_data->getChildNodePtr("component", &i);
        XMLCSTR type = childXML->getAttribute("type");

        if (!type)
            warnMissingComponentType(childXML->getAttribute("id"));
        STRCMP(type, "CacheUnit") {
            // getAttribute() can return NULL (see the type check above), so
            // guard the name before comparing it. The local is deliberately
            // not called "name" to avoid shadowing the member set above.
            XMLCSTR cache_name = childXML->getAttribute("name");
            if (cache_name &&
                (strcmp(cache_name, "Data Cache") == 0 ||
                 strcmp(cache_name, "dcache") == 0)) {
                dcache = new CacheUnit(childXML, &interface_ip);
                children.push_back(dcache);
            }
        }
    }

    /*
     * LSU--in-order processors do not have a separate load queue: a unified
     * LSQ is partitioned among threads. It is actually the store queue, but
     * for in-order processors it serves as both load queue and store queue.
     */
    // The tag and line width are the same for the store and the load queue.
    int tag = ldst_opcode + virtual_address_width +
        int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
    int line = int(ceil(data_path_width / BITS_PER_BYTE));

    size = core_params.store_buffer_size * line * core_params.num_hthreads;
    interface_ip.cache_sz = size;
    interface_ip.line_sz = line;
    interface_ip.assoc = core_params.store_buffer_assoc;
    interface_ip.nbanks = core_params.store_buffer_nbanks;
    interface_ip.out_w = line * BITS_PER_BYTE;
    interface_ip.specific_tag = tag > 0;
    interface_ip.tag_w = tag;
    interface_ip.access_mode = Sequential;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.memory_ports;
    interface_ip.num_wr_ports = core_params.memory_ports;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = core_params.memory_ports;
    interface_ip.is_cache = true;
    interface_ip.pure_ram = false;
    interface_ip.pure_cam = false;
    interface_ip.throughput = 1.0 / clockRate;
    interface_ip.latency = 1.0 / clockRate;
    LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device,
                      clockRate, core_params.opt_local, core_params.core_ty);
    area.set_area(area.get_area() + LSQ->local_result.area);
    area.set_area(area.get_area()*cdb_overhead);
    lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead);

    if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) {
        // Every field is written again rather than relying on the values
        // left over from the store queue, since interface_ip was handed to
        // the ArrayST constructor by non-const pointer.
        size = core_params.load_buffer_size * line * core_params.num_hthreads;
        interface_ip.cache_sz = size;
        interface_ip.line_sz = line;
        interface_ip.assoc = core_params.load_buffer_assoc;
        interface_ip.nbanks = core_params.load_buffer_nbanks;
        interface_ip.out_w = line * BITS_PER_BYTE;
        interface_ip.specific_tag = tag > 0;
        interface_ip.tag_w = tag;
        interface_ip.access_mode = Sequential;
        interface_ip.obj_func_dyn_energy = 0;
        interface_ip.obj_func_dyn_power = 0;
        interface_ip.obj_func_leak_power = 0;
        interface_ip.obj_func_cycle_t = 1;
        interface_ip.num_rw_ports = 0;
        interface_ip.num_rd_ports = core_params.memory_ports;
        interface_ip.num_wr_ports = core_params.memory_ports;
        interface_ip.num_se_rd_ports = 0;
        interface_ip.num_search_ports = core_params.memory_ports;
        interface_ip.is_cache = true;
        interface_ip.pure_ram = false;
        interface_ip.pure_cam = false;
        interface_ip.throughput = 1.0 / clockRate;
        interface_ip.latency = 1.0 / clockRate;
        LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device,
                            clockRate, core_params.opt_local,
                            core_params.core_ty);
        // NOTE(review): unlike the store queue above, the load queue's
        // local_result.area is added to LoadQ->area and not to this unit's
        // area, while cdb_overhead is applied to the unit's area a second
        // time. This looks unintentional -- confirm before relying on the
        // reported Load/Store Unit area.
        LoadQ->area.set_area(LoadQ->area.get_area() +
                             LoadQ->local_result.area);
        area.set_area(area.get_area()*cdb_overhead);
        lsq_height = (LSQ->local_result.cache_ht +
                      LoadQ->local_result.cache_ht) * sqrt(cdb_overhead);
    }
}
/*
 * Builds the memory management unit, which consists of two arrays: the
 * "Instruction TLB" and the "Data TLB". Their entry counts, associativity,
 * banking, latency and throughput are read from the XML by
 * set_params_stats(); the port counts come from the core parameters.
 *
 * When exist_ is false the constructor returns without creating anything.
 */
MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_,
                 const CoreParameters & _core_params,
                 const CoreStatistics & _core_stats, bool exist_)
    : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL),
      interface_ip(*interface_ip_),
      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    clockRate = core_params.clockRate;
    name = "Memory Management Unit";

    // Fills mem_man_params / mem_man_stats, which are read below.
    set_params_stats();

    // Entry layout, identical for the ITLB and the DTLB. TLBs are
    // partitioned among threads according to Niagara and Nehalem, hence the
    // thread-id bits in the tag.
    const int page_offset_bits = int(floor(log2(virtual_memory_page_size)));
    const int tlb_tag = virtual_address_width - page_offset_bits +
        int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS;
    const int entry_bits = physical_address_width - page_offset_bits;
    const int entry_bytes = int(ceil(entry_bits / BITS_PER_BYTE));

    // Array kind: shared between ITLB and DTLB, set only once.
    interface_ip.is_cache = true;
    interface_ip.pure_cam = false;
    interface_ip.pure_ram = false;

    // ------------------------- Instruction TLB -------------------------
    // Geometry
    interface_ip.line_sz = entry_bytes;
    interface_ip.cache_sz = mem_man_params.itlb_number_entries * entry_bytes;
    interface_ip.out_w = entry_bytes * BITS_PER_BYTE;
    interface_ip.assoc = mem_man_params.itlb_assoc;
    interface_ip.nbanks = mem_man_params.itlb_nbanks;
    // Tag
    interface_ip.tag_w = tlb_tag;
    interface_ip.specific_tag = tlb_tag > 0;
    // Access mode and optimisation weights
    interface_ip.access_mode = Normal;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    // Ports: read/write and search ports follow the instruction fetch ports
    interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports;
    interface_ip.num_search_ports = core_params.number_instruction_fetch_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    // Timing
    interface_ip.latency = mem_man_params.itlb_latency / clockRate;
    interface_ip.throughput = mem_man_params.itlb_throughput / clockRate;

    itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device,
                       clockRate, core_params.opt_local, core_params.core_ty);
    area.set_area(area.get_area() + itlb->local_result.area);

    // ----------------------------- Data TLB -----------------------------
    // Geometry
    interface_ip.line_sz = entry_bytes;
    interface_ip.cache_sz = mem_man_params.dtlb_number_entries * entry_bytes;
    interface_ip.out_w = entry_bytes * BITS_PER_BYTE;
    interface_ip.assoc = mem_man_params.dtlb_assoc;
    interface_ip.nbanks = mem_man_params.dtlb_nbanks;
    // Tag
    interface_ip.tag_w = tlb_tag;
    interface_ip.specific_tag = tlb_tag > 0;
    // Access mode and optimisation weights
    interface_ip.access_mode = Normal;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    // Ports: dedicated read, write and search ports, one per memory port
    interface_ip.num_rw_ports = 0;
    interface_ip.num_rd_ports = core_params.memory_ports;
    interface_ip.num_wr_ports = core_params.memory_ports;
    interface_ip.num_search_ports = core_params.memory_ports;
    interface_ip.num_se_rd_ports = 0;
    // Timing
    interface_ip.latency = mem_man_params.dtlb_latency / clockRate;
    interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate;

    dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device,
                       clockRate, core_params.opt_local, core_params.core_ty);
    area.set_area(area.get_area() + dtlb->local_result.area);
}
void
MemManU::set_params_stats() {
memset(&mem_man_params, 0, sizeof(MemoryManagementParams));
memset(&mem_man_stats, 0, sizeof(MemoryManagementStats));
int num_children = xml_data->nChildNode("component");
int i;
for (i = 0; i < num_children; i++) {
XMLNode* child = xml_data->getChildNodePtr("component", &i);
XMLCSTR type = child->getAttribute("type");
if (!type)
warnMissingComponentType(child->getAttribute("id"));
STRCMP(type, "InstructionTLB") {
int sub_num_children = child->nChildNode("param");
int j;
for (j = 0; j < sub_num_children; j++) {
XMLNode* paramNode = child->getChildNodePtr("param", &j);
XMLCSTR node_name = paramNode->getAttribute("name");
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_INT_IF("number_entries",
mem_man_params.itlb_number_entries);
ASSIGN_FP_IF("latency", mem_man_params.itlb_latency);
ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput);
ASSIGN_FP_IF("assoc", mem_man_params.itlb_assoc);
ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks);
else {
warnUnrecognizedParam(node_name);
}
}
sub_num_children = child->nChildNode("stat");
for (j = 0; j < sub_num_children; j++) {
XMLNode* statNode = child->getChildNodePtr("stat", &j);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("total_accesses",
mem_man_stats.itlb_total_accesses);
ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses);
ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts);
else {
warnUnrecognizedStat(node_name);
}
}
} STRCMP(type, "DataTLB") {
int sub_num_children = child->nChildNode("param");
int j;
for (j = 0; j < sub_num_children; j++) {
XMLNode* paramNode = child->getChildNodePtr("param", &j);
XMLCSTR node_name = paramNode->getAttribute("name");
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_INT_IF("number_entries",
mem_man_params.dtlb_number_entries);
ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency);
ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput);
ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc);
ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks);
else {
warnUnrecognizedParam(node_name);
}
}
sub_num_children = child->nChildNode("stat");
for (j = 0; j < sub_num_children; j++) {
XMLNode* statNode = child->getChildNodePtr("stat", &j);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("read_accesses",
mem_man_stats.dtlb_read_accesses);
ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses);
ASSIGN_FP_IF("write_accesses",
mem_man_stats.dtlb_write_accesses);
ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses);
ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts);
else {
warnUnrecognizedStat(node_name);
}
}
}
}
}
/*
 * Builds the register file unit: the "Integer Register File", the "FP
 * Register File" and, when core_params.regWindowing is set, a "RegWindow"
 * array.
 *
 * Processors have separate architectural register files for each thread,
 * therefore the bypass buses need to travel across all the register files;
 * this is why the register file areas and heights are scaled by the number
 * of hardware threads and by cdb_overhead.
 *
 * When exist_ is false the constructor returns without creating anything.
 */
RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_,
             const CoreParameters & _core_params,
             const CoreStatistics & _core_stats, bool exist_)
    : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL),
      interface_ip(*interface_ip_),
      core_params(_core_params), core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    clockRate = core_params.clockRate;
    name = "Register File Unit";

    int word_bits;   // register width in bits
    int word_bytes;  // register width in bytes, i.e. the array line size

    //============================ Integer RF ============================
    word_bits = core_params.int_data_width;
    word_bytes = int(ceil(word_bits / BITS_PER_BYTE));
    // Geometry
    interface_ip.line_sz = word_bytes;
    interface_ip.cache_sz = core_params.num_IRF_entry * word_bytes;
    interface_ip.out_w = word_bytes * BITS_PER_BYTE;
    interface_ip.assoc = core_params.phy_Regs_IRF_assoc;
    interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks;
    // Tag
    interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width;
    interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0;
    // Array kind: plain RAM
    interface_ip.pure_ram = true;
    interface_ip.pure_cam = false;
    interface_ip.is_cache = false;
    // Access mode and optimisation weights
    interface_ip.access_mode = Sequential;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    // Ports
    interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports;
    interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    // Timing: one access per cycle
    interface_ip.latency = 1.0 / clockRate;
    interface_ip.throughput = 1.0 / clockRate;

    IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File",
                      Core_device, clockRate, core_params.opt_local,
                      core_params.core_ty);
    // One copy per hardware thread and per integer pipeline.
    IRF->output_data.area *= core_params.num_hthreads *
        core_params.num_pipelines * cdb_overhead;
    area.set_area(area.get_area() + IRF->local_result.area *
                  core_params.num_hthreads * core_params.num_pipelines *
                  cdb_overhead);

    //=============================== FP RF ===============================
    word_bits = core_params.fp_data_width;
    word_bytes = int(ceil(word_bits / BITS_PER_BYTE));
    // Geometry
    interface_ip.line_sz = word_bytes;
    interface_ip.cache_sz = core_params.num_FRF_entry * word_bytes;
    interface_ip.out_w = word_bytes * BITS_PER_BYTE;
    interface_ip.assoc = core_params.phy_Regs_FRF_assoc;
    interface_ip.nbanks = core_params.phy_Regs_FRF_nbanks;
    // Tag
    interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width;
    interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0;
    // Array kind: plain RAM
    interface_ip.pure_ram = true;
    interface_ip.pure_cam = false;
    interface_ip.is_cache = false;
    // Access mode and optimisation weights
    interface_ip.access_mode = Sequential;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    // Ports
    interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports;
    interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports;
    interface_ip.num_rw_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    // Timing: one access per cycle
    interface_ip.latency = 1.0 / clockRate;
    interface_ip.throughput = 1.0 / clockRate;

    FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device,
                      clockRate, core_params.opt_local, core_params.core_ty);
    // One copy per hardware thread and per FP pipeline.
    FRF->output_data.area *= core_params.num_hthreads *
        core_params.num_fp_pipelines * cdb_overhead;
    area.set_area(area.get_area() + FRF->local_result.area *
                  core_params.num_hthreads * core_params.num_fp_pipelines *
                  cdb_overhead);

    // Since an EXU is associated with each pipeline, the cdb should not
    // have longer length: the heights scale with the thread count only,
    // not with the pipeline count.
    int_regfile_height = IRF->local_result.cache_ht *
        core_params.num_hthreads * sqrt(cdb_overhead);
    fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads *
        sqrt(cdb_overhead);

    if (!core_params.regWindowing) return;

    //========================== Register windows ==========================
    // ECC, and usually 2 regs are transferred together during window
    // shifting. Niagara Mega cell.
    word_bits = core_params.int_data_width;
    word_bytes = int(ceil(word_bits / BITS_PER_BYTE));
    // Geometry: sized relative to the integer register file built above
    interface_ip.line_sz = word_bytes;
    interface_ip.cache_sz = core_params.register_window_size *
        IRF->l_ip.cache_sz * core_params.num_hthreads;
    interface_ip.out_w = word_bytes * BITS_PER_BYTE;
    interface_ip.assoc = core_params.register_window_assoc;
    interface_ip.nbanks = core_params.register_window_nbanks;
    // Tag
    interface_ip.tag_w = core_params.register_window_tag_width;
    interface_ip.specific_tag = core_params.register_window_tag_width > 0;
    // Array kind: plain RAM
    interface_ip.pure_ram = true;
    interface_ip.pure_cam = false;
    interface_ip.is_cache = false;
    // Access mode and optimisation weights
    interface_ip.access_mode = Sequential;
    interface_ip.obj_func_dyn_energy = 0;
    interface_ip.obj_func_dyn_power = 0;
    interface_ip.obj_func_leak_power = 0;
    interface_ip.obj_func_cycle_t = 1;
    // Ports: shared read/write ports only
    interface_ip.num_rw_ports = core_params.register_window_rw_ports;
    interface_ip.num_rd_ports = 0;
    interface_ip.num_wr_ports = 0;
    interface_ip.num_se_rd_ports = 0;
    interface_ip.num_search_ports = 0;
    // Timing
    interface_ip.latency =
        core_params.register_window_latency / clockRate;
    interface_ip.throughput =
        core_params.register_window_throughput / clockRate;

    RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device,
                        clockRate, core_params.opt_local,
                        core_params.core_ty);
    RFWIN->output_data.area *= core_params.num_pipelines;
    area.set_area(area.get_area() + RFWIN->local_result.area *
                  core_params.num_pipelines);
}
/*
 * Builds the execution unit: the register file unit, an optional scheduler,
 * the ALUs, optional FPUs and multipliers, and the bypass (broadcast)
 * interconnects between them.
 *
 * _xml_data      XML node of this unit; forwarded to every sub-component.
 * interface_ip_  Array/wire model input used to initialise this unit's
 *                interface_ip.
 * lsq_height_    Height of the load/store queue; it is part of most bypass
 *                wire lengths computed below.
 * _core_params   Core parameters (core type, scheduler type, FU counts,
 *                bypass wire settings, ...).
 * _core_stats    Core statistics, forwarded to the sub-components.
 * exist_         When false the constructor returns right away and no
 *                sub-component is created.
 *
 * A scheduler is created for OOO cores and for multithreaded in-order
 * cores. The integer bypass data/tag wires are always created; the
 * multiplier and FP bypass wires only exist when num_muls / num_fpus is
 * positive.
 */
EXECU::EXECU(XMLNode* _xml_data,
             InputParameter* interface_ip_, double lsq_height_,
             const CoreParameters & _core_params,
             const CoreStatistics & _core_stats, bool exist_)
    : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL),
      exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL),
      int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL),
      fpTagBypass(NULL), interface_ip(*interface_ip_),
      lsq_height(lsq_height_), core_params(_core_params),
      core_stats(_core_stats), exist(exist_) {
    if (!exist) return;

    clockRate = core_params.clockRate;
    name = "Execution Unit";

    const bool has_fpu = core_params.num_fpus > 0;
    const bool has_mul = core_params.num_muls > 0;

    //-------------------- Register files, scheduler, FUs --------------------
    rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats);
    if (core_params.core_ty == OOO ||
        (core_params.core_ty == Inorder && core_params.multithreaded)) {
        scheu = new SchedulerU(xml_data, &interface_ip, core_params,
                               core_stats);
        area.set_area(area.get_area() + scheu->area.get_area() );
    }
    exeu = new FunctionalUnit(xml_data, &interface_ip, core_params,
                              core_stats, ALU);
    area.set_area(area.get_area() + exeu->area.get_area() +
                  rfu->area.get_area());
    if (has_fpu) {
        fp_u = new FunctionalUnit(xml_data, &interface_ip,
                                  core_params, core_stats, FPU);
        area.set_area(area.get_area() + fp_u->area.get_area());
    }
    if (has_mul) {
        mul = new FunctionalUnit(xml_data, &interface_ip,
                                 core_params, core_stats, MUL);
        area.set_area(area.get_area() + mul->area.get_area());
    }

    /*
     * Broadcast logic, including int-broadcast, int_tag-broadcast,
     * fp-broadcast and fp_tag-broadcast.
     * Integer bypass has two paths and fp has 3 paths.
     * On the same bus there are multiple tri-state drivers and muxes that go
     * to different components on the same bus.
     */
    interface_ip.wt = core_params.execu_broadcast_wt;
    interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type;
    interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type;
    interface_ip.throughput = core_params.broadcast_numerator / clockRate;
    interface_ip.latency = core_params.broadcast_numerator / clockRate;

    // Scheduler structure heights; they stay 0 when there is no scheduler.
    double scheu_Iw_height = 0.0;
    double scheu_ROB_height = 0.0;
    double scheu_fp_Iw_height = 0.0;
    if (scheu) {
        scheu_Iw_height = scheu->Iw_height;
        scheu_ROB_height = scheu->ROB_height;
        scheu_fp_Iw_height = scheu->fp_Iw_height;
    }

    //------------------- Per-wire width and length -------------------
    // The three configurations (in-order, OOO with physical register file,
    // OOO with reservation stations) differ only in the bit width and the
    // length of each bypass wire, so only those are selected here and the
    // wires themselves are built once further down. The mul/fp values are
    // only computed (and only used) when the corresponding unit exists.
    // The operand order of every length sum is kept as-is on purpose so the
    // floating-point results do not change.
    int int_data_w = 0;
    int int_tag_w = 0;
    int mul_data_w = 0;
    int mul_tag_w = 0;
    int fp_data_w = 0;
    int fp_tag_w = 0;
    double int_data_len = 0.0;
    double int_tag_len = 0.0;
    double mul_data_len = 0.0;
    double mul_tag_len = 0.0;
    double fp_data_len = 0.0;
    double fp_tag_len = 0.0;

    if (core_params.core_ty == Inorder) {
        int_data_w = int(ceil(data_path_width / 32.0)*32);
        int_data_len = rfu->int_regfile_height + exeu->FU_height + lsq_height;
        int_tag_w = core_params.perThreadState;
        int_tag_len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
            scheu_Iw_height;
        if (has_mul) {
            // The in-order multiplier bypass is measured against the FP
            // register file height, unlike the OOO case below.
            mul_data_w = int(ceil(data_path_width / 32.0)*32*1.5);
            mul_data_len = rfu->fp_regfile_height + exeu->FU_height +
                mul->FU_height + lsq_height;
            mul_tag_w = core_params.perThreadState;
            mul_tag_len = rfu->fp_regfile_height + exeu->FU_height +
                mul->FU_height + lsq_height + scheu_Iw_height;
        }
        if (has_fpu) {
            fp_data_w = int(ceil(data_path_width / 32.0)*32*1.5);
            fp_data_len = rfu->fp_regfile_height + fp_u->FU_height;
            fp_tag_w = core_params.perThreadState;
            fp_tag_len = rfu->fp_regfile_height + fp_u->FU_height +
                lsq_height + scheu_Iw_height;
        }
    } else {  // OOO
        // Widths are the same for both OOO scheduler styles.
        int_data_w = int(ceil(core_params.int_data_width));
        int_tag_w = core_params.phy_ireg_width;
        mul_data_w = int(ceil(core_params.int_data_width));
        mul_tag_w = core_params.phy_ireg_width;
        fp_data_w = int(ceil(core_params.fp_data_width));
        fp_tag_w = core_params.phy_freg_width;

        // Tag broadcast covers functional units, lsq, instruction windows,
        // register files and the ROB in both OOO scheduler styles.
        int_tag_len = rfu->int_regfile_height + exeu->FU_height + lsq_height +
            scheu_Iw_height + scheu_ROB_height;
        if (has_mul) {
            mul_tag_len = rfu->int_regfile_height + exeu->FU_height +
                mul->FU_height + lsq_height + scheu_Iw_height +
                scheu_ROB_height;
        }
        if (has_fpu) {
            fp_tag_len = rfu->fp_regfile_height + fp_u->FU_height +
                lsq_height + scheu_fp_Iw_height + scheu_ROB_height;
        }

        if (core_params.scheu_ty == PhysicalRegFile) {
            /*
             * For physical register based OOO, data broadcast interconnects
             * cover functional units, lsq, and register files, while tag
             * broadcast interconnects also cover the instruction windows
             * and the ROB.
             */
            int_data_len = rfu->int_regfile_height + exeu->FU_height +
                lsq_height;
            if (has_mul) {
                mul_data_len = rfu->int_regfile_height + exeu->FU_height +
                    mul->FU_height + lsq_height;
            }
            if (has_fpu) {
                fp_data_len = rfu->fp_regfile_height + fp_u->FU_height;
            }
        } else {
            /*
             * In RS based processors both data and tag are broadcast
             * together, covering functional units, lsq, inst windows,
             * register files, and ROBs, so the data wires are exactly as
             * long as the tag wires.
             */
            int_data_len = int_tag_len;
            mul_data_len = mul_tag_len;
            fp_data_len = fp_tag_len;
        }
    }

    //------------------------ Build the bypass wires ------------------------
    // Common bypass logic parameters
    double base_w = core_params.execu_bypass_base_width;
    double base_h = core_params.execu_bypass_base_height;
    int level = core_params.execu_bypass_start_wiring_level;
    double route_over_perc = core_params.execu_bypass_route_over_perc;
    Wire_type wire_type = core_params.execu_bypass_wire_type;

    int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device,
                                  base_w, base_h, int_data_w, int_data_len,
                                  &interface_ip, level, clockRate, false,
                                  route_over_perc, core_params.opt_local,
                                  core_params.core_ty, wire_type);
    intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", Core_device,
                                    base_w, base_h, int_tag_w, int_tag_len,
                                    &interface_ip, level, clockRate, false,
                                    route_over_perc, core_params.opt_local,
                                    core_params.core_ty, wire_type);
    if (has_mul) {
        int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data",
                                          Core_device, base_w, base_h,
                                          mul_data_w, mul_data_len,
                                          &interface_ip, level, clockRate,
                                          false, route_over_perc,
                                          core_params.opt_local,
                                          core_params.core_ty, wire_type);
        intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag",
                                             Core_device, base_w, base_h,
                                             mul_tag_w, mul_tag_len,
                                             &interface_ip, level, clockRate,
                                             false, route_over_perc,
                                             core_params.opt_local,
                                             core_params.core_ty, wire_type);
    }
    if (has_fpu) {
        fp_bypass = new Interconnect(xml_data, "FP Bypass Data", Core_device,
                                     base_w, base_h, fp_data_w, fp_data_len,
                                     &interface_ip, level, clockRate, false,
                                     route_over_perc, core_params.opt_local,
                                     core_params.core_ty, wire_type);
        fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", Core_device,
                                       base_w, base_h, fp_tag_w, fp_tag_len,
                                       &interface_ip, level, clockRate, false,
                                       route_over_perc, core_params.opt_local,
                                       core_params.core_ty, wire_type);
    }

    //--------------------- Register wires as children ---------------------
    if (int_bypass) {
        children.push_back(int_bypass);
    }
    if (intTagBypass) {
        children.push_back(intTagBypass);
    }
    if (int_mul_bypass) {
        children.push_back(int_mul_bypass);
    }
    if (intTag_mul_Bypass) {
        children.push_back(intTag_mul_Bypass);
    }
    if (fp_bypass) {
        children.push_back(fp_bypass);
    }
    if (fpTagBypass) {
        children.push_back(fpTagBypass);
    }

    //----------------------- Accumulate wire area -----------------------
    area.set_area(area.get_area() + int_bypass->area.get_area() +
                  intTagBypass->area.get_area());
    if (has_mul) {
        area.set_area(area.get_area() + int_mul_bypass->area.get_area() +
                      intTag_mul_Bypass->area.get_area());
    }
    if (has_fpu) {
        area.set_area(area.get_area() + fp_bypass->area.get_area() +
                      fpTagBypass->area.get_area());
    }
}
RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_,
const CoreParameters & _core_params,
const CoreStatistics & _core_stats, bool exist_)
: McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL),
fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL),
RAHT(NULL), interface_ip(*interface_ip_),
core_params(_core_params), core_stats(_core_stats), exist(exist_) {
if (!exist) return;
int tag;
int data;
int out_w;
int size;
// Assumption:
// We make an implicit design assumption based on the specific structure
// that is being modeled.
// 1. RAM-based RATs are direct mapped. However, if the associated
// scheduler is a reservation station style, the RATs are fully
// associative.
// 2. Non-CAM based RATs and free lists do not have tags.
// 3. Free lists are direct mapped.
const int RAM_BASED_RAT_ASSOC = 1;
const int RS_RAT_ASSOC = 0;
const int NON_CAM_BASED_TAG_WIDTH = 0;
const int FREELIST_ASSOC = 1;
clockRate = core_params.clockRate;
name = "Rename Unit";
if (core_params.core_ty == OOO) {
//integer pipeline
if (core_params.scheu_ty == PhysicalRegFile) {
if (core_params.rm_ty == RAMbased) {
//FRAT with global checkpointing (GCs) please see paper tech
//report for detailed explaintions
data = int(ceil(core_params.phy_ireg_width *
(1 + core_params.globalCheckpoint) /
BITS_PER_BYTE));
out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
size = data * core_params.archi_Regs_IRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = RAM_BASED_RAT_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports =
NUM_SOURCE_OPERANDS * core_params.decodeW;
interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
iFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + iFRAT->area.get_area());
//FRAT floating point
data = int(ceil(core_params.phy_freg_width *
(1 + core_params.globalCheckpoint) /
BITS_PER_BYTE));
out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
size = data * core_params.archi_Regs_FRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = RAM_BASED_RAT_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports =
NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
fFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + fFRAT->area.get_area());
} else if ((core_params.rm_ty == CAMbased)) {
//IRAT
tag = core_params.arch_ireg_width;
//the address of CAM needed to be sent out
data = int(ceil((core_params.arch_ireg_width + 1 *
core_params.globalCheckpoint) /
BITS_PER_BYTE));
out_w = int(ceil(core_params.arch_ireg_width / BITS_PER_BYTE));
size = data * core_params.phy_Regs_IRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = CAM_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports = core_params.decodeW;
interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports =
NUM_SOURCE_OPERANDS * core_params.decodeW;
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
iFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + iFRAT->area.get_area());
//FRAT for FP
tag = core_params.arch_freg_width;
//the address of CAM needed to be sent out
data = int(ceil((core_params.arch_freg_width + 1 *
core_params.globalCheckpoint) /
BITS_PER_BYTE));
out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
size = data * core_params.phy_Regs_FRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = CAM_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports = core_params.fp_decodeW;
interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports =
NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
fFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + fFRAT->area.get_area());
}
//RRAT is always RAM based, does not have GCs, and is used only for
//record latest non-speculative mapping
data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
size = data * core_params.archi_Regs_IRF_size *
NUM_SOURCE_OPERANDS;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = RAM_BASED_RAT_ASSOC;
interface_ip.nbanks = core_params.retire_rat_nbanks;
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
interface_ip.num_rd_ports = core_params.commitW;
interface_ip.num_wr_ports = core_params.commitW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT",
Core_device, clockRate, core_params.opt_local,
core_params.core_ty);
iRRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + iRRAT->area.get_area());
//RRAT for FP
data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
size = data * core_params.archi_Regs_FRF_size *
NUM_SOURCE_OPERANDS;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = RAM_BASED_RAT_ASSOC;
interface_ip.nbanks = core_params.retire_rat_nbanks;
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.retire_rat_rw_ports;
interface_ip.num_rd_ports = core_params.fp_decodeW;
interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire RAT",
Core_device, clockRate, core_params.opt_local,
core_params.core_ty);
fRRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + fRRAT->area.get_area());
//Freelist of renaming unit always RAM based
//Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist
// 2)When instruction commits the Phyregisters/ROB needed to be recycled.
//therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width
data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
size = data * core_params.num_ifreelist_entries;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = FREELIST_ASSOC;
interface_ip.nbanks = core_params.freelist_nbanks;
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.freelist_rw_ports;
interface_ip.num_rd_ports = core_params.decodeW;
interface_ip.num_wr_ports =
core_params.decodeW - 1 + core_params.commitW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List",
Core_device, clockRate, core_params.opt_local,
core_params.core_ty);
ifreeL->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + ifreeL->area.get_area());
//freelist for FP
data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
size = data * core_params.num_ffreelist_entries;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = FREELIST_ASSOC;
interface_ip.nbanks = core_params.freelist_nbanks;
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Sequential;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.freelist_rw_ports;
interface_ip.num_rd_ports = core_params.fp_decodeW;
interface_ip.num_wr_ports =
core_params.fp_decodeW - 1 + core_params.commitW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List",
Core_device, clockRate, core_params.opt_local,
core_params.core_ty);
ffreeL->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + ffreeL->area.get_area());
} else if (core_params.scheu_ty == ReservationStation) {
if (core_params.rm_ty == RAMbased) {
tag = core_params.phy_ireg_width;
data = int(ceil(core_params.phy_ireg_width *
(1 + core_params.globalCheckpoint) /
BITS_PER_BYTE));
out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
size = data * core_params.archi_Regs_IRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = RS_RAT_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports =
NUM_SOURCE_OPERANDS * core_params.decodeW;
interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = core_params.commitW;
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
iFRAT->local_result.adjust_area();
iFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + iFRAT->area.get_area());
//FP
tag = core_params.phy_freg_width;
data = int(ceil(core_params.phy_freg_width *
(1 + core_params.globalCheckpoint) /
BITS_PER_BYTE));
out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE));
size = data * core_params.archi_Regs_FRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = RS_RAT_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports =
NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = core_params.fp_issueW;
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
fFRAT->local_result.adjust_area();
fFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + fFRAT->area.get_area());
} else if ((core_params.rm_ty == CAMbased)) {
//FRAT
//the address of CAM needed to be sent out
tag = core_params.arch_ireg_width;
data = int(ceil (core_params.arch_ireg_width +
1 * core_params.globalCheckpoint /
BITS_PER_BYTE));
out_w = int(ceil (core_params.arch_ireg_width /
BITS_PER_BYTE));
size = data * core_params.phy_Regs_IRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = CAM_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports = core_params.decodeW;
interface_ip.num_wr_ports = core_params.decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports =
NUM_SOURCE_OPERANDS * core_params.decodeW;
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
iFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + iFRAT->area.get_area());
//FRAT
tag = core_params.arch_freg_width;
//the address of CAM needed to be sent out
data = int(ceil(core_params.arch_freg_width +
1 * core_params.globalCheckpoint /
BITS_PER_BYTE));
out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE));
size = data * core_params.phy_Regs_FRF_size;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = CAM_ASSOC;
interface_ip.nbanks = core_params.front_rat_nbanks;
interface_ip.out_w = out_w * BITS_PER_BYTE;
interface_ip.specific_tag = tag > 0;
interface_ip.tag_w = tag;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.front_rat_rw_ports;
interface_ip.num_rd_ports = core_params.decodeW;
interface_ip.num_wr_ports = core_params.fp_decodeW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports =
NUM_SOURCE_OPERANDS * core_params.fp_decodeW;
interface_ip.is_cache = true;
interface_ip.pure_cam = false;
interface_ip.pure_ram = false;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT",
Core_device, clockRate,
core_params.opt_local,
core_params.core_ty);
fFRAT->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + fFRAT->area.get_area());
}
//No RRAT for RS based OOO
//Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified
data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE));
size = data * core_params.num_ifreelist_entries;
interface_ip.cache_sz = size;
interface_ip.line_sz = data;
interface_ip.assoc = FREELIST_ASSOC;
interface_ip.nbanks = core_params.freelist_nbanks;
interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE;
interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0;
interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH;
interface_ip.access_mode = Fast;
interface_ip.obj_func_dyn_energy = 0;
interface_ip.obj_func_dyn_power = 0;
interface_ip.obj_func_leak_power = 0;
interface_ip.obj_func_cycle_t = 1;
interface_ip.num_rw_ports = core_params.freelist_rw_ports;
interface_ip.num_rd_ports = core_params.decodeW;
interface_ip.num_wr_ports =
core_params.decodeW - 1 + core_params.commitW;
interface_ip.num_se_rd_ports = 0;
interface_ip.num_search_ports = 0;
interface_ip.is_cache = false;
interface_ip.pure_cam = false;
interface_ip.pure_ram = true;
interface_ip.throughput = 1.0 / clockRate;
interface_ip.latency = 1.0 / clockRate;
ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List",
Core_device, clockRate, core_params.opt_local,
core_params.core_ty);
ifreeL->output_data.area *= core_params.num_hthreads;
area.set_area(area.get_area() + ifreeL->area.get_area());
}
}
idcl =
new dep_resource_conflict_check(xml_data,
"Instruction Dependency Check?",
&interface_ip, core_params,
core_params.phy_ireg_width,
clockRate);
fdcl =
new dep_resource_conflict_check(xml_data,
"FP Dependency Check?", &interface_ip,
core_params,
core_params.phy_freg_width, clockRate);
}
/*
 * Builds one core from its XML description.
 *
 * Steps, in order:
 *  1) name the component "Core <ithCore>";
 *  2) scan the "component" children of the XML node and attach a CacheUnit
 *     whose id is "system.L20" as this core's L2 cache;
 *  3) load the core parameters and create the functional units (fetch,
 *     load/store, MMU, execution, undifferentiated core, renaming for OOO
 *     cores, and the pipeline), registering each one as a child;
 *  4) spread the pipeline area over the units and sum unit areas into the
 *     core area.
 *
 * _xml_data      XML node describing this core
 * _ithCore       index of this core; only used here to build the name
 * interface_ip_  array-model input parameters; copied into the member
 *                interface_ip, whose address is then handed to every unit
 */
Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_)
    : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL),
      exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL),
      ithCore(_ithCore), interface_ip(*interface_ip_) {
    ostringstream os;
    os << ithCore;
    name = "Core " + os.str();

    int i = 0;
    XMLNode* childXML;
    for (i = 0; i < xml_data->nChildNode("component"); i++) {
        childXML = xml_data->getChildNodePtr("component", &i);
        XMLCSTR type = childXML->getAttribute("type");

        if (!type) {
            // A component without a type cannot be classified. Warn and move
            // on to the next child instead of handing a null string to the
            // comparison below.
            warnMissingComponentType(childXML->getAttribute("id"));
            continue;
        }

        STRCMP(type, "CacheUnit") {
            XMLCSTR comp_name = childXML->getAttribute("id");
            if (!comp_name)
                continue;

            STRCMP(comp_name, "system.L20") {
                l2cache = new CacheUnit(childXML, &interface_ip);
                children.push_back(l2cache);
            }
        }
    }

    set_core_param();
    clockRate = core_params.clockRate;

    // The units are created in a fixed order; every one of them receives the
    // same interface_ip object.
    ifu = new InstFetchU(xml_data, &interface_ip, core_params,
                         core_stats);
    children.push_back(ifu);
    lsu = new LoadStoreU(xml_data, &interface_ip, core_params,
                         core_stats);
    children.push_back(lsu);
    mmu = new MemManU(xml_data, &interface_ip, core_params,
                      core_stats);
    children.push_back(mmu);
    exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height,
                    core_params, core_stats);
    children.push_back(exu);
    undiffCore = new UndiffCore(xml_data, &interface_ip, core_params);
    children.push_back(undiffCore);
    if (core_params.core_ty == OOO) {
        // Only out-of-order cores get a renaming unit; rnu stays NULL
        // otherwise.
        rnu = new RENAMINGU(xml_data, &interface_ip, core_params,
                            core_stats);
        children.push_back(rnu);
    }
    corepipe = new Pipeline(xml_data, &interface_ip, core_params);
    children.push_back(corepipe);

    // The total pipeline area is divided into equal shares: five for OOO
    // cores (fetch, load/store, execution, MMU, renaming) and four otherwise.
    double pipeline_area_per_unit;
    if (core_params.core_ty == OOO) {
        pipeline_area_per_unit = (corepipe->area.get_area() *
                                  core_params.num_pipelines) / 5.0;
        if (rnu->exist) {
            rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit);
        }
    } else {
        pipeline_area_per_unit = (corepipe->area.get_area() *
                                  core_params.num_pipelines) / 4.0;
    }

    // Move all of this to computeArea
    //area.set_area(area.get_area()+ corepipe->area.get_area());
    if (ifu->exist) {
        ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit);
        area.set_area(area.get_area() + ifu->area.get_area());
    }

    if (lsu->exist) {
        lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit);
        area.set_area(area.get_area() + lsu->area.get_area());
    }

    if (exu->exist) {
        exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit);
        area.set_area(area.get_area() + exu->area.get_area());
    }

    if (mmu->exist) {
        mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit);
        area.set_area(area.get_area() + mmu->area.get_area());
    }

    if (core_params.core_ty == OOO) {
        // The renaming unit already received its pipeline share above.
        if (rnu->exist) {
            area.set_area(area.get_area() + rnu->area.get_area());
        }
    }

    if (undiffCore->exist) {
        area.set_area(area.get_area() + undiffCore->area.get_area());
    }

    if (l2cache) {
        area.set_area(area.get_area() + l2cache->area.get_area());
    }
}
namespace {

// Loads the TDP and runtime access counts into one predictor array and turns
// them into energy figures:
//  - power_t:  per-access read/write energy from local_result weighted by the
//              TDP access counts, plus local_result.power scaled by
//              leak_scale;
//  - rt_power: per-access read/write energy weighted by the runtime counts.
// TDP write accesses are always set to 0.
template <typename ArrayT, typename ReadCountT, typename WriteCountT,
          typename LeakScaleT>
void
evalPredictorArrayEnergy(ArrayT* array, double tdp_reads,
                         ReadCountT rt_reads, WriteCountT rt_writes,
                         LeakScaleT leak_scale)
{
    array->tdp_stats.reset();
    array->tdp_stats.readAc.access = tdp_reads;
    array->tdp_stats.writeAc.access = 0;

    array->rtp_stats.reset();
    array->rtp_stats.readAc.access = rt_reads;
    array->rtp_stats.writeAc.access = rt_writes;

    array->power_t.reset();
    array->power_t.readOp.dynamic +=
        array->local_result.power.readOp.dynamic *
        array->tdp_stats.readAc.access +
        array->local_result.power.writeOp.dynamic *
        array->tdp_stats.writeAc.access;
    array->power_t = array->power_t + array->local_result.power * leak_scale;

    array->rt_power.reset();
    array->rt_power.readOp.dynamic +=
        array->local_result.power.readOp.dynamic *
        array->rtp_stats.readAc.access +
        array->local_result.power.writeOp.dynamic *
        array->rtp_stats.writeAc.access;
}

// Copies the dynamic results of one predictor array into its output record:
// peak dynamic power is the TDP dynamic energy times the clock rate, runtime
// dynamic energy is taken from rt_power unchanged.
template <typename ArrayT, typename ClockT>
void
publishPredictorArrayDynamic(ArrayT* array, ClockT clock_rate)
{
    array->output_data.peak_dynamic_power =
        array->power_t.readOp.dynamic * clock_rate;
    array->output_data.runtime_dynamic_energy =
        array->rt_power.readOp.dynamic;
}

} // anonymous namespace

// Computes peak and runtime dynamic energy for the five branch predictor
// arrays (global, L1 local, L2 local, chooser, RAS) and sums their output
// records into this component's output_data. Does nothing when the predictor
// does not exist.
void BranchPredictor::computeEnergy() {
    if (!exist) return;

    // ASSUMPTION: All instructions access the branch predictors at Fetch and
    //             only branch instrucions update the predictors regardless
    //             of the correctness of the prediction.
    const double tdp_read_accesses =
        core_params.predictionW * core_stats.BR_duty_cycle;

    // Stage 1: per-array statistics and energy.
    // NOTE(review): every array pointer is dereferenced here, before the null
    // checks in stage 2.
    evalPredictorArrayEnergy(globalBPT, tdp_read_accesses,
                             core_stats.total_instructions,
                             core_stats.branch_instructions, pppm_lkg);
    evalPredictorArrayEnergy(L1_localBPT, tdp_read_accesses,
                             core_stats.total_instructions,
                             core_stats.branch_instructions, pppm_lkg);
    // The L2 local table uses the branch count for runtime reads as well.
    evalPredictorArrayEnergy(L2_localBPT, tdp_read_accesses,
                             core_stats.branch_instructions,
                             core_stats.branch_instructions, pppm_lkg);
    evalPredictorArrayEnergy(chooser, tdp_read_accesses,
                             core_stats.total_instructions,
                             core_stats.branch_instructions, pppm_lkg);
    // The RAS is driven by function calls and uses the multithread scaling.
    evalPredictorArrayEnergy(RAS, tdp_read_accesses,
                             core_stats.function_calls,
                             core_stats.function_calls,
                             core_params.pppm_lkg_multhread);

    // Stage 2: publish per-array outputs and accumulate them.
    output_data.reset();

    if (globalBPT) {
        publishPredictorArrayDynamic(globalBPT, clockRate);
        output_data += globalBPT->output_data;
    }

    if (L1_localBPT) {
        publishPredictorArrayDynamic(L1_localBPT, clockRate);
        output_data += L1_localBPT->output_data;
    }

    if (L2_localBPT) {
        publishPredictorArrayDynamic(L2_localBPT, clockRate);
        output_data += L2_localBPT->output_data;
    }

    if (chooser) {
        publishPredictorArrayDynamic(chooser, clockRate);
        output_data += chooser->output_data;
    }

    if (RAS) {
        publishPredictorArrayDynamic(RAS, clockRate);
        // Only the RAS publishes leakage here, scaled by the thread count.
        RAS->output_data.subthreshold_leakage_power =
            RAS->power_t.readOp.leakage * core_params.num_hthreads;
        RAS->output_data.gate_leakage_power =
            RAS->power_t.readOp.gate_leakage * core_params.num_hthreads;
        output_data += RAS->output_data;
    }
}
// Prints this component through the base class, then each of the five
// predictor arrays four columns deeper. Prints nothing when the predictor
// does not exist.
void BranchPredictor::displayData(uint32_t indent, int plevel) {
    if (exist) {
        McPATComponent::displayData(indent, plevel);

        const uint32_t child_indent = indent + 4;
        globalBPT->displayData(child_indent, plevel);
        L1_localBPT->displayData(child_indent, plevel);
        L2_localBPT->displayData(child_indent, plevel);
        chooser->displayData(child_indent, plevel);
        RAS->displayData(child_indent, plevel);
    }
}
namespace {

// Computes TDP and runtime figures for one instruction decoder.
// power_t starts as a copy of the decoder's OWN power record, after which its
// read dynamic component is replaced by per-access energy times decode_width.
// rt_power's read dynamic component is per-access energy times instructions.
template <typename DecoderT, typename WidthT, typename CountT>
void
evalDecoderEnergy(DecoderT* decoder, WidthT decode_width, CountT instructions)
{
    decoder->tdp_stats.reset();
    decoder->tdp_stats.readAc.access = decode_width;
    decoder->power_t.reset();
    decoder->power_t = decoder->power;
    decoder->power_t.readOp.dynamic = decoder->power.readOp.dynamic *
        decoder->tdp_stats.readAc.access;

    decoder->rtp_stats.reset();
    decoder->rtp_stats.readAc.access = instructions;
    decoder->rt_power.reset();
    decoder->rt_power.readOp.dynamic = decoder->power.readOp.dynamic *
        decoder->rtp_stats.readAc.access;
}

} // anonymous namespace

// Computes peak and runtime dynamic energy for the fetch unit's own
// structures (instruction buffer, BTB, the three decoders), triggers the
// branch predictor's computation, and sums the sub-component output records
// into this unit's output_data. Does nothing when the unit does not exist.
void InstFetchU::computeEnergy() {
    if (!exist) return;

    if (BPT) {
        BPT->computeEnergy();
    }

    // Instruction buffer: one read and one write per issued instruction.
    IB->tdp_stats.reset();
    IB->tdp_stats.readAc.access = core_params.peak_issueW;
    IB->tdp_stats.writeAc.access = core_params.peak_issueW;
    IB->rtp_stats.reset();
    IB->rtp_stats.readAc.access = core_stats.total_instructions;
    IB->rtp_stats.writeAc.access = core_stats.total_instructions;
    IB->power_t.reset();
    IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic *
        IB->tdp_stats.readAc.access +
        IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access;
    IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg;
    IB->rt_power.reset();
    IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic *
        IB->rtp_stats.readAc.access +
        IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access;

    // Branch target buffer: only evaluated when prediction is enabled.
    if (core_params.predictionW > 0) {
        BTB->tdp_stats.reset();
        BTB->tdp_stats.readAc.access = core_params.predictionW;
        BTB->tdp_stats.writeAc.access = 0;
        BTB->rtp_stats.reset();
        BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses;
        BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses;
        BTB->power_t.reset();
        BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic *
            BTB->tdp_stats.readAc.access +
            BTB->local_result.power.writeOp.dynamic *
            BTB->tdp_stats.writeAc.access;
        BTB->rt_power.reset();
        BTB->rt_power.readOp.dynamic +=
            BTB->local_result.power.readOp.dynamic *
            BTB->rtp_stats.readAc.access +
            BTB->local_result.power.writeOp.dynamic *
            BTB->rtp_stats.writeAc.access;
    }

    // Decoders. Each one is seeded from its own power record; previously the
    // instruction and operand decoders were seeded from ID_misc->power, which
    // gave their power_t the misc decoder's non-dynamic components.
    evalDecoderEnergy(ID_inst, core_params.decodeW,
                      core_stats.total_instructions);
    evalDecoderEnergy(ID_operand, core_params.decodeW,
                      core_stats.total_instructions);
    evalDecoderEnergy(ID_misc, core_params.decodeW,
                      core_stats.total_instructions);

    power.reset();
    rt_power.reset();
    // The base-class computation runs before output_data is rebuilt below.
    McPATComponent::computeEnergy();

    // Publish per-structure outputs and accumulate them in a fixed order.
    output_data.reset();
    if (icache) {
        output_data += icache->output_data;
    }
    if (IB) {
        IB->output_data.peak_dynamic_power =
            IB->power_t.readOp.dynamic * clockRate;
        IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic;
        output_data += IB->output_data;
    }
    if (BTB) {
        BTB->output_data.peak_dynamic_power =
            BTB->power_t.readOp.dynamic * clockRate;
        BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic;
        output_data += BTB->output_data;
    }
    if (BPT) {
        output_data += BPT->output_data;
    }
    if (ID_inst) {
        ID_inst->output_data.peak_dynamic_power =
            ID_inst->power_t.readOp.dynamic * clockRate;
        ID_inst->output_data.runtime_dynamic_energy =
            ID_inst->rt_power.readOp.dynamic;
        output_data += ID_inst->output_data;
    }
    if (ID_operand) {
        ID_operand->output_data.peak_dynamic_power =
            ID_operand->power_t.readOp.dynamic * clockRate;
        ID_operand->output_data.runtime_dynamic_energy =
            ID_operand->rt_power.readOp.dynamic;
        output_data += ID_operand->output_data;
    }
    if (ID_misc) {
        ID_misc->output_data.peak_dynamic_power =
            ID_misc->power_t.readOp.dynamic * clockRate;
        ID_misc->output_data.runtime_dynamic_energy =
            ID_misc->rt_power.readOp.dynamic;
        output_data += ID_misc->output_data;
    }
}
// Prints this unit through the base class, then its sub-components four
// columns deeper: the BTB and (if it exists) the branch predictor when
// prediction is enabled, followed by the instruction buffer and the three
// decoders. Prints nothing when the unit does not exist.
void InstFetchU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        McPATComponent::displayData(indent, plevel);

        const uint32_t child_indent = indent + 4;
        const bool has_prediction = core_params.predictionW > 0;
        if (has_prediction) {
            BTB->displayData(child_indent, plevel);
            if (BPT->exist) {
                BPT->displayData(child_indent, plevel);
            }
        }

        IB->displayData(child_indent, plevel);
        ID_inst->displayData(child_indent, plevel);
        ID_operand->displayData(child_indent, plevel);
        ID_misc->displayData(child_indent, plevel);
    }
}
void RENAMINGU::computeEnergy() {
if (!exist) return;
idcl->tdp_stats.reset();
idcl->rtp_stats.reset();
idcl->power_t.reset();
idcl->rt_power.reset();
if (core_params.core_ty == OOO) {
idcl->tdp_stats.readAc.access = core_params.decodeW;
idcl->rtp_stats.readAc.access = 3 * core_params.decodeW *
core_params.decodeW * core_stats.rename_reads;
} else if (core_params.issueW > 1) {
idcl->tdp_stats.readAc.access = core_params.decodeW;
idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions;
}
idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access *
idcl->power.readOp.dynamic;
idcl->power_t.readOp.leakage = idcl->power.readOp.leakage *
core_params.num_hthreads;
idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage *
core_params.num_hthreads;
idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access *
idcl->power.readOp.dynamic;
fdcl->tdp_stats.reset();
fdcl->rtp_stats.reset();
fdcl->power_t.reset();
fdcl->rt_power.reset();
if (core_params.core_ty == OOO) {
fdcl->tdp_stats.readAc.access = core_params.decodeW;
fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW *
core_params.fp_issueW * core_stats.fp_rename_writes;
} else if (core_params.issueW > 1) {
fdcl->tdp_stats.readAc.access = core_params.decodeW;
fdcl->rtp_stats.readAc.access = core_stats.fp_instructions;
}
fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access *
fdcl->power.readOp.dynamic;
fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage *
core_params.num_hthreads;
fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage *
core_params.num_hthreads;
fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access *
fdcl->power.readOp.dynamic;
if (iRRAT) {
iRRAT->tdp_stats.reset();
iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports;
iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports;
iRRAT->rtp_stats.reset();
iRRAT->rtp_stats.readAc.access = core_stats.rename_writes;
iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
iRRAT->power_t.reset();
iRRAT->power_t.readOp.dynamic +=
iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic +
iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
iRRAT->rt_power.reset();
iRRAT->rt_power.readOp.dynamic +=
iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic +
iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic;
iRRAT->power_t.readOp.leakage =
iRRAT->power.readOp.leakage * core_params.num_hthreads;
iRRAT->power_t.readOp.gate_leakage =
iRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
}
if (ifreeL) {
ifreeL->tdp_stats.reset();
ifreeL->tdp_stats.readAc.access = core_params.decodeW;
ifreeL->tdp_stats.writeAc.access = core_params.decodeW;
ifreeL->rtp_stats.reset();
if (core_params.scheu_ty == PhysicalRegFile) {
ifreeL->rtp_stats.readAc.access = core_stats.rename_reads;
ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes;
} else if (core_params.scheu_ty == ReservationStation) {
ifreeL->rtp_stats.readAc.access =
core_stats.rename_reads + core_stats.fp_rename_reads;
ifreeL->rtp_stats.writeAc.access =
2 * (core_stats.rename_writes + core_stats.fp_rename_writes);
}
ifreeL->power_t.reset();
ifreeL->power_t.readOp.dynamic +=
ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic +
ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
ifreeL->rt_power.reset();
ifreeL->rt_power.readOp.dynamic +=
ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic +
ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic;
ifreeL->power_t.readOp.leakage =
ifreeL->power.readOp.leakage * core_params.num_hthreads;
ifreeL->power_t.readOp.gate_leakage =
ifreeL->power.readOp.gate_leakage * core_params.num_hthreads;
}
if (fRRAT) {
fRRAT->tdp_stats.reset();
fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports;
fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports;
fRRAT->rtp_stats.reset();
fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes;
fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
fRRAT->power_t.reset();
fRRAT->power_t.readOp.dynamic +=
fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic +
fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
fRRAT->rt_power.reset();
fRRAT->rt_power.readOp.dynamic +=
fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic +
fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic;
fRRAT->power_t.readOp.leakage =
fRRAT->power.readOp.leakage * core_params.num_hthreads;
fRRAT->power_t.readOp.gate_leakage =
fRRAT->power.readOp.gate_leakage * core_params.num_hthreads;
}
if (ffreeL) {
ffreeL->tdp_stats.reset();
ffreeL->tdp_stats.readAc.access = core_params.decodeW;
ffreeL->tdp_stats.writeAc.access = core_params.decodeW;
ffreeL->rtp_stats.reset();
ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads;
ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes;
ffreeL->power_t.reset();
ffreeL->power_t.readOp.dynamic +=
ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic +
ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
ffreeL->rt_power.reset();
ffreeL->rt_power.readOp.dynamic +=
ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic +
ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic;
ffreeL->power_t.readOp.leakage =
ffreeL->power.readOp.leakage * core_params.num_hthreads;
ffreeL->power_t.readOp.gate_leakage =
ffreeL->power.readOp.gate_leakage * core_params.num_hthreads;
}
if (iFRAT) {
tdp_stats.reset();
if (core_params.rm_ty == RAMbased) {
iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports;
iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports;
} else if ((core_params.rm_ty == CAMbased)) {
iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports;
iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports;
}
rtp_stats.reset();
iFRAT->rtp_stats.readAc.access = core_stats.rename_reads;
iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes;
if (core_params.scheu_ty == ReservationStation &&
core_params.rm_ty == RAMbased) {
iFRAT->rtp_stats.searchAc.access =
core_stats.committed_int_instructions;
}
iFRAT->power_t.reset();
iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access
* (iFRAT->local_result.power.readOp.dynamic
+ idcl->power.readOp.dynamic)
+ iFRAT->tdp_stats.writeAc.access
* iFRAT->local_result.power.writeOp.dynamic
+ iFRAT->tdp_stats.searchAc.access
* iFRAT->local_result.power.searchOp.dynamic;
iFRAT->power_t.readOp.leakage =
iFRAT->power.readOp.leakage * core_params.num_hthreads;
iFRAT->power_t.readOp.gate_leakage =
iFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
iFRAT->rt_power.reset();
iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access
* (iFRAT->local_result.power.readOp.dynamic
+ idcl->power.readOp.dynamic)
+ iFRAT->rtp_stats.writeAc.access
* iFRAT->local_result.power.writeOp.dynamic
+ iFRAT->rtp_stats.searchAc.access
* iFRAT->local_result.power.searchOp.dynamic;
}
if (fFRAT) {
tdp_stats.reset();
fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports;
if ((core_params.rm_ty == CAMbased)) {
fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports;
} else if (core_params.rm_ty == RAMbased) {
fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports;
if (core_params.scheu_ty == ReservationStation) {
fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports;
}
}
rtp_stats.reset();
fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads;
fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes;
if (core_params.scheu_ty == ReservationStation &&
core_params.rm_ty == RAMbased) {
fFRAT->rtp_stats.searchAc.access =
core_stats.committed_fp_instructions;
}
fFRAT->power_t.reset();
fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access
* (fFRAT->local_result.power.readOp.dynamic
+ fdcl->power.readOp.dynamic)
+ fFRAT->tdp_stats.writeAc.access
* fFRAT->local_result.power.writeOp.dynamic
+ fFRAT->tdp_stats.searchAc.access
* fFRAT->local_result.power.searchOp.dynamic;
fFRAT->power_t.readOp.leakage =
fFRAT->power.readOp.leakage * core_params.num_hthreads;
fFRAT->power_t.readOp.gate_leakage =
fFRAT->power.readOp.gate_leakage * core_params.num_hthreads;
fFRAT->rt_power.reset();
fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access
* (fFRAT->local_result.power.readOp.dynamic
+ fdcl->power.readOp.dynamic)
+ fFRAT->rtp_stats.writeAc.access
* fFRAT->local_result.power.writeOp.dynamic
+ fFRAT->rtp_stats.searchAc.access
* fFRAT->local_result.power.searchOp.dynamic;
}
output_data.reset();
if (iFRAT) {
iFRAT->output_data.peak_dynamic_power =
iFRAT->power_t.readOp.dynamic * clockRate;
iFRAT->output_data.subthreshold_leakage_power =
iFRAT->power_t.readOp.leakage;
iFRAT->output_data.gate_leakage_power =
iFRAT->power_t.readOp.gate_leakage;
iFRAT->output_data.runtime_dynamic_energy =
iFRAT->rt_power.readOp.dynamic;
output_data += iFRAT->output_data;
}
if (fFRAT) {
fFRAT->output_data.peak_dynamic_power =
fFRAT->power_t.readOp.dynamic * clockRate;
fFRAT->output_data.subthreshold_leakage_power =
fFRAT->power_t.readOp.leakage;
fFRAT->output_data.gate_leakage_power =
fFRAT->power_t.readOp.gate_leakage;
fFRAT->output_data.runtime_dynamic_energy =
fFRAT->rt_power.readOp.dynamic;
output_data += fFRAT->output_data;
}
if (iRRAT) {
iRRAT->output_data.peak_dynamic_power =
iRRAT->power_t.readOp.dynamic * clockRate;
iRRAT->output_data.subthreshold_leakage_power =
iRRAT->power_t.readOp.leakage;
iRRAT->output_data.gate_leakage_power =
iRRAT->power_t.readOp.gate_leakage;
iRRAT->output_data.runtime_dynamic_energy =
iRRAT->rt_power.readOp.dynamic;
output_data += iRRAT->output_data;
}
if (fRRAT) {
fRRAT->output_data.peak_dynamic_power =
fRRAT->power_t.readOp.dynamic * clockRate;
fRRAT->output_data.subthreshold_leakage_power =
fRRAT->power_t.readOp.leakage;
fRRAT->output_data.gate_leakage_power =
fRRAT->power_t.readOp.gate_leakage;
fRRAT->output_data.runtime_dynamic_energy =
fRRAT->rt_power.readOp.dynamic;
output_data += fRRAT->output_data;
}
if (ifreeL) {
ifreeL->output_data.peak_dynamic_power =
ifreeL->power_t.readOp.dynamic * clockRate;
ifreeL->output_data.subthreshold_leakage_power =
ifreeL->power_t.readOp.leakage;
ifreeL->output_data.gate_leakage_power =
ifreeL->power_t.readOp.gate_leakage;
ifreeL->output_data.runtime_dynamic_energy =
ifreeL->rt_power.readOp.dynamic;
output_data += ifreeL->output_data;
}
if (ffreeL) {
ffreeL->output_data.peak_dynamic_power =
ffreeL->power_t.readOp.dynamic * clockRate;
ffreeL->output_data.subthreshold_leakage_power =
ffreeL->power_t.readOp.leakage;
ffreeL->output_data.gate_leakage_power =
ffreeL->power_t.readOp.gate_leakage;
ffreeL->output_data.runtime_dynamic_energy =
ffreeL->rt_power.readOp.dynamic;
output_data += ffreeL->output_data;
}
if (idcl) {
idcl->output_data.peak_dynamic_power =
idcl->power_t.readOp.dynamic * clockRate;
idcl->output_data.subthreshold_leakage_power =
idcl->power_t.readOp.leakage;
idcl->output_data.gate_leakage_power =
idcl->power_t.readOp.gate_leakage;
idcl->output_data.runtime_dynamic_energy =
idcl->rt_power.readOp.dynamic;
output_data += idcl->output_data;
}
if (fdcl) {
fdcl->output_data.peak_dynamic_power =
fdcl->power_t.readOp.dynamic * clockRate;
fdcl->output_data.subthreshold_leakage_power =
fdcl->power_t.readOp.leakage;
fdcl->output_data.gate_leakage_power =
fdcl->power_t.readOp.gate_leakage;
fdcl->output_data.runtime_dynamic_energy =
fdcl->rt_power.readOp.dynamic;
output_data += fdcl->output_data;
}
if (RAHT) {
output_data += RAHT->output_data;
}
}
/**
 * Print the report for the renaming unit and then for its sub-components.
 *
 * Nothing is printed when `exist` is false. For an OOO core iFRAT, fFRAT
 * and ifreeL are printed, followed by iRRAT, fRRAT and ffreeL when
 * core_params.scheu_ty is PhysicalRegFile. idcl and fdcl are printed for
 * every core type. Sub-components use an indent four columns deeper.
 *
 * @param indent Indentation handed to McPATComponent::displayData().
 * @param plevel Print level forwarded unchanged to every displayData() call.
 */
void RENAMINGU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        const uint32_t child_indent = indent + 4;

        McPATComponent::displayData(indent, plevel);

        const bool is_ooo = (core_params.core_ty == OOO);
        if (is_ooo) {
            iFRAT->displayData(child_indent, plevel);
            fFRAT->displayData(child_indent, plevel);
            ifreeL->displayData(child_indent, plevel);
        }
        if (is_ooo && core_params.scheu_ty == PhysicalRegFile) {
            iRRAT->displayData(child_indent, plevel);
            fRRAT->displayData(child_indent, plevel);
            ffreeL->displayData(child_indent, plevel);
        }

        idcl->displayData(child_indent, plevel);
        fdcl->displayData(child_indent, plevel);
    }
}
/**
 * Add the TDP and runtime dynamic energy of the integer instruction window
 * to its power_t / rt_power, using the read, search and write access counts
 * already stored in its tdp_stats and rtp_stats. The caller resets power_t
 * and rt_power beforehand.
 */
template <typename WindowT>
static void schedulerApplyIntWindowEnergy(WindowT* window) {
    window->power_t.readOp.dynamic +=
        window->local_result.power.readOp.dynamic *
            window->tdp_stats.readAc.access +
        window->local_result.power.searchOp.dynamic *
            window->tdp_stats.searchAc.access +
        window->local_result.power.writeOp.dynamic *
            window->tdp_stats.writeAc.access;
    window->rt_power.readOp.dynamic +=
        window->local_result.power.readOp.dynamic *
            window->rtp_stats.readAc.access +
        window->local_result.power.searchOp.dynamic *
            window->rtp_stats.searchAc.access +
        window->local_result.power.writeOp.dynamic *
            window->rtp_stats.writeAc.access;
}

/**
 * Publish a sub-component's dynamic results (peak power is power_t dynamic
 * times clock_rate, runtime energy is rt_power dynamic) into its
 * output_data and add that output_data to `unit_total`.
 */
template <typename SubT, typename OutputT, typename RateT>
static void schedulerPublishDynamic(SubT* sub, OutputT& unit_total,
                                    const RateT& clock_rate) {
    sub->output_data.peak_dynamic_power =
        sub->power_t.readOp.dynamic * clock_rate;
    sub->output_data.runtime_dynamic_energy = sub->rt_power.readOp.dynamic;
    unit_total += sub->output_data;
}

/**
 * Recompute the energy figures of the scheduler and its sub-components.
 *
 * Does nothing when `exist` is false. The selection-logic components have
 * their own computeEnergy() invoked but are left out of the roll-up. The
 * instruction windows and the ROB get their access counts from core_params
 * / core_stats, are converted into power_t / rt_power, and are then summed
 * into this unit's output_data in the order int window, FP window, ROB.
 * Each sub-component is skipped when its pointer is null.
 *
 * NOTE(review): both windows' power_t is reset here and its leakage fields
 * are not assigned anywhere in this function before being copied into
 * output_data, so the published leakage is whatever reset() leaves —
 * confirm this is intended.
 */
void SchedulerU::computeEnergy() {
    if (!exist) return;

    const double ROB_duty_cycle = 1;

    if (int_instruction_selection) {
        int_instruction_selection->computeEnergy();
    }
    if (fp_instruction_selection) {
        fp_instruction_selection->computeEnergy();
    }

    if (int_inst_window) {
        int_inst_window->tdp_stats.reset();
        int_inst_window->rtp_stats.reset();
        int_inst_window->power_t.reset();
        int_inst_window->rt_power.reset();

        const bool is_ooo = (core_params.core_ty == OOO);
        if (is_ooo || core_params.multithreaded) {
            // The TDP activity is the same for both core styles.
            int_inst_window->tdp_stats.readAc.access =
                core_params.issueW * core_params.num_pipelines;
            int_inst_window->tdp_stats.writeAc.access =
                core_params.issueW * core_params.num_pipelines;
            int_inst_window->tdp_stats.searchAc.access =
                core_params.issueW * core_params.num_pipelines;

            if (is_ooo) {
                // OOO: use the instruction-window counters directly.
                int_inst_window->rtp_stats.readAc.access =
                    core_stats.inst_window_reads;
                int_inst_window->rtp_stats.writeAc.access =
                    core_stats.inst_window_writes;
                int_inst_window->rtp_stats.searchAc.access =
                    core_stats.inst_window_wakeup_accesses;
            } else {
                // Multithreaded non-OOO: derive activity from the
                // integer + FP instruction counts.
                int_inst_window->rtp_stats.readAc.access =
                    core_stats.int_instructions + core_stats.fp_instructions;
                int_inst_window->rtp_stats.writeAc.access =
                    core_stats.int_instructions + core_stats.fp_instructions;
                int_inst_window->rtp_stats.searchAc.access =
                    2 * (core_stats.int_instructions +
                         core_stats.fp_instructions);
            }

            schedulerApplyIntWindowEnergy(int_inst_window);
        }
    }

    if (fp_inst_window) {
        fp_inst_window->tdp_stats.reset();
        fp_inst_window->rtp_stats.reset();
        fp_inst_window->power_t.reset();
        fp_inst_window->rt_power.reset();

        fp_inst_window->tdp_stats.readAc.access =
            fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines;
        fp_inst_window->tdp_stats.writeAc.access =
            fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines;
        fp_inst_window->tdp_stats.searchAc.access =
            fp_inst_window->l_ip.num_search_ports *
            core_params.num_fp_pipelines;

        fp_inst_window->rtp_stats.readAc.access =
            core_stats.fp_inst_window_reads;
        fp_inst_window->rtp_stats.writeAc.access =
            core_stats.fp_inst_window_writes;
        fp_inst_window->rtp_stats.searchAc.access =
            core_stats.fp_inst_window_wakeup_accesses;

        // Unlike the integer window, this one reads `power` rather than
        // `local_result.power`.
        fp_inst_window->power_t.readOp.dynamic +=
            fp_inst_window->power.readOp.dynamic *
                fp_inst_window->tdp_stats.readAc.access +
            fp_inst_window->power.searchOp.dynamic *
                fp_inst_window->tdp_stats.searchAc.access +
            fp_inst_window->power.writeOp.dynamic *
                fp_inst_window->tdp_stats.writeAc.access;
        fp_inst_window->rt_power.readOp.dynamic +=
            fp_inst_window->power.readOp.dynamic *
                fp_inst_window->rtp_stats.readAc.access +
            fp_inst_window->power.searchOp.dynamic *
                fp_inst_window->rtp_stats.searchAc.access +
            fp_inst_window->power.writeOp.dynamic *
                fp_inst_window->rtp_stats.writeAc.access;
    }

    if (ROB) {
        ROB->tdp_stats.reset();
        ROB->rtp_stats.reset();
        ROB->power_t.reset();
        ROB->rt_power.reset();

        ROB->tdp_stats.readAc.access = core_params.commitW *
            core_params.num_pipelines * ROB_duty_cycle;
        ROB->tdp_stats.writeAc.access = core_params.issueW *
            core_params.num_pipelines * ROB_duty_cycle;
        ROB->rtp_stats.readAc.access = core_stats.ROB_reads;
        ROB->rtp_stats.writeAc.access = core_stats.ROB_writes;

        ROB->power_t.readOp.dynamic +=
            ROB->local_result.power.readOp.dynamic *
                ROB->tdp_stats.readAc.access +
            ROB->local_result.power.writeOp.dynamic *
                ROB->tdp_stats.writeAc.access;
        ROB->rt_power.readOp.dynamic +=
            ROB->local_result.power.readOp.dynamic *
                ROB->rtp_stats.readAc.access +
            ROB->local_result.power.writeOp.dynamic *
                ROB->rtp_stats.writeAc.access;
    }

    // Roll-up: int window, FP window, ROB.
    output_data.reset();
    if (int_inst_window) {
        int_inst_window->output_data.subthreshold_leakage_power =
            int_inst_window->power_t.readOp.leakage;
        int_inst_window->output_data.gate_leakage_power =
            int_inst_window->power_t.readOp.gate_leakage;
        schedulerPublishDynamic(int_inst_window, output_data, clockRate);
    }
    if (fp_inst_window) {
        fp_inst_window->output_data.subthreshold_leakage_power =
            fp_inst_window->power_t.readOp.leakage;
        fp_inst_window->output_data.gate_leakage_power =
            fp_inst_window->power_t.readOp.gate_leakage;
        schedulerPublishDynamic(fp_inst_window, output_data, clockRate);
    }
    if (ROB) {
        schedulerPublishDynamic(ROB, output_data, clockRate);
    }

    // The integer and FP instruction selection logic is deliberately kept
    // out of the roll-up because its area is uninitialized. Disabled code:
    /*
    if (int_instruction_selection) {
        output_data += int_instruction_selection->output_data;
    }
    if (fp_instruction_selection) {
        output_data += fp_instruction_selection->output_data;
    }
    */
}
/**
 * Print the report for the scheduler and then for its sub-components.
 *
 * Nothing is printed when `exist` is false. The integer instruction window
 * is printed for OOO cores and for multithreaded non-OOO cores. The FP
 * window, and the ROB when core_params.ROB_size > 0, are printed for OOO
 * cores only. Sub-components use an indent four columns deeper.
 *
 * @param indent Indentation handed to McPATComponent::displayData().
 * @param plevel Print level forwarded unchanged to every displayData() call.
 */
void SchedulerU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        const uint32_t child_indent = indent + 4;

        McPATComponent::displayData(indent, plevel);

        const bool is_ooo = (core_params.core_ty == OOO);
        if (is_ooo || core_params.multithreaded) {
            int_inst_window->displayData(child_indent, plevel);
        }
        if (is_ooo) {
            fp_inst_window->displayData(child_indent, plevel);
            if (core_params.ROB_size > 0) {
                ROB->displayData(child_indent, plevel);
            }
        }

        // The integer and FP instruction selection logic is deliberately
        // not displayed because its area is uninitialized. Disabled code:
        /*
        if (int_instruction_selection) {
            int_instruction_selection->displayData(indent + 4, plevel);
        }
        if (fp_instruction_selection) {
            fp_instruction_selection->displayData(indent + 4, plevel);
        }
        */
    }
}
/**
 * Reset power_t / rt_power of a load/store queue and recompute them from
 * the access counts already stored in its tdp_stats and rtp_stats. Each
 * counted read is charged a search plus a read, so every access involves
 * at least two queue operations; each counted write is charged one write.
 */
template <typename QueueT>
static void lsuApplyQueueEnergy(QueueT* queue) {
    queue->power_t.reset();
    queue->power_t.readOp.dynamic +=
        queue->tdp_stats.readAc.access *
            (queue->local_result.power.searchOp.dynamic +
             queue->local_result.power.readOp.dynamic) +
        queue->tdp_stats.writeAc.access *
            queue->local_result.power.writeOp.dynamic;
    queue->rt_power.reset();
    queue->rt_power.readOp.dynamic +=
        queue->rtp_stats.readAc.access *
            (queue->local_result.power.searchOp.dynamic +
             queue->local_result.power.readOp.dynamic) +
        queue->rtp_stats.writeAc.access *
            queue->local_result.power.writeOp.dynamic;
}

/**
 * Publish a queue's dynamic results into its output_data (peak power is
 * power_t dynamic times clock_rate) and add that output_data to
 * `unit_total`. A null `queue` is skipped.
 */
template <typename QueueT, typename OutputT, typename RateT>
static void lsuPublishQueue(QueueT* queue, OutputT& unit_total,
                            const RateT& clock_rate) {
    if (!queue) return;
    queue->output_data.peak_dynamic_power =
        queue->power_t.readOp.dynamic * clock_rate;
    queue->output_data.runtime_dynamic_energy =
        queue->rt_power.readOp.dynamic;
    unit_total += queue->output_data;
}

/**
 * Recompute the energy figures of the load/store unit.
 *
 * Does nothing when `exist` is false. LSQ (always) and LoadQ (when
 * non-null) get their access counts from the port counts, the LSU duty
 * cycle and the load/store instruction counts. McPATComponent::
 * computeEnergy() is then invoked, after which this unit's output_data is
 * reset and rebuilt from dcache (if non-null), LSQ and LoadQ, in that
 * order.
 */
void LoadStoreU::computeEnergy() {
    if (!exist) return;

    // LSQ is dereferenced unconditionally here.
    LSQ->tdp_stats.reset();
    LSQ->rtp_stats.reset();
    LSQ->tdp_stats.readAc.access =
        LSQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle;
    LSQ->tdp_stats.writeAc.access =
        LSQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle;
    // Runtime counts are doubled to account for flush overhead.
    LSQ->rtp_stats.readAc.access =
        (core_stats.load_instructions + core_stats.store_instructions) * 2;
    LSQ->rtp_stats.writeAc.access =
        (core_stats.load_instructions + core_stats.store_instructions) * 2;
    lsuApplyQueueEnergy(LSQ);

    if (LoadQ) {
        LoadQ->tdp_stats.reset();
        LoadQ->rtp_stats.reset();
        LoadQ->tdp_stats.readAc.access =
            LoadQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle;
        LoadQ->tdp_stats.writeAc.access =
            LoadQ->l_ip.num_search_ports * core_stats.LSU_duty_cycle;
        LoadQ->rtp_stats.readAc.access =
            core_stats.load_instructions + core_stats.store_instructions;
        LoadQ->rtp_stats.writeAc.access =
            core_stats.load_instructions + core_stats.store_instructions;
        lsuApplyQueueEnergy(LoadQ);
    }

    // Must run before the roll-up below, which reads dcache->output_data.
    McPATComponent::computeEnergy();

    output_data.reset();
    if (dcache) {
        output_data += dcache->output_data;
    }
    lsuPublishQueue(LSQ, output_data, clockRate);
    lsuPublishQueue(LoadQ, output_data, clockRate);
}
/**
 * Print the report for the load/store unit, then LoadQ (when non-null) and
 * LSQ, each with an indent four columns deeper than the unit itself.
 * Nothing is printed when `exist` is false.
 *
 * @param indent Indentation handed to McPATComponent::displayData().
 * @param plevel Print level forwarded unchanged to every displayData() call.
 */
void LoadStoreU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        const uint32_t child_indent = indent + 4;

        McPATComponent::displayData(indent, plevel);
        if (LoadQ) {
            LoadQ->displayData(child_indent, plevel);
        }
        // LSQ is dereferenced without a null test.
        LSQ->displayData(child_indent, plevel);
    }
}
/**
 * Reset power_t / rt_power of a TLB and recompute them from the counts
 * already stored in its stats. Total accesses, not hits, are charged the
 * search energy. The write energy is charged per TDP read miss and per
 * runtime write access.
 */
template <typename TlbT>
static void memManApplyTlbEnergy(TlbT* tlb) {
    tlb->power_t.reset();
    tlb->power_t.readOp.dynamic +=
        tlb->tdp_stats.readAc.access *
            tlb->local_result.power.searchOp.dynamic +
        tlb->tdp_stats.readAc.miss *
            tlb->local_result.power.writeOp.dynamic;
    tlb->rt_power.reset();
    tlb->rt_power.readOp.dynamic +=
        tlb->rtp_stats.readAc.access *
            tlb->local_result.power.searchOp.dynamic +
        tlb->rtp_stats.writeAc.access *
            tlb->local_result.power.writeOp.dynamic;
}

/**
 * Publish a TLB's dynamic results into its output_data (peak power is
 * power_t dynamic times clock_rate) and add that output_data to
 * `unit_total`. A null `tlb` is skipped.
 */
template <typename TlbT, typename OutputT, typename RateT>
static void memManPublishTlb(TlbT* tlb, OutputT& unit_total,
                             const RateT& clock_rate) {
    if (!tlb) return;
    tlb->output_data.peak_dynamic_power =
        tlb->power_t.readOp.dynamic * clock_rate;
    tlb->output_data.runtime_dynamic_energy = tlb->rt_power.readOp.dynamic;
    unit_total += tlb->output_data;
}

/**
 * Recompute the energy figures of the memory-management unit.
 *
 * Does nothing when `exist` is false. For itlb and dtlb the TDP counts come
 * from the search-port count (scaled by the LSU duty cycle for dtlb) with
 * zero misses, and the runtime counts come from mem_man_stats. The unit's
 * output_data is then reset and rebuilt as itlb + dtlb.
 */
void MemManU::computeEnergy() {
    if (!exist) return;

    // itlb is dereferenced unconditionally here.
    itlb->tdp_stats.reset();
    itlb->rtp_stats.reset();
    itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports;
    itlb->tdp_stats.readAc.miss = 0;
    itlb->tdp_stats.readAc.hit =
        itlb->tdp_stats.readAc.access - itlb->tdp_stats.readAc.miss;
    itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses;
    // Misses are recorded as write accesses.
    itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses;
    memManApplyTlbEnergy(itlb);

    // dtlb is dereferenced unconditionally here.
    dtlb->tdp_stats.reset();
    dtlb->rtp_stats.reset();
    dtlb->tdp_stats.readAc.access =
        dtlb->l_ip.num_search_ports * core_stats.LSU_duty_cycle;
    dtlb->tdp_stats.readAc.miss = 0;
    dtlb->tdp_stats.readAc.hit =
        dtlb->tdp_stats.readAc.access - dtlb->tdp_stats.readAc.miss;
    // Note the cross-pairing: read accesses are summed with write misses,
    // and write accesses with read misses.
    dtlb->rtp_stats.readAc.access =
        mem_man_stats.dtlb_read_accesses + mem_man_stats.dtlb_write_misses;
    dtlb->rtp_stats.writeAc.access =
        mem_man_stats.dtlb_write_accesses + mem_man_stats.dtlb_read_misses;
    memManApplyTlbEnergy(dtlb);

    output_data.reset();
    memManPublishTlb(itlb, output_data, clockRate);
    memManPublishTlb(dtlb, output_data, clockRate);
}
/**
 * Print the report for the memory-management unit, then itlb and dtlb with
 * an indent four columns deeper. Nothing is printed when `exist` is false.
 *
 * @param indent Indentation handed to McPATComponent::displayData().
 * @param plevel Print level forwarded unchanged to every displayData() call.
 */
void MemManU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        const uint32_t child_indent = indent + 4;

        McPATComponent::displayData(indent, plevel);
        itlb->displayData(child_indent, plevel);
        dtlb->displayData(child_indent, plevel);
    }
}
/**
 * Reset power_t / rt_power of a register-file array and recompute them
 * from the read and write access counts already stored in its tdp_stats
 * and rtp_stats, using the energies in local_result.power.
 */
template <typename RegFileT>
static void regfuApplyRegfileEnergy(RegFileT* rf) {
    rf->power_t.reset();
    rf->power_t.readOp.dynamic +=
        rf->tdp_stats.readAc.access * rf->local_result.power.readOp.dynamic +
        rf->tdp_stats.writeAc.access * rf->local_result.power.writeOp.dynamic;
    rf->rt_power.reset();
    rf->rt_power.readOp.dynamic +=
        rf->rtp_stats.readAc.access * rf->local_result.power.readOp.dynamic +
        rf->rtp_stats.writeAc.access * rf->local_result.power.writeOp.dynamic;
}

/**
 * Publish a register file's dynamic results into its output_data (peak
 * power is power_t dynamic times clock_rate) and add that output_data to
 * `unit_total`.
 */
template <typename RegFileT, typename OutputT, typename RateT>
static void regfuPublishDynamic(RegFileT* rf, OutputT& unit_total,
                                const RateT& clock_rate) {
    rf->output_data.peak_dynamic_power =
        rf->power_t.readOp.dynamic * clock_rate;
    rf->output_data.runtime_dynamic_energy = rf->rt_power.readOp.dynamic;
    unit_total += rf->output_data;
}

/**
 * Recompute the energy figures of the register-file unit.
 *
 * The architectural RF and the physical RF cannot be present at the same
 * time, so the RF statistics refer to exactly one of them and the same
 * statistics are used for either.
 *
 * Does nothing when `exist` is false. IRF and FRF are always processed.
 * RFWIN is processed when core_params.regWindowing is set, in which case
 * the windowing traffic (function_calls * RFWIN_ACCESS_MULTIPLIER) is also
 * added to the IRF and FRF runtime counts. The unit's output_data is reset
 * and rebuilt as IRF + FRF + RFWIN, skipping null pointers.
 *
 * NOTE(review): the IRF/FRF leakage fields of output_data are multiplied
 * by num_hthreads in place and are not re-initialised in this function, so
 * repeated calls would compound the factor unless output_data is refreshed
 * elsewhere — confirm.
 */
void RegFU::computeEnergy() {
    if (!exist) return;

    // IRF. ALU activity is weighted by 1.1; multiplier activity only
    // counts when the core has multipliers.
    const double int_unit_activity =
        core_stats.ALU_duty_cycle * 1.1 +
        (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0);

    IRF->tdp_stats.reset();
    IRF->rtp_stats.reset();
    IRF->tdp_stats.readAc.access =
        core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS *
        int_unit_activity * core_params.num_pipelines;
    IRF->tdp_stats.writeAc.access =
        core_params.issueW * int_unit_activity * core_params.num_pipelines;
    IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads;
    IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes;
    if (core_params.regWindowing) {
        IRF->rtp_stats.readAc.access +=
            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
        IRF->rtp_stats.writeAc.access +=
            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
    }
    regfuApplyRegfileEnergy(IRF);

    // FRF. FPU activity is weighted by 1.05.
    FRF->tdp_stats.reset();
    FRF->rtp_stats.reset();
    FRF->tdp_stats.readAc.access =
        FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 *
        core_params.num_fp_pipelines;
    FRF->tdp_stats.writeAc.access =
        FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 *
        core_params.num_fp_pipelines;
    FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads;
    FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes;
    if (core_params.regWindowing) {
        FRF->rtp_stats.readAc.access +=
            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
        FRF->rtp_stats.writeAc.access +=
            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
    }
    regfuApplyRegfileEnergy(FRF);

    // RFWIN contributes no TDP activity, only runtime activity.
    if (core_params.regWindowing) {
        RFWIN->tdp_stats.reset();
        RFWIN->rtp_stats.reset();
        RFWIN->tdp_stats.readAc.access = 0;
        RFWIN->tdp_stats.writeAc.access = 0;
        RFWIN->rtp_stats.readAc.access =
            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
        RFWIN->rtp_stats.writeAc.access =
            core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER;
        regfuApplyRegfileEnergy(RFWIN);
    }

    // Roll-up: IRF, FRF, RFWIN.
    output_data.reset();
    if (IRF) {
        IRF->output_data.subthreshold_leakage_power *=
            core_params.num_hthreads;
        IRF->output_data.gate_leakage_power *= core_params.num_hthreads;
        regfuPublishDynamic(IRF, output_data, clockRate);
    }
    if (FRF) {
        FRF->output_data.subthreshold_leakage_power *=
            core_params.num_hthreads;
        FRF->output_data.gate_leakage_power *= core_params.num_hthreads;
        regfuPublishDynamic(FRF, output_data, clockRate);
    }
    if (RFWIN) {
        regfuPublishDynamic(RFWIN, output_data, clockRate);
    }
}
/**
 * Print the report for the register-file unit, then IRF and FRF, and RFWIN
 * when core_params.regWindowing is set. Sub-components use an indent four
 * columns deeper. Nothing is printed when `exist` is false.
 *
 * @param indent Indentation handed to McPATComponent::displayData().
 * @param plevel Print level forwarded unchanged to every displayData() call.
 */
void RegFU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        const uint32_t child_indent = indent + 4;

        McPATComponent::displayData(indent, plevel);
        IRF->displayData(child_indent, plevel);
        FRF->displayData(child_indent, plevel);
        if (core_params.regWindowing) {
            RFWIN->displayData(child_indent, plevel);
        }
    }
}
/**
 * Run computeEnergy() on a sub-unit and add its output_data to
 * `unit_total`. A null `child` is skipped.
 */
template <typename ChildT, typename OutputT>
static void execuComputeAndAccumulate(ChildT* child, OutputT& unit_total) {
    if (child) {
        child->computeEnergy();
        unit_total += child->output_data;
    }
}

/**
 * Recompute the energy figures of the execution unit.
 *
 * Does nothing when `exist` is false. The bypass components are first
 * handed their port count, duty cycle and access count via
 * set_params_stats(): the integer pair always, the multiplier pair when
 * core_params.num_muls > 0, the FP pair when core_params.num_fpus > 0.
 * McPATComponent::computeEnergy() is then invoked, after which rfu, scheu,
 * fp_u, exeu and mul are each computed and added to output_data in that
 * order, skipping null pointers. output_data is not reset in this function
 * before the additions.
 */
void EXECU::computeEnergy() {
    if (!exist) return;

    // Integer bypass: data and tag networks share the same parameters.
    int_bypass->set_params_stats(core_params.execu_int_bypass_ports,
                                 core_stats.ALU_cdb_duty_cycle,
                                 core_stats.cdb_alu_accesses);
    intTagBypass->set_params_stats(core_params.execu_int_bypass_ports,
                                   core_stats.ALU_cdb_duty_cycle,
                                   core_stats.cdb_alu_accesses);

    const bool has_multipliers = core_params.num_muls > 0;
    if (has_multipliers) {
        int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports,
                                         core_stats.MUL_cdb_duty_cycle,
                                         core_stats.cdb_mul_accesses);
        intTag_mul_Bypass->set_params_stats(
            core_params.execu_mul_bypass_ports,
            core_stats.MUL_cdb_duty_cycle,
            core_stats.cdb_mul_accesses);
    }

    const bool has_fpus = core_params.num_fpus > 0;
    if (has_fpus) {
        fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports,
                                    core_stats.FPU_cdb_duty_cycle,
                                    core_stats.cdb_fpu_accesses);
        fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports,
                                      core_stats.FPU_cdb_duty_cycle,
                                      core_stats.cdb_fpu_accesses);
    }

    McPATComponent::computeEnergy();

    execuComputeAndAccumulate(rfu, output_data);
    execuComputeAndAccumulate(scheu, output_data);
    execuComputeAndAccumulate(fp_u, output_data);
    execuComputeAndAccumulate(exeu, output_data);
    execuComputeAndAccumulate(mul, output_data);
}
/**
 * Print the report for the execution unit and then for its sub-units.
 *
 * Nothing is printed when `exist` is false. The order is rfu, scheu (when
 * non-null), exeu, fp_u (when core_params.num_fpus > 0) and mul (when
 * core_params.num_muls > 0), each with an indent four columns deeper.
 *
 * @param indent Indentation handed to McPATComponent::displayData().
 * @param plevel Print level forwarded unchanged to every displayData() call.
 */
void EXECU::displayData(uint32_t indent, int plevel) {
    if (exist) {
        const uint32_t child_indent = indent + 4;

        McPATComponent::displayData(indent, plevel);

        // rfu and exeu are dereferenced without a null test.
        rfu->displayData(child_indent, plevel);
        if (scheu) {
            scheu->displayData(child_indent, plevel);
        }
        exeu->displayData(child_indent, plevel);

        const bool has_fpus = core_params.num_fpus > 0;
        if (has_fpus) {
            fp_u->displayData(child_indent, plevel);
        }
        const bool has_multipliers = core_params.num_muls > 0;
        if (has_multipliers) {
            mul->displayData(child_indent, plevel);
        }
    }
}
/**
 * Compute the energy of the core's sub-units and accumulate their results
 * into this core's output_data.
 *
 * Stage 1 runs computeEnergy() on ifu, lsu, mmu and exu, and on rnu when the
 * core type is OOO. Stage 2 resets output_data and adds the output_data of
 * every sub-unit that is present.
 *
 * Every sub-unit pointer is null-checked before it is dereferenced, matching
 * the checks performed in the accumulation stage, so an absent sub-unit is
 * skipped instead of being dereferenced.
 */
void Core::computeEnergy() {
    // Stage 1: let each sub-unit compute its own results.
    if (ifu) {
        ifu->computeEnergy();
    }
    if (lsu) {
        lsu->computeEnergy();
    }
    if (mmu) {
        mmu->computeEnergy();
    }
    if (exu) {
        exu->computeEnergy();
    }
    // The renaming unit is only computed for out-of-order cores.
    if (rnu && core_params.core_ty == OOO) {
        rnu->computeEnergy();
    }

    // Stage 2: rebuild this core's totals from the sub-unit results.
    output_data.reset();
    if (ifu) {
        output_data += ifu->output_data;
    }
    if (lsu) {
        output_data += lsu->output_data;
    }
    if (mmu) {
        output_data += mmu->output_data;
    }
    if (exu) {
        output_data += exu->output_data;
    }
    if (rnu) {
        output_data += rnu->output_data;
    }
    // computeEnergy() is not invoked on the remaining components here; their
    // output_data is added as it currently stands.
    if (corepipe) {
        output_data += corepipe->output_data;
    }
    if (undiffCore) {
        output_data += undiffCore->output_data;
    }
    if (l2cache) {
        output_data += l2cache->output_data;
    }
}
/**
 * Release the components owned by the instruction fetch unit.
 *
 * Nothing is released when the unit does not exist. BTB and BPT are released
 * only when core_params.predictionW is positive.
 */
InstFetchU::~InstFetchU() {
    if (!exist) {
        return;
    }

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete IB;
    IB = NULL;

    delete ID_inst;
    ID_inst = NULL;

    delete ID_operand;
    ID_operand = NULL;

    delete ID_misc;
    ID_misc = NULL;

    if (core_params.predictionW > 0) {
        delete BTB;
        BTB = NULL;

        delete BPT;
        BPT = NULL;
    }

    delete icache;
}
/**
 * Release the components owned by the branch predictor.
 *
 * Nothing is released when the predictor does not exist.
 */
BranchPredictor::~BranchPredictor() {
    if (!exist) {
        return;
    }

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete globalBPT;
    globalBPT = NULL;

    delete localBPT;
    localBPT = NULL;

    delete L1_localBPT;
    L1_localBPT = NULL;

    delete L2_localBPT;
    L2_localBPT = NULL;

    delete chooser;
    chooser = NULL;

    delete RAS;
    RAS = NULL;
}
/**
 * Release the components owned by the renaming unit.
 *
 * Nothing is released when the unit does not exist.
 *
 * The fourth entry previously released iFRAT a second time (a no-op, since
 * the pointer had just been cleared) and never released fRRAT.
 * NOTE(review): fRRAT is declared outside this section of the file; this
 * assumes it is initialized (to NULL or a valid object) like its siblings.
 */
RENAMINGU ::~RENAMINGU() {
    if (!exist) return;
    if (iFRAT) {
        delete iFRAT;
        iFRAT = NULL;
    }
    if (fFRAT) {
        delete fFRAT;
        fFRAT = NULL;
    }
    if (iRRAT) {
        delete iRRAT;
        iRRAT = NULL;
    }
    if (fRRAT) {
        delete fRRAT;
        fRRAT = NULL;
    }
    if (ifreeL) {
        delete ifreeL;
        ifreeL = NULL;
    }
    if (ffreeL) {
        delete ffreeL;
        ffreeL = NULL;
    }
    if (idcl) {
        delete idcl;
        idcl = NULL;
    }
    if (fdcl) {
        delete fdcl;
        fdcl = NULL;
    }
    if (RAHT) {
        delete RAHT;
        RAHT = NULL;
    }
}
/**
 * Release the components owned by the load/store unit.
 *
 * Nothing is released when the unit does not exist.
 */
LoadStoreU::~LoadStoreU() {
    if (!exist) {
        return;
    }

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete LSQ;
    LSQ = NULL;

    delete dcache;
    dcache = NULL;
}
/**
 * Release the itlb and dtlb components owned by the memory management unit.
 *
 * Nothing is released when the unit does not exist.
 */
MemManU::~MemManU() {
    if (!exist) {
        return;
    }

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete itlb;
    itlb = NULL;

    delete dtlb;
    dtlb = NULL;
}
/**
 * Release the IRF, FRF and RFWIN components owned by the register-file unit.
 *
 * Nothing is released when the unit does not exist.
 */
RegFU::~RegFU() {
    if (!exist) {
        return;
    }

    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete IRF;
    IRF = NULL;

    delete FRF;
    FRF = NULL;

    delete RFWIN;
    RFWIN = NULL;
}
/**
 * Release the components owned by the scheduler unit.
 *
 * Nothing is released when the unit does not exist.
 *
 * The fp_inst_window branch previously deleted int_inst_window (already
 * cleared by the branch above, so a no-op) and left fp_inst_window
 * allocated; it now releases fp_inst_window itself.
 */
SchedulerU ::~SchedulerU() {
    if (!exist) return;
    if (int_inst_window) {
        delete int_inst_window;
        int_inst_window = NULL;
    }
    if (fp_inst_window) {
        delete fp_inst_window;
        fp_inst_window = NULL;
    }
    if (ROB) {
        delete ROB;
        ROB = NULL;
    }
    if (int_instruction_selection) {
        delete int_instruction_selection;
        int_instruction_selection = NULL;
    }
    if (fp_instruction_selection) {
        delete fp_instruction_selection;
        fp_instruction_selection = NULL;
    }
}
/**
 * Release the bypass components and sub-units owned by the execution unit.
 *
 * Nothing is released when the unit does not exist.
 */
EXECU::~EXECU() {
    if (!exist) {
        return;
    }

    // Deleting a null pointer is a no-op, so no explicit checks are needed.

    // Bypass components.
    delete int_bypass;
    int_bypass = NULL;

    delete intTagBypass;
    intTagBypass = NULL;

    delete int_mul_bypass;
    int_mul_bypass = NULL;

    delete intTag_mul_Bypass;
    intTag_mul_Bypass = NULL;

    delete fp_bypass;
    fp_bypass = NULL;

    delete fpTagBypass;
    fpTagBypass = NULL;

    // Sub-units.
    delete fp_u;
    fp_u = NULL;

    delete exeu;
    exeu = NULL;

    delete mul;
    mul = NULL;

    delete rfu;
    rfu = NULL;

    delete scheu;
    scheu = NULL;
}
/**
 * Release every sub-unit owned by the core and clear the pointers.
 */
Core::~Core() {
    // Deleting a null pointer is a no-op, so no explicit checks are needed.
    delete ifu;
    ifu = NULL;

    delete lsu;
    lsu = NULL;

    delete rnu;
    rnu = NULL;

    delete mmu;
    mmu = NULL;

    delete exu;
    exu = NULL;

    delete corepipe;
    corepipe = NULL;

    delete undiffCore;
    undiffCore = NULL;

    delete l2cache;
    l2cache = NULL;
}
/**
 * Reset core_params to its defaults: every byte zeroed, with the peak issue
 * and peak commit widths set to -1.
 */
void Core::initialize_params() {
    // Zero the whole structure first, then override the non-zero defaults.
    memset(&core_params, 0, sizeof(core_params));

    // NOTE(review): -1 presumably marks "not provided"; confirm.
    core_params.peak_commitW = -1;
    core_params.peak_issueW = -1;
}
/**
 * Reset core_stats to its defaults: every byte zeroed, then every duty cycle
 * set to 1.0. All other statistics stay at zero until set_core_param() reads
 * them from the XML input.
 */
void Core::initialize_stats() {
    memset(&core_stats, 0, sizeof(CoreStatistics));

    // Functional-unit and common-data-bus duty cycles.
    core_stats.ALU_duty_cycle = 1.0;
    core_stats.FPU_duty_cycle = 1.0;
    core_stats.MUL_duty_cycle = 1.0;
    core_stats.ALU_cdb_duty_cycle = 1.0;
    core_stats.FPU_cdb_duty_cycle = 1.0;
    core_stats.MUL_cdb_duty_cycle = 1.0;

    // Pipeline and front-end / memory-side duty cycles.
    core_stats.pipeline_duty_cycle = 1.0;
    core_stats.IFU_duty_cycle = 1.0;
    // IFU_duty_cycle used to be assigned twice while BR_duty_cycle, which is
    // read as a stat in set_core_param(), was the only duty cycle left at 0.
    // NOTE(review): assumes BR_duty_cycle should default to 1.0 like the
    // other duty cycles; confirm against the intended model defaults.
    core_stats.BR_duty_cycle = 1.0;
    core_stats.LSU_duty_cycle = 1.0;
    core_stats.MemManU_D_duty_cycle = 1.0;
    core_stats.MemManU_I_duty_cycle = 1.0;
}
void Core::set_core_param() {
initialize_params();
initialize_stats();
int num_children = xml_data->nChildNode("param");
int i;
for (i = 0; i < num_children; i++) {
XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
XMLCSTR node_name = paramNode->getAttribute("name");
XMLCSTR value = paramNode->getAttribute("value");
if (!node_name)
warnMissingParamName(paramNode->getAttribute("id"));
ASSIGN_STR_IF("name", name);
ASSIGN_INT_IF("opt_local", core_params.opt_local);
ASSIGN_FP_IF("clock_rate", core_params.clockRate);
ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
ASSIGN_INT_IF("opcode_width", core_params.opcode_width);
ASSIGN_INT_IF("x86", core_params.x86);
ASSIGN_INT_IF("Embedded", core_params.Embedded);
ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type);
ASSIGN_INT_IF("micro_opcode_width", core_params.micro_opcode_length);
ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads);
ASSIGN_INT_IF("fetch_width", core_params.fetchW);
ASSIGN_INT_IF("decode_width", core_params.decodeW);
ASSIGN_INT_IF("issue_width", core_params.issueW);
ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW);
ASSIGN_INT_IF("commit_width", core_params.commitW);
ASSIGN_INT_IF("prediction_width", core_params.predictionW);
ASSIGN_INT_IF("ALU_per_core", core_params.num_alus);
ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus);
ASSIGN_INT_IF("MUL_per_core", core_params.num_muls);
ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW);
ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty,
Scheduler_type);
ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type);
ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size);
ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size);
ASSIGN_INT_IF("ROB_size", core_params.ROB_size);
ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc);
ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks);
ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width);
ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc);
ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks);
ASSIGN_INT_IF("register_window_size",
core_params.register_window_size);
ASSIGN_INT_IF("register_window_throughput",
core_params.register_window_throughput);
ASSIGN_INT_IF("register_window_latency",
core_params.register_window_latency);
ASSIGN_INT_IF("register_window_assoc",
core_params.register_window_assoc);
ASSIGN_INT_IF("register_window_nbanks",
core_params.register_window_nbanks);
ASSIGN_INT_IF("register_window_tag_width",
core_params.register_window_tag_width);
ASSIGN_INT_IF("register_window_rw_ports",
core_params.register_window_rw_ports);
ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size);
ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc);
ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks);
ASSIGN_INT_IF("phy_Regs_IRF_tag_width",
core_params.phy_Regs_IRF_tag_width);
ASSIGN_INT_IF("phy_Regs_IRF_rd_ports",
core_params.phy_Regs_IRF_rd_ports);
ASSIGN_INT_IF("phy_Regs_IRF_wr_ports",
core_params.phy_Regs_IRF_wr_ports);
ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size);
ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc);
ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks);
ASSIGN_INT_IF("phy_Regs_FRF_tag_width",
core_params.phy_Regs_FRF_tag_width);
ASSIGN_INT_IF("phy_Regs_FRF_rd_ports",
core_params.phy_Regs_FRF_rd_ports);
ASSIGN_INT_IF("phy_Regs_FRF_wr_ports",
core_params.phy_Regs_FRF_wr_ports);
ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks);
ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports);
ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks);
ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports);
ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks);
ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports);
ASSIGN_INT_IF("memory_ports", core_params.memory_ports);
ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size);
ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc);
ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks);
ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size);
ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc);
ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks);
ASSIGN_INT_IF("instruction_window_size",
core_params.instruction_window_size);
ASSIGN_INT_IF("fp_instruction_window_size",
core_params.fp_instruction_window_size);
ASSIGN_INT_IF("instruction_buffer_size",
core_params.instruction_buffer_size);
ASSIGN_INT_IF("instruction_buffer_assoc",
core_params.instruction_buffer_assoc);
ASSIGN_INT_IF("instruction_buffer_nbanks",
core_params.instruction_buffer_nbanks);
ASSIGN_INT_IF("instruction_buffer_tag_width",
core_params.instruction_buffer_tag_width);
ASSIGN_INT_IF("number_instruction_fetch_ports",
core_params.number_instruction_fetch_ports);
ASSIGN_INT_IF("RAS_size", core_params.RAS_size);
ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt,
Wire_type);
ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type);
ASSIGN_INT_IF("execu_int_bypass_ports",
core_params.execu_int_bypass_ports);
ASSIGN_INT_IF("execu_mul_bypass_ports",
core_params.execu_mul_bypass_ports);
ASSIGN_INT_IF("execu_fp_bypass_ports",
core_params.execu_fp_bypass_ports);
ASSIGN_ENUM_IF("execu_bypass_wire_type",
core_params.execu_bypass_wire_type, Wire_type);
ASSIGN_FP_IF("execu_bypass_base_width",
core_params.execu_bypass_base_width);
ASSIGN_FP_IF("execu_bypass_base_height",
core_params.execu_bypass_base_height);
ASSIGN_INT_IF("execu_bypass_start_wiring_level",
core_params.execu_bypass_start_wiring_level);
ASSIGN_FP_IF("execu_bypass_route_over_perc",
core_params.execu_bypass_route_over_perc);
ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator);
ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages);
ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages);
ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines);
ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines);
ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint);
ASSIGN_INT_IF("perThreadState", core_params.perThreadState);
ASSIGN_INT_IF("instruction_length", core_params.instruction_length);
else {
warnUnrecognizedParam(node_name);
}
}
// Change from MHz to Hz
core_params.clockRate *= 1e6;
clockRate = core_params.clockRate;
core_params.peak_commitW = core_params.peak_issueW;
core_params.fp_decodeW = core_params.fp_issueW;
num_children = xml_data->nChildNode("stat");
for (i = 0; i < num_children; i++) {
XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
XMLCSTR node_name = statNode->getAttribute("name");
XMLCSTR value = statNode->getAttribute("value");
if (!node_name)
warnMissingStatName(statNode->getAttribute("id"));
ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle);
ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle);
ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle);
ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle);
ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle);
ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle);
ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle);
ASSIGN_FP_IF("total_cycles", core_stats.total_cycles);
ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles);
ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles);
ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle);
ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle);
ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle);
ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle);
ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle);
ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses);
ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses);
ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses);
ASSIGN_FP_IF("function_calls", core_stats.function_calls);
ASSIGN_FP_IF("total_instructions", core_stats.total_instructions);
ASSIGN_FP_IF("int_instructions", core_stats.int_instructions);
ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions);
ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions);
ASSIGN_FP_IF("branch_mispredictions",
core_stats.branch_mispredictions);
ASSIGN_FP_IF("load_instructions", core_stats.load_instructions);
ASSIGN_FP_IF("store_instructions", core_stats.store_instructions);
ASSIGN_FP_IF("committed_instructions",
core_stats.committed_instructions);
ASSIGN_FP_IF("committed_int_instructions",
core_stats.committed_int_instructions);
ASSIGN_FP_IF("committed_fp_instructions",
core_stats.committed_fp_instructions);
ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads);
ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes);
ASSIGN_FP_IF("rename_reads", core_stats.rename_reads);
ASSIGN_FP_IF("rename_writes", core_stats.rename_writes);
ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads);
ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes);
ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads);
ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes);
ASSIGN_FP_IF("inst_window_wakeup_accesses",
core_stats.inst_window_wakeup_accesses);
ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads);
ASSIGN_FP_IF("fp_inst_window_writes",
core_stats.fp_inst_window_writes);
ASSIGN_FP_IF("fp_inst_window_wakeup_accesses",
core_stats.fp_inst_window_wakeup_accesses);
ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads);
ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads);
ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes);
ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes);
ASSIGN_FP_IF("context_switches", core_stats.context_switches);
ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses);
ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses);
ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses);
else {
warnUnrecognizedStat(node_name);
}
}
// Initialize a few variables
core_params.multithreaded = core_params.num_hthreads > 1 ? true : false;
core_params.pc_width = virtual_address_width;
core_params.v_address_width = virtual_address_width;
core_params.p_address_width = physical_address_width;
core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32;
core_params.fp_data_width = core_params.int_data_width;
core_params.arch_ireg_width =
int(ceil(log2(core_params.archi_Regs_IRF_size)));
core_params.arch_freg_width
= int(ceil(log2(core_params.archi_Regs_FRF_size)));
core_params.num_IRF_entry = core_params.archi_Regs_IRF_size;
core_params.num_FRF_entry = core_params.archi_Regs_FRF_size;
if (core_params.instruction_length <= 0) {
errorNonPositiveParam("instruction_length");
}
if (core_params.num_hthreads <= 0) {
errorNonPositiveParam("number_hardware_threads");
}
if (core_params.opcode_width <= 0) {
errorNonPositiveParam("opcode_width");
}
if (core_params.instruction_buffer_size <= 0) {
errorNonPositiveParam("instruction_buffer_size");
}
if (core_params.number_instruction_fetch_ports <= 0) {
errorNonPositiveParam("number_instruction_fetch_ports");
}
if (core_params.peak_issueW <= 0) {
errorNonPositiveParam("peak_issue_width");
} else {
assert(core_params.peak_commitW > 0);
}
if (core_params.core_ty == OOO) {
if (core_params.scheu_ty == PhysicalRegFile) {
core_params.phy_ireg_width =
int(ceil(log2(core_params.phy_Regs_IRF_size)));
core_params.phy_freg_width =
int(ceil(log2(core_params.phy_Regs_FRF_size)));
core_params.num_ifreelist_entries =
core_params.num_IRF_entry = core_params.phy_Regs_IRF_size;
core_params.num_ffreelist_entries =
core_params.num_FRF_entry = core_params.phy_Regs_FRF_size;
} else if (core_params.scheu_ty == ReservationStation) {
core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size)));
core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size)));
core_params.num_ifreelist_entries = core_params.ROB_size;
core_params.num_ffreelist_entries = core_params.ROB_size;
}
}
core_params.regWindowing =
(core_params.register_window_size > 0 &&
core_params.core_ty == Inorder) ? true : false;
if (core_params.regWindowing) {
if (core_params.register_window_throughput <= 0) {
errorNonPositiveParam("register_window_throughput");
} else if (core_params.register_window_latency <= 0) {
errorNonPositiveParam("register_window_latency");
}
}
set_pppm(core_params.pppm_lkg_multhread, 0, core_params.num_hthreads,
core_params.num_hthreads, 0);
if (!((core_params.core_ty == OOO) || (core_params.core_ty == Inorder))) {
cout << "Invalid Core Type" << endl;
exit(0);
}
if (!((core_params.scheu_ty == PhysicalRegFile) ||
(core_params.scheu_ty == ReservationStation))) {
cout << "Invalid OOO Scheduler Type" << endl;
exit(0);
}
if (!((core_params.rm_ty == RAMbased) ||
(core_params.rm_ty == CAMbased))) {
cout << "Invalid OOO Renaming Type" << endl;
exit(0);
}
}