gem5/ext/mcpat/cacti/decoder.cc
Yasuko Eckert 0deef376d9 ext: McPAT interface changes and fixes
This patch includes software engineering changes and some generic bug fixes
that Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known
issues/concerns that we did not have a chance to address in this patch.

High-level changes in this patch include:
 1) Making XML parsing modular and hierarchical:
   - Shift parsing responsibility into the components
   - Read XML in a (mostly) context-free recursive manner so that McPAT input
     files can contain arbitrary component hierarchies
 2) Making power, energy, and area calculations a hierarchical and recursive
    process
   - Components track their subcomponents and recursively call compute
     functions in stages
   - Make C++ object hierarchy reflect inheritance of classes of components
     with similar structures
   - Simplify computeArea() and computeEnergy() functions to eliminate
     successive calls to calculate separate TDP vs. runtime energy
   - Remove Processor component (now unnecessary) and introduce a more abstract
     System component
 3) Standardizing McPAT output across all components
   - Use a single, common data structure for storing and printing McPAT output
   - Recursively call print functions through component hierarchy
 4) For caches, allow splitting data array and tag array reads and writes for
    better accuracy
 5) Improving the usability of CACTI by printing more helpful warning and error
    messages
 6) Minor: Impose more rigorous code style for clarity (more work still to be
    done)
Overall, these changes greatly reduce the amount of replicated code, and they
improve McPAT runtime and decrease memory footprint.
2014-06-03 13:32:59 -07:00
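
The hierarchical compute/print structure described in items 2 and 3 can be pictured with a minimal sketch. The class and member names below are hypothetical illustrations, not the actual McPAT/gem5 types: each component owns its subcomponents, computes area (and, analogously, energy) bottom-up, and prints results through one common recursive routine.

#include <iostream>
#include <string>
#include <vector>

// Hypothetical illustration only; the real McPAT/gem5 classes differ.
class Component {
  public:
    Component(const std::string &name_) : name(name_) {}
    virtual ~Component() {}

    // Area is computed bottom-up: this component's own contribution plus
    // the recursively computed areas of its children.
    virtual double computeArea() {
        double area = localArea();
        for (Component *child : children)
            area += child->computeArea();
        return area;
    }

    // One common print routine is called recursively through the hierarchy.
    virtual void print(int indent = 0) const {
        std::cout << std::string(indent, ' ') << name << std::endl;
        for (const Component *child : children)
            child->print(indent + 2);
    }

  protected:
    virtual double localArea() const { return 0.0; }
    std::string name;
    std::vector<Component*> children;
};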


/*****************************************************************************
* McPAT/CACTI
* SOFTWARE LICENSE AGREEMENT
* Copyright 2012 Hewlett-Packard Development Company, L.P.
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
* All Rights Reserved
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
***************************************************************************/
#include <cassert>
#include <cmath>
#include <iostream>
#include "area.h"
#include "decoder.h"
#include "parameter.h"
using namespace std;
Decoder::Decoder(
int _num_dec_signals,
bool flag_way_select,
double _C_ld_dec_out,
double _R_wire_dec_out,
bool fully_assoc_,
bool is_dram_,
bool is_wl_tr_,
const Area & cell_)
: exist(false),
C_ld_dec_out(_C_ld_dec_out),
R_wire_dec_out(_R_wire_dec_out),
num_gates(0), num_gates_min(2),
delay(0),
//power(),
fully_assoc(fully_assoc_), is_dram(is_dram_),
is_wl_tr(is_wl_tr_), cell(cell_) {
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
w_dec_n[i] = 0;
w_dec_p[i] = 0;
}
/*
* _num_dec_signals is the number of decoded signals the decoder produces
* as output; num_addr_bits_dec is the number of signals to be decoded,
* i.e. the decoder's inputs.
*/
int num_addr_bits_dec = _log2(_num_dec_signals);
if (num_addr_bits_dec < 4) {
if (flag_way_select) {
exist = true;
num_in_signals = 2;
} else {
num_in_signals = 0;
}
} else {
exist = true;
if (flag_way_select) {
num_in_signals = 3;
} else {
num_in_signals = 2;
}
}
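// num_in_signals is the fan-in of the decoder's first NAND stage: 2 when it
// combines the two predecode-block outputs (or way select with a single
// predecode output), 3 when way select is combined with both predecode
// outputs, and 0 when no decode NAND stage is needed.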
assert(cell.h > 0);
assert(cell.w > 0);
// The height of a row-decoder-driver cell is g_tp.h_dec * cell.h
// (previously fixed at 4 * cell.h).
area.h = g_tp.h_dec * cell.h;
compute_widths();
compute_area();
}
void Decoder::compute_widths() {
double F;
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr);
double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
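// gnand2 and gnand3 are the logical efforts of 2- and 3-input NAND gates for
// the given PMOS-to-NMOS sizing ratio. F is the path effort: the gate's
// logical effort scaled by the ratio of the decoder output load to the first
// stage's input capacitance. logical_effort() then picks the number of
// stages (at least num_gates_min) and the transistor widths of each stage.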
if (exist) {
if (num_in_signals == 2 || fully_assoc) {
w_dec_n[0] = 2 * g_tp.min_w_nmos_;
w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F = gnand2;
} else {
w_dec_n[0] = 3 * g_tp.min_w_nmos_;
w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F = gnand3;
}
F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) +
gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr));
num_gates = logical_effort(
num_gates_min,
num_in_signals == 2 ? gnand2 : gnand3,
F,
w_dec_n,
w_dec_p,
C_ld_dec_out,
p_to_n_sz_ratio,
is_dram,
is_wl_tr,
g_tp.max_w_nmos_dec);
}
}
void Decoder::compute_area() {
double cumulative_area = 0;
double cumulative_curr = 0; // cumulative subthreshold leakage current
double cumulative_curr_Ig = 0; // cumulative gate leakage current
if (exist) { // First check if this decoder exists
if (num_in_signals == 2) {
cumulative_area =
compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h);
cumulative_curr =
cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
cumulative_curr_Ig =
cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram);
} else if (num_in_signals == 3) {
cumulative_area =
compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h);
cumulative_curr =
cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
cumulative_curr_Ig =
cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
}
for (int i = 1; i < num_gates; i++) {
cumulative_area +=
compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h);
cumulative_curr +=
cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
cumulative_curr_Ig +=
cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
}
power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
area.w = (cumulative_area / area.h);
}
}
double Decoder::compute_delays(double inrisetime) {
if (exist) {
double ret_val = 0; // outrisetime
int i;
double rd, tf, this_delay, c_load, c_intrinsic, Vpp;
double Vdd = g_tp.peri_global.Vdd;
if ((is_wl_tr) && (is_dram)) {
Vpp = g_tp.vpp;
} else if (is_wl_tr) {
Vpp = g_tp.sram_cell.Vdd;
} else {
Vpp = g_tp.peri_global.Vdd;
}
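// Vpp is the wordline swing: the boosted g_tp.vpp for DRAM wordline drivers,
// the SRAM cell supply for SRAM wordline drivers, and the peripheral Vdd
// otherwise. Each stage's delay below comes from the Horowitz approximation
// applied to the stage time constant tf, and the output rise time
// (this_delay / 0.5) is fed to the next stage.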
// delay of the decoder's first NAND stage
rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr);
c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr);
c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals +
drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
inrisetime = this_delay / (1.0 - 0.5);
power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
for (i = 1; i < num_gates - 1; ++i) {
rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr);
c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
inrisetime = this_delay / (1.0 - 0.5);
power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
}
// add delay of final inverter that drives the wordline
i = num_gates - 1;
c_load = C_ld_dec_out;
rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr);
c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) +
drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr);
tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2;
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
ret_val = this_delay / (1.0 - 0.5);
power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd;
return ret_val;
} else {
return 0.0;
}
}
void Decoder::leakage_feedback(double temperature)
{
double cumulative_curr = 0; // cumulative subthreshold leakage current
double cumulative_curr_Ig = 0; // cumulative gate leakage current
if (exist)
{ // First check if this decoder exists
if (num_in_signals == 2)
{
cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram);
}
else if (num_in_signals == 3)
{
cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);
}
for (int i = 1; i < num_gates; i++)
{
cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
cumulative_curr_Ig += cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram);
}
power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd;
power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd;
}
}
PredecBlk::PredecBlk(
int num_dec_signals,
Decoder * dec_,
double C_wire_predec_blk_out,
double R_wire_predec_blk_out_,
int num_dec_per_predec,
bool is_dram,
bool is_blk1)
: dec(dec_),
exist(false),
number_input_addr_bits(0),
C_ld_predec_blk_out(0),
R_wire_predec_blk_out(0),
branch_effort_nand2_gate_output(1),
branch_effort_nand3_gate_output(1),
flag_two_unique_paths(false),
flag_L2_gate(0),
number_inputs_L1_gate(0),
number_gates_L1_nand2_path(0),
number_gates_L1_nand3_path(0),
number_gates_L2(0),
min_number_gates_L1(2),
min_number_gates_L2(2),
num_L1_active_nand2_path(0),
num_L1_active_nand3_path(0),
delay_nand2_path(0),
delay_nand3_path(0),
power_nand2_path(),
power_nand3_path(),
power_L2(),
is_dram_(is_dram) {
int branch_effort_predec_out;
double C_ld_dec_gate;
int num_addr_bits_dec = _log2(num_dec_signals);
int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2;
int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits;
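// The address bits to be decoded are split between two predecode blocks:
// blk1 gets the first (larger) half and blk2 the remainder.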
w_L1_nand2_n[0] = 0;
w_L1_nand2_p[0] = 0;
w_L1_nand3_n[0] = 0;
w_L1_nand3_p[0] = 0;
if (is_blk1 == true) {
if (num_addr_bits_dec <= 0) {
return;
} else if (num_addr_bits_dec < 4) {
// Just one predecode block, built from NAND2 gates, is required and no
// row decoder is needed; the first level of predecoding directly drives
// the decoder output load.
exist = true;
number_input_addr_bits = num_addr_bits_dec;
R_wire_predec_blk_out = dec->R_wire_dec_out;
C_ld_predec_blk_out = dec->C_ld_dec_out;
} else {
exist = true;
number_input_addr_bits = blk1_num_input_addr_bits;
branch_effort_predec_out = (1 << blk2_num_input_addr_bits);
C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
R_wire_predec_blk_out = R_wire_predec_blk_out_;
C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
}
} else {
if (num_addr_bits_dec >= 4) {
exist = true;
number_input_addr_bits = blk2_num_input_addr_bits;
branch_effort_predec_out = (1 << blk1_num_input_addr_bits);
C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false);
R_wire_predec_blk_out = R_wire_predec_blk_out_;
C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out;
}
}
compute_widths();
compute_area();
}
void PredecBlk::compute_widths() {
double F, c_load_nand3_path, c_load_nand2_path;
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio);
if (exist == false) return;
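// Map the number of predecoded address bits to a gate topology. The first
// level uses NAND2 and/or NAND3 gates; for four or more bits a second level
// of NAND2 (flag_L2_gate == 2) or NAND3 (flag_L2_gate == 3) gates combines
// the first-level outputs. branch_effort_nand2/nand3_gate_output is the
// number of second-level gates each first-level output must drive.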
switch (number_input_addr_bits) {
case 1:
flag_two_unique_paths = false;
number_inputs_L1_gate = 2;
flag_L2_gate = 0;
break;
case 2:
flag_two_unique_paths = false;
number_inputs_L1_gate = 2;
flag_L2_gate = 0;
break;
case 3:
flag_two_unique_paths = false;
number_inputs_L1_gate = 3;
flag_L2_gate = 0;
break;
case 4:
flag_two_unique_paths = false;
number_inputs_L1_gate = 2;
flag_L2_gate = 2;
branch_effort_nand2_gate_output = 4;
break;
case 5:
flag_two_unique_paths = true;
flag_L2_gate = 2;
branch_effort_nand2_gate_output = 8;
branch_effort_nand3_gate_output = 4;
break;
case 6:
flag_two_unique_paths = false;
number_inputs_L1_gate = 3;
flag_L2_gate = 2;
branch_effort_nand3_gate_output = 8;
break;
case 7:
flag_two_unique_paths = true;
flag_L2_gate = 3;
branch_effort_nand2_gate_output = 32;
branch_effort_nand3_gate_output = 16;
break;
case 8:
flag_two_unique_paths = true;
flag_L2_gate = 3;
branch_effort_nand2_gate_output = 64;
branch_effort_nand3_gate_output = 32;
break;
case 9:
flag_two_unique_paths = false;
number_inputs_L1_gate = 3;
flag_L2_gate = 3;
branch_effort_nand3_gate_output = 64;
break;
default:
assert(0);
break;
}
// find the number of gates and sizing in second level of predecoder (if there is a second level)
if (flag_L2_gate) {
if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate
w_L2_n[0] = 2 * g_tp.min_w_nmos_;
F = gnand2;
} else { // 2nd level is a NAND3 gate
w_L2_n[0] = 3 * g_tp.min_w_nmos_;
F = gnand3;
}
w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_));
number_gates_L2 = logical_effort(
min_number_gates_L2,
flag_L2_gate == 2 ? gnand2 : gnand3,
F,
w_L2_n,
w_L2_p,
C_ld_predec_blk_out,
p_to_n_sz_ratio,
is_dram_, false,
g_tp.max_w_nmos_);
// Now find the number of gates and widths in first level of predecoder
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
// When flag_two_unique_paths is true, the first level of the predecoder
// uses both NAND2 and NAND3 gates; when number_inputs_L1_gate is 2, a
// NAND2 gate is used in the first level of the predecoder.
c_load_nand2_path = branch_effort_nand2_gate_output *
(gate_C(w_L2_n[0], 0, is_dram_) +
gate_C(w_L2_p[0], 0, is_dram_));
w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F = gnand2 * c_load_nand2_path /
(gate_C(w_L1_nand2_n[0], 0, is_dram_) +
gate_C(w_L1_nand2_p[0], 0, is_dram_));
number_gates_L1_nand2_path = logical_effort(
min_number_gates_L1,
gnand2,
F,
w_L1_nand2_n,
w_L1_nand2_p,
c_load_nand2_path,
p_to_n_sz_ratio,
is_dram_, false,
g_tp.max_w_nmos_);
}
// Now find the widths of gates along the path in which the first gate is
// a NAND3. When flag_two_unique_paths is true, the first level of the
// predecoder uses both NAND2 and NAND3 gates; when number_inputs_L1_gate
// is 3, a NAND3 gate is used in the first level of the predecoder.
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
c_load_nand3_path = branch_effort_nand3_gate_output *
(gate_C(w_L2_n[0], 0, is_dram_) +
gate_C(w_L2_p[0], 0, is_dram_));
w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F = gnand3 * c_load_nand3_path /
(gate_C(w_L1_nand3_n[0], 0, is_dram_) +
gate_C(w_L1_nand3_p[0], 0, is_dram_));
number_gates_L1_nand3_path = logical_effort(
min_number_gates_L1,
gnand3,
F,
w_L1_nand3_n,
w_L1_nand3_p,
c_load_nand3_path,
p_to_n_sz_ratio,
is_dram_, false,
g_tp.max_w_nmos_);
}
} else { // find number of gates and widths in first level of predecoder block when there is no second level
if (number_inputs_L1_gate == 2) {
w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_;
w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F = gnand2 * C_ld_predec_blk_out /
(gate_C(w_L1_nand2_n[0], 0, is_dram_) +
gate_C(w_L1_nand2_p[0], 0, is_dram_));
number_gates_L1_nand2_path = logical_effort(
min_number_gates_L1,
gnand2,
F,
w_L1_nand2_n,
w_L1_nand2_p,
C_ld_predec_blk_out,
p_to_n_sz_ratio,
is_dram_, false,
g_tp.max_w_nmos_);
} else if (number_inputs_L1_gate == 3) {
w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_;
w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
F = gnand3 * C_ld_predec_blk_out /
(gate_C(w_L1_nand3_n[0], 0, is_dram_) +
gate_C(w_L1_nand3_p[0], 0, is_dram_));
number_gates_L1_nand3_path = logical_effort(
min_number_gates_L1,
gnand3,
F,
w_L1_nand3_n,
w_L1_nand3_p,
C_ld_predec_blk_out,
p_to_n_sz_ratio,
is_dram_, false,
g_tp.max_w_nmos_);
}
}
}
void PredecBlk::compute_area() {
if (exist) { // First check whether a predecoder block is needed
int num_L1_nand2 = 0;
int num_L1_nand3 = 0;
int num_L2 = 0;
double tot_area_L1_nand3 = 0;
double leak_L1_nand3 = 0;
double gate_leak_L1_nand3 = 0;
double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def);
double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
if (number_inputs_L1_gate != 3) {
tot_area_L1_nand3 = 0;
leak_L1_nand3 = 0;
gate_leak_L1_nand3 = 0;
} else {
tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def);
leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
}
switch (number_input_addr_bits) {
case 1: //2 NAND2 gates
num_L1_nand2 = 2;
num_L2 = 0;
num_L1_active_nand2_path = 1;
num_L1_active_nand3_path = 0;
break;
case 2: //4 NAND2 gates
num_L1_nand2 = 4;
num_L2 = 0;
num_L1_active_nand2_path = 1;
num_L1_active_nand3_path = 0;
break;
case 3: //8 NAND3 gates
num_L1_nand3 = 8;
num_L2 = 0;
num_L1_active_nand2_path = 0;
num_L1_active_nand3_path = 1;
break;
case 4: //4 + 4 NAND2 gates
num_L1_nand2 = 8;
num_L2 = 16;
num_L1_active_nand2_path = 2;
num_L1_active_nand3_path = 0;
break;
case 5: //4 NAND2 gates, 8 NAND3 gates
num_L1_nand2 = 4;
num_L1_nand3 = 8;
num_L2 = 32;
num_L1_active_nand2_path = 1;
num_L1_active_nand3_path = 1;
break;
case 6: //8 + 8 NAND3 gates
num_L1_nand3 = 16;
num_L2 = 64;
num_L1_active_nand2_path = 0;
num_L1_active_nand3_path = 2;
break;
case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
num_L1_nand2 = 8;
num_L1_nand3 = 8;
num_L2 = 128;
num_L1_active_nand2_path = 2;
num_L1_active_nand3_path = 1;
break;
case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
num_L1_nand2 = 4;
num_L1_nand3 = 16;
num_L2 = 256;
num_L1_active_nand2_path = 2;
num_L1_active_nand3_path = 2;
break;
case 9: //8 + 8 + 8 NAND3 gates
num_L1_nand3 = 24;
num_L2 = 512;
num_L1_active_nand2_path = 0;
num_L1_active_nand3_path = 3;
break;
default:
break;
}
for (int i = 1; i < number_gates_L1_nand2_path; ++i) {
tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def);
leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
}
tot_area_L1_nand2 *= num_L1_nand2;
leak_L1_nand2 *= num_L1_nand2;
gate_leak_L1_nand2 *= num_L1_nand2;
for (int i = 1; i < number_gates_L1_nand3_path; ++i) {
tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def);
leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
}
tot_area_L1_nand3 *= num_L1_nand3;
leak_L1_nand3 *= num_L1_nand3;
gate_leak_L1_nand3 *= num_L1_nand3;
double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3;
double cumulative_area_L2 = 0.0;
double leakage_L2 = 0.0;
double gate_leakage_L2 = 0.0;
if (flag_L2_gate == 2) {
cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
} else if (flag_L2_gate == 3) {
cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def);
leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
}
for (int i = 1; i < number_gates_L2; ++i) {
cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def);
leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
}
cumulative_area_L2 *= num_L2;
leakage_L2 *= num_L2;
gate_leakage_L2 *= num_L2;
power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
area.set_area(cumulative_area_L1 + cumulative_area_L2);
power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
}
}
pair<double, double> PredecBlk::compute_delays(
pair<double, double> inrisetime) { // <nand2, nand3>
pair<double, double> ret_val;
ret_val.first = 0; // outrisetime_nand2_path
ret_val.second = 0; // outrisetime_nand3_path
double inrisetime_nand2_path = inrisetime.first;
double inrisetime_nand3_path = inrisetime.second;
int i;
double rd, c_load, c_intrinsic, tf, this_delay;
double Vdd = g_tp.peri_global.Vdd;
// TODO: following delay calculation part can be greatly simplified.
// first check whether a predecoder block is required
if (exist) {
//Find delay in first level of predecoder block
//First find delay in path
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) {
//First gate is a NAND2 gate
rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_);
c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_);
c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd;
//Add delays of all but the last inverter in the chain
for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) {
rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
//Add delay of the last inverter
i = number_gates_L1_nand2_path - 1;
rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_);
if (flag_L2_gate) {
c_load = branch_effort_nand2_gate_output *
(gate_C(w_L2_n[0], 0, is_dram_) +
gate_C(w_L2_p[0], 0, is_dram_));
c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
} else { //First level directly drives decoder output load
c_load = C_ld_predec_blk_out;
c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
ret_val.first = this_delay / (1.0 - 0.5);
power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
}
if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) {
//Check if the number of gates in the first level is more than 1.
//First gate is a NAND3 gate
rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_);
c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_);
c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
//Add delays of all but the last inverter in the chain
for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) {
rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
//Add delay of the last inverter
i = number_gates_L1_nand3_path - 1;
rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_);
if (flag_L2_gate) {
c_load = branch_effort_nand3_gate_output *
(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0,
is_dram_));
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
} else { //First level directly drives decoder output load
c_load = C_ld_predec_blk_out;
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
ret_val.second = this_delay / (1.0 - 0.5);
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
}
// Find delay through second level
if (flag_L2_gate) {
if (flag_L2_gate == 2) {
rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_);
c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
} else { // flag_L2_gate = 3
rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_);
c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_);
c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
for (i = 1; i < number_gates_L2 - 1; ++i) {
rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
//Add delay of final inverter that drives the wordline decoders
i = number_gates_L2 - 1;
c_load = C_ld_predec_blk_out;
rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_);
c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2;
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
ret_val.first = this_delay / (1.0 - 0.5);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
ret_val.second = this_delay / (1.0 - 0.5);
power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd;
}
}
delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second;
return ret_val;
}
void PredecBlk::leakage_feedback(double temperature)
{
if (exist)
{ // First check whether a predecoder block is needed
int num_L1_nand2 = 0;
int num_L1_nand3 = 0;
int num_L2 = 0;
double leak_L1_nand3 = 0;
double gate_leak_L1_nand3 = 0;
double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_);
if (number_inputs_L1_gate != 3) {
leak_L1_nand3 = 0;
gate_leak_L1_nand3 = 0;
}
else {
leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand);
}
switch (number_input_addr_bits)
{
case 1: //2 NAND2 gates
num_L1_nand2 = 2;
num_L2 = 0;
num_L1_active_nand2_path = 1;
num_L1_active_nand3_path = 0;
break;
case 2: //4 NAND2 gates
num_L1_nand2 = 4;
num_L2 = 0;
num_L1_active_nand2_path = 1;
num_L1_active_nand3_path = 0;
break;
case 3: //8 NAND3 gates
num_L1_nand3 = 8;
num_L2 = 0;
num_L1_active_nand2_path = 0;
num_L1_active_nand3_path = 1;
break;
case 4: //4 + 4 NAND2 gates
num_L1_nand2 = 8;
num_L2 = 16;
num_L1_active_nand2_path = 2;
num_L1_active_nand3_path = 0;
break;
case 5: //4 NAND2 gates, 8 NAND3 gates
num_L1_nand2 = 4;
num_L1_nand3 = 8;
num_L2 = 32;
num_L1_active_nand2_path = 1;
num_L1_active_nand3_path = 1;
break;
case 6: //8 + 8 NAND3 gates
num_L1_nand3 = 16;
num_L2 = 64;
num_L1_active_nand2_path = 0;
num_L1_active_nand3_path = 2;
break;
case 7: //4 + 4 NAND2 gates, 8 NAND3 gates
num_L1_nand2 = 8;
num_L1_nand3 = 8;
num_L2 = 128;
num_L1_active_nand2_path = 2;
num_L1_active_nand3_path = 1;
break;
case 8: //4 NAND2 gates, 8 + 8 NAND3 gates
num_L1_nand2 = 4;
num_L1_nand3 = 16;
num_L2 = 256;
num_L1_active_nand2_path = 2;
num_L1_active_nand3_path = 2;
break;
case 9: //8 + 8 + 8 NAND3 gates
num_L1_nand3 = 24;
num_L2 = 512;
num_L1_active_nand2_path = 0;
num_L1_active_nand3_path = 3;
break;
default:
break;
}
for (int i = 1; i < number_gates_L1_nand2_path; ++i)
{
leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_);
}
leak_L1_nand2 *= num_L1_nand2;
gate_leak_L1_nand2 *= num_L1_nand2;
for (int i = 1; i < number_gates_L1_nand3_path; ++i)
{
leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_);
}
leak_L1_nand3 *= num_L1_nand3;
gate_leak_L1_nand3 *= num_L1_nand3;
double leakage_L2 = 0.0;
double gate_leakage_L2 = 0.0;
if (flag_L2_gate == 2)
{
leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_);
}
else if (flag_L2_gate == 3)
{
leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_);
}
for (int i = 1; i < number_gates_L2; ++i)
{
leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_);
}
leakage_L2 *= num_L2;
gate_leakage_L2 *= num_L2;
power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd;
power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd;
power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd;
power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd;
power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd;
power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd;
}
}
PredecBlkDrv::PredecBlkDrv(
int way_select_,
PredecBlk * blk_,
bool is_dram)
: flag_driver_exists(0),
number_gates_nand2_path(0),
number_gates_nand3_path(0),
min_number_gates(2),
num_buffers_driving_1_nand2_load(0),
num_buffers_driving_2_nand2_load(0),
num_buffers_driving_4_nand2_load(0),
num_buffers_driving_2_nand3_load(0),
num_buffers_driving_8_nand3_load(0),
num_buffers_nand3_path(0),
c_load_nand2_path_out(0),
c_load_nand3_path_out(0),
r_load_nand2_path_out(0),
r_load_nand3_path_out(0),
delay_nand2_path(0),
delay_nand3_path(0),
power_nand2_path(),
power_nand3_path(),
blk(blk_), dec(blk->dec),
is_dram_(is_dram),
way_select(way_select_) {
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
width_nand2_path_n[i] = 0;
width_nand2_path_p[i] = 0;
width_nand3_path_n[i] = 0;
width_nand3_path_p[i] = 0;
}
number_input_addr_bits = blk->number_input_addr_bits;
if (way_select > 1) {
flag_driver_exists = 1;
number_input_addr_bits = way_select;
if (dec->num_in_signals == 2) {
c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
num_buffers_driving_2_nand2_load = number_input_addr_bits;
} else if (dec->num_in_signals == 3) {
c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_);
num_buffers_driving_2_nand3_load = number_input_addr_bits;
}
} else if (way_select == 0) {
if (blk->exist) {
flag_driver_exists = 1;
}
}
compute_widths();
compute_area();
}
void PredecBlkDrv::compute_widths() {
// The predecode block driver accepts as input the address bits from the h-tree network. For
// each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of
// inversion to generate addrbar and simply treat addrbar as addr.
double F;
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
if (flag_driver_exists) {
double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_);
double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_);
if (way_select == 0) {
if (blk->number_input_addr_bits == 1) {
//2 NAND2 gates
num_buffers_driving_2_nand2_load = 1;
c_load_nand2_path_out = 2 * C_nand2_gate_blk;
} else if (blk->number_input_addr_bits == 2) {
//4 NAND2 gates one 2-4 decoder
num_buffers_driving_4_nand2_load = 2;
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
} else if (blk->number_input_addr_bits == 3) {
//8 NAND3 gates one 3-8 decoder
num_buffers_driving_8_nand3_load = 3;
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
} else if (blk->number_input_addr_bits == 4) {
//4 + 4 NAND2 gates two 2-4 decoder
num_buffers_driving_4_nand2_load = 4;
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
} else if (blk->number_input_addr_bits == 5) {
//4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8
//decoder
num_buffers_driving_4_nand2_load = 2;
num_buffers_driving_8_nand3_load = 3;
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
} else if (blk->number_input_addr_bits == 6) {
//8 + 8 NAND3 gates two 3-8 decoder
num_buffers_driving_8_nand3_load = 6;
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
} else if (blk->number_input_addr_bits == 7) {
//4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8
//decoder
num_buffers_driving_4_nand2_load = 4;
num_buffers_driving_8_nand3_load = 3;
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
} else if (blk->number_input_addr_bits == 8) {
//4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8
//decoder
num_buffers_driving_4_nand2_load = 2;
num_buffers_driving_8_nand3_load = 6;
c_load_nand2_path_out = 4 * C_nand2_gate_blk;
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
} else if (blk->number_input_addr_bits == 9) {
//8 + 8 + 8 NAND3 gates three 3-8 decoder
num_buffers_driving_8_nand3_load = 9;
c_load_nand3_path_out = 8 * C_nand3_gate_blk;
}
}
if ((blk->flag_two_unique_paths) ||
(blk->number_inputs_L1_gate == 2) ||
(number_input_addr_bits == 0) ||
((way_select) && (dec->num_in_signals == 2))) {
//this means that way_select is driving NAND2 in decoder.
width_nand2_path_n[0] = g_tp.min_w_nmos_;
width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0];
F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_);
number_gates_nand2_path = logical_effort(
min_number_gates,
1,
F,
width_nand2_path_n,
width_nand2_path_p,
c_load_nand2_path_out,
p_to_n_sz_ratio,
is_dram_, false, g_tp.max_w_nmos_);
}
if ((blk->flag_two_unique_paths) ||
(blk->number_inputs_L1_gate == 3) ||
((way_select) && (dec->num_in_signals == 3))) {
//this means that way_select is driving NAND3 in decoder.
width_nand3_path_n[0] = g_tp.min_w_nmos_;
width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0];
F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_);
number_gates_nand3_path = logical_effort(
min_number_gates,
1,
F,
width_nand3_path_n,
width_nand3_path_p,
c_load_nand3_path_out,
p_to_n_sz_ratio,
is_dram_, false, g_tp.max_w_nmos_);
}
}
}
void PredecBlkDrv::compute_area() {
double area_nand2_path = 0;
double area_nand3_path = 0;
double leak_nand2_path = 0;
double leak_nand3_path = 0;
double gate_leak_nand2_path = 0;
double gate_leak_nand3_path = 0;
if (flag_driver_exists) {
// first check whether a predecoder block driver is needed
for (int i = 0; i < number_gates_nand2_path; ++i) {
area_nand2_path +=
compute_gate_area(INV, 1, width_nand2_path_p[i],
width_nand2_path_n[i], g_tp.cell_h_def);
leak_nand2_path +=
cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
1, inv, is_dram_);
gate_leak_nand2_path +=
cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i],
1, inv, is_dram_);
}
area_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
leak_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
for (int i = 0; i < number_gates_nand3_path; ++i) {
area_nand3_path +=
compute_gate_area(INV, 1, width_nand3_path_p[i],
width_nand3_path_n[i], g_tp.cell_h_def);
leak_nand3_path +=
cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
1, inv, is_dram_);
gate_leak_nand3_path +=
cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i],
1, inv, is_dram_);
}
area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
area.set_area(area_nand2_path + area_nand3_path);
}
}
pair<double, double> PredecBlkDrv::compute_delays(
double inrisetime_nand2_path,
double inrisetime_nand3_path) {
pair<double, double> ret_val;
ret_val.first = 0; // outrisetime_nand2_path
ret_val.second = 0; // outrisetime_nand3_path
int i;
double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay;
double Vdd = g_tp.peri_global.Vdd;
if (flag_driver_exists) {
for (i = 0; i < number_gates_nand2_path - 1; ++i) {
rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_gate_load);
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
inrisetime_nand2_path = this_delay / (1.0 - 0.5);
power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
}
// Final inverter drives the predecoder block or the decoder output load
if (number_gates_nand2_path != 0) {
i = number_gates_nand2_path - 1;
rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_);
c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
c_load = c_load_nand2_path_out;
tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2;
this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE);
delay_nand2_path += this_delay;
ret_val.first = this_delay / (1.0 - 0.5);
power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
// cout<< "c_intrinsic = " << c_intrinsic << "c_load" << c_load <<endl;
}
for (i = 0; i < number_gates_nand3_path - 1; ++i) {
rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
c_gate_load = gate_C(width_nand3_path_p[i+1] + width_nand3_path_n[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_gate_load);
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
inrisetime_nand3_path = this_delay / (1.0 - 0.5);
power_nand3_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd;
}
// Final inverter drives the predecoder block or the decoder output load
if (number_gates_nand3_path != 0) {
i = number_gates_nand3_path - 1;
rd = tr_R_on(width_nand3_path_n[i], NCH, 1, is_dram_);
c_intrinsic = drain_C_(width_nand3_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_nand3_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
c_load = c_load_nand3_path_out;
tf = rd * (c_intrinsic + c_load) + r_load_nand3_path_out * c_load / 2;
this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE);
delay_nand3_path += this_delay;
ret_val.second = this_delay / (1.0 - 0.5);
power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd;
}
}
return ret_val;
}
double PredecBlkDrv::get_rdOp_dynamic_E(int num_act_mats_hor_dir) {
return (num_addr_bits_nand2_path()*power_nand2_path.readOp.dynamic +
num_addr_bits_nand3_path()*power_nand3_path.readOp.dynamic) * num_act_mats_hor_dir;
}
Predec::Predec(
PredecBlkDrv * drv1_,
PredecBlkDrv * drv2_)
: blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) {
driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
drv1->power_nand3_path.readOp.leakage +
drv2->power_nand2_path.readOp.leakage +
drv2->power_nand3_path.readOp.leakage;
block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
blk1->power_nand3_path.readOp.leakage +
blk1->power_L2.readOp.leakage +
blk2->power_nand2_path.readOp.leakage +
blk2->power_nand3_path.readOp.leakage +
blk2->power_L2.readOp.leakage;
power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
drv1->power_nand3_path.readOp.gate_leakage +
drv2->power_nand2_path.readOp.gate_leakage +
drv2->power_nand3_path.readOp.gate_leakage;
block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
blk1->power_nand3_path.readOp.gate_leakage +
blk1->power_L2.readOp.gate_leakage +
blk2->power_nand2_path.readOp.gate_leakage +
blk2->power_nand3_path.readOp.gate_leakage +
blk2->power_L2.readOp.gate_leakage;
power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
}
void PredecBlkDrv::leakage_feedback(double temperature)
{
double leak_nand2_path = 0;
double leak_nand3_path = 0;
double gate_leak_nand2_path = 0;
double gate_leak_nand3_path = 0;
if (flag_driver_exists)
{ // first check whether a predecoder block driver is needed
for (int i = 0; i < number_gates_nand2_path; ++i)
{
leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_);
}
leak_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load +
num_buffers_driving_2_nand2_load +
num_buffers_driving_4_nand2_load);
for (int i = 0; i < number_gates_nand3_path; ++i)
{
leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_);
}
leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load);
power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd;
power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd;
power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd;
power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd;
}
}
double Predec::compute_delays(double inrisetime) {
// TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block.
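// Each predecode block driver feeds its predecode block; the slower of the
// two resulting paths (selected by get_max_delay_before_decoder) determines
// the predecoder delay and the rise time handed to the row decoder.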
pair<double, double> tmp_pair1, tmp_pair2;
tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime);
tmp_pair1 = blk1->compute_delays(tmp_pair1);
tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime);
tmp_pair2 = blk2->compute_delays(tmp_pair2);
tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2);
driver_power.readOp.dynamic =
drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic +
drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic +
drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic +
drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic;
block_power.readOp.dynamic =
blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path +
blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path +
blk1->power_L2.readOp.dynamic +
blk2->power_nand2_path.readOp.dynamic * blk2->num_L1_active_nand2_path +
blk2->power_nand3_path.readOp.dynamic * blk2->num_L1_active_nand3_path +
blk2->power_L2.readOp.dynamic;
power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic;
delay = tmp_pair1.first;
return tmp_pair1.second;
}
void Predec::leakage_feedback(double temperature)
{
drv1->leakage_feedback(temperature);
drv2->leakage_feedback(temperature);
blk1->leakage_feedback(temperature);
blk2->leakage_feedback(temperature);
driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage +
drv1->power_nand3_path.readOp.leakage +
drv2->power_nand2_path.readOp.leakage +
drv2->power_nand3_path.readOp.leakage;
block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage +
blk1->power_nand3_path.readOp.leakage +
blk1->power_L2.readOp.leakage +
blk2->power_nand2_path.readOp.leakage +
blk2->power_nand3_path.readOp.leakage +
blk2->power_L2.readOp.leakage;
power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage;
driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage +
drv1->power_nand3_path.readOp.gate_leakage +
drv2->power_nand2_path.readOp.gate_leakage +
drv2->power_nand3_path.readOp.gate_leakage;
block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage +
blk1->power_nand3_path.readOp.gate_leakage +
blk1->power_L2.readOp.gate_leakage +
blk2->power_nand2_path.readOp.gate_leakage +
blk2->power_nand3_path.readOp.gate_leakage +
blk2->power_L2.readOp.gate_leakage;
power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage;
}
// returns <delay, risetime>
pair<double, double> Predec::get_max_delay_before_decoder(
pair<double, double> input_pair1,
pair<double, double> input_pair2) {
pair<double, double> ret_val;
double delay;
delay = drv1->delay_nand2_path + blk1->delay_nand2_path;
ret_val.first = delay;
ret_val.second = input_pair1.first;
delay = drv1->delay_nand3_path + blk1->delay_nand3_path;
if (ret_val.first < delay) {
ret_val.first = delay;
ret_val.second = input_pair1.second;
}
delay = drv2->delay_nand2_path + blk2->delay_nand2_path;
if (ret_val.first < delay) {
ret_val.first = delay;
ret_val.second = input_pair2.first;
}
delay = drv2->delay_nand3_path + blk2->delay_nand3_path;
if (ret_val.first < delay) {
ret_val.first = delay;
ret_val.second = input_pair2.second;
}
return ret_val;
}
Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_,
bool is_dram)
: number_gates(0),
min_number_gates(2),
c_gate_load(c_gate_load_),
c_wire_load(c_wire_load_),
r_wire_load(r_wire_load_),
delay(0),
power(),
is_dram_(is_dram) {
for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) {
width_n[i] = 0;
width_p[i] = 0;
}
compute_widths();
}
void Driver::compute_widths() {
double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_);
double c_load = c_gate_load + c_wire_load;
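// Size the driver as an inverter chain: start from a minimum-width first
// stage and let logical_effort() choose the number of stages and the widths
// needed to drive the combined gate and wire load.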
width_n[0] = g_tp.min_w_nmos_;
width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_;
double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_);
number_gates = logical_effort(
min_number_gates,
1,
F,
width_n,
width_p,
c_load,
p_to_n_sz_ratio,
is_dram_, false,
g_tp.max_w_nmos_);
}
double Driver::compute_delay(double inrisetime) {
int i;
double rd, c_load, c_intrinsic, tf;
double this_delay = 0;
for (i = 0; i < number_gates - 1; ++i) {
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_);
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
inrisetime = this_delay / (1.0 - 0.5);
power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
g_tp.peri_global.Vdd;
power.readOp.leakage +=
cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
g_tp.peri_global.Vdd;
power.readOp.gate_leakage +=
cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
g_tp.peri_global.Vdd;
}
i = number_gates - 1;
c_load = c_gate_load + c_wire_load;
rd = tr_R_on(width_n[i], NCH, 1, is_dram_);
c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) +
drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_);
tf = rd * (c_intrinsic + c_load) + r_wire_load *
(c_wire_load / 2 + c_gate_load);
this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
delay += this_delay;
power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd *
g_tp.peri_global.Vdd;
power.readOp.leakage +=
cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
g_tp.peri_global.Vdd;
power.readOp.gate_leakage +=
cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *
g_tp.peri_global.Vdd;
return this_delay / (1.0 - 0.5);
}