0deef376d9
This patch includes software engineering changes and some generic bug fixes Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concernts we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint.
744 lines
25 KiB
C++
744 lines
25 KiB
C++
/*****************************************************************************
|
|
* McPAT/CACTI
|
|
* SOFTWARE LICENSE AGREEMENT
|
|
* Copyright 2012 Hewlett-Packard Development Company, L.P.
|
|
* Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
|
|
* All Rights Reserved
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met: redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer;
|
|
* redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution;
|
|
* neither the name of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
|
|
#include <cassert>
|
|
#include <cmath>
|
|
#include <iostream>
|
|
|
|
#include "basic_circuit.h"
|
|
#include "parameter.h"
|
|
|
|
uint32_t _log2(uint64_t num) {
|
|
uint32_t log2 = 0;
|
|
|
|
if (num == 0) {
|
|
std::cerr << "log0?" << std::endl;
|
|
exit(1);
|
|
}
|
|
|
|
while (num > 1) {
|
|
num = (num >> 1);
|
|
log2++;
|
|
}
|
|
|
|
return log2;
|
|
}
|
|
|
|
|
|
bool is_pow2(int64_t val) {
|
|
if (val <= 0) {
|
|
return false;
|
|
} else if (val == 1) {
|
|
return true;
|
|
} else {
|
|
return (_log2(val) != _log2(val - 1));
|
|
}
|
|
}
|
|
|
|
|
|
int powers (int base, int n) {
|
|
int i, p;
|
|
|
|
p = 1;
|
|
for (i = 1; i <= n; ++i)
|
|
p *= base;
|
|
return p;
|
|
}
|
|
|
|
/*----------------------------------------------------------------------*/
|
|
|
|
double logtwo (double x) {
|
|
assert(x > 0);
|
|
return ((double) (log (x) / log (2.0)));
|
|
}
|
|
|
|
/*----------------------------------------------------------------------*/
|
|
|
|
|
|
double gate_C(
|
|
double width,
|
|
double wirelength,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
const TechnologyParameter::DeviceType * dt;
|
|
|
|
if (_is_dram && _is_cell) {
|
|
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
|
} else if (_is_dram && _is_wl_tr) {
|
|
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
|
} else if (!_is_dram && _is_cell) {
|
|
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
|
} else {
|
|
dt = &g_tp.peri_global;
|
|
}
|
|
|
|
return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
|
|
}
|
|
|
|
|
|
// returns gate capacitance in Farads
|
|
// actually this function is the same as gate_C() now
|
|
double gate_C_pass(
|
|
double width, // gate width in um (length is Lphy_periph_global)
|
|
double wirelength, // poly wire length going to gate in lambda
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
// v5.0
|
|
const TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((_is_dram) && (_is_cell)) {
|
|
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
|
} else if ((_is_dram) && (_is_wl_tr)) {
|
|
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
|
} else if ((!_is_dram) && _is_cell) {
|
|
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
|
} else {
|
|
dt = &g_tp.peri_global;
|
|
}
|
|
|
|
return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire;
|
|
}
|
|
|
|
|
|
|
|
double drain_C_(
|
|
double width,
|
|
int nchannel,
|
|
int stack,
|
|
int next_arg_thresh_folding_width_or_height_cell,
|
|
double fold_dimension,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
double w_folded_tr;
|
|
const TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((_is_dram) && (_is_cell)) {
|
|
dt = &g_tp.dram_acc; // DRAM cell access transistor
|
|
} else if ((_is_dram) && (_is_wl_tr)) {
|
|
dt = &g_tp.dram_wl; // DRAM wordline transistor
|
|
} else if ((!_is_dram) && _is_cell) {
|
|
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
|
} else {
|
|
dt = &g_tp.peri_global;
|
|
}
|
|
|
|
double c_junc_area = dt->C_junc;
|
|
double c_junc_sidewall = dt->C_junc_sidewall;
|
|
double c_fringe = 2 * dt->C_fringe;
|
|
double c_overlap = 2 * dt->C_overlap;
|
|
double drain_C_metal_connecting_folded_tr = 0;
|
|
|
|
// determine the width of the transistor after folding (if it is getting folded)
|
|
if (next_arg_thresh_folding_width_or_height_cell == 0) {
|
|
// interpret fold_dimension as the the folding width threshold
|
|
// i.e. the value of transistor width above which the transistor gets folded
|
|
w_folded_tr = fold_dimension;
|
|
} else { // interpret fold_dimension as the height of the cell that this transistor is part of.
|
|
double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL;
|
|
// TODO : w_folded_tr must come from Component::compute_gate_area()
|
|
double ratio_p_to_n = 2.0 / (2.0 + 1.0);
|
|
if (nchannel) {
|
|
w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
|
|
} else {
|
|
w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS);
|
|
}
|
|
}
|
|
int num_folded_tr = (int) (ceil(width / w_folded_tr));
|
|
|
|
if (num_folded_tr < 2) {
|
|
w_folded_tr = width;
|
|
}
|
|
|
|
double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain
|
|
(stack - 1) * g_tp.spacing_poly_to_poly;
|
|
double drain_h_for_sidewall = w_folded_tr;
|
|
double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1);
|
|
if (num_folded_tr > 1) {
|
|
total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) +
|
|
(num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly);
|
|
|
|
if (num_folded_tr % 2 == 0) {
|
|
drain_h_for_sidewall = 0;
|
|
}
|
|
total_drain_height_for_cap_wrt_gate *= num_folded_tr;
|
|
drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w;
|
|
}
|
|
|
|
double drain_C_area = c_junc_area * total_drain_w * w_folded_tr;
|
|
double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w);
|
|
double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate;
|
|
|
|
return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr);
|
|
}
|
|
|
|
|
|
double tr_R_on(
|
|
double width,
|
|
int nchannel,
|
|
int stack,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
const TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((_is_dram) && (_is_cell)) {
|
|
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
|
} else if ((_is_dram) && (_is_wl_tr)) {
|
|
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
|
} else if ((!_is_dram) && _is_cell) {
|
|
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
|
} else {
|
|
dt = &g_tp.peri_global;
|
|
}
|
|
|
|
double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
|
|
return (stack * restrans / width);
|
|
}
|
|
|
|
|
|
/* This routine operates in reverse: given a resistance, it finds
|
|
* the transistor width that would have this R. It is used in the
|
|
* data wordline to estimate the wordline driver size. */
|
|
|
|
// returns width in um
|
|
double R_to_w(
|
|
double res,
|
|
int nchannel,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
const TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((_is_dram) && (_is_cell)) {
|
|
dt = &g_tp.dram_acc; //DRAM cell access transistor
|
|
} else if ((_is_dram) && (_is_wl_tr)) {
|
|
dt = &g_tp.dram_wl; //DRAM wordline transistor
|
|
} else if ((!_is_dram) && (_is_cell)) {
|
|
dt = &g_tp.sram_cell; // SRAM cell access transistor
|
|
} else {
|
|
dt = &g_tp.peri_global;
|
|
}
|
|
|
|
double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on;
|
|
return (restrans / res);
|
|
}
|
|
|
|
|
|
double pmos_to_nmos_sz_ratio(
|
|
bool _is_dram,
|
|
bool _is_wl_tr) {
|
|
double p_to_n_sizing_ratio;
|
|
if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
|
p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio;
|
|
} else { //DRAM or SRAM all other transistors
|
|
p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio;
|
|
}
|
|
return p_to_n_sizing_ratio;
|
|
}
|
|
|
|
|
|
// "Timing Models for MOS Circuits" by Mark Horowitz, 1984
|
|
double horowitz(
|
|
double inputramptime, // input rise time
|
|
double tf, // time constant of gate
|
|
double vs1, // threshold voltage
|
|
double vs2, // threshold voltage
|
|
int rise) { // whether input rises or fall
|
|
if (inputramptime == 0 && vs1 == vs2) {
|
|
return tf * (vs1 < 1 ? -log(vs1) : log(vs1));
|
|
}
|
|
double a, b, td;
|
|
|
|
a = inputramptime / tf;
|
|
if (rise == RISE) {
|
|
b = 0.5;
|
|
td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) +
|
|
tf * (log(vs1) - log(vs2));
|
|
} else {
|
|
b = 0.4;
|
|
td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) +
|
|
tf * (log(1.0 - vs1) - log(1.0 - vs2));
|
|
}
|
|
return (td);
|
|
}
|
|
|
|
double cmos_Ileak(
|
|
double nWidth,
|
|
double pWidth,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
|
dt = &(g_tp.sram_cell);
|
|
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
|
dt = &(g_tp.dram_wl);
|
|
} else { //DRAM or SRAM all other transistors
|
|
dt = &(g_tp.peri_global);
|
|
}
|
|
return nWidth*dt->I_off_n + pWidth*dt->I_off_p;
|
|
}
|
|
|
|
|
|
double simplified_nmos_leakage(
|
|
double nwidth,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
|
dt = &(g_tp.sram_cell);
|
|
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
|
dt = &(g_tp.dram_wl);
|
|
} else { //DRAM or SRAM all other transistors
|
|
dt = &(g_tp.peri_global);
|
|
}
|
|
return nwidth * dt->I_off_n;
|
|
}
|
|
|
|
int factorial(int n, int m) {
|
|
int fa = m, i;
|
|
for (i = m + 1; i <= n; i++)
|
|
fa *= i;
|
|
return fa;
|
|
}
|
|
|
|
int combination(int n, int m) {
|
|
int ret;
|
|
ret = factorial(n, m + 1) / factorial(n - m);
|
|
return ret;
|
|
}
|
|
|
|
double simplified_pmos_leakage(
|
|
double pwidth,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
|
dt = &(g_tp.sram_cell);
|
|
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
|
dt = &(g_tp.dram_wl);
|
|
} else { //DRAM or SRAM all other transistors
|
|
dt = &(g_tp.peri_global);
|
|
}
|
|
return pwidth * dt->I_off_p;
|
|
}
|
|
|
|
double cmos_Ig_n(
|
|
double nWidth,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
|
dt = &(g_tp.sram_cell);
|
|
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
|
dt = &(g_tp.dram_wl);
|
|
} else { //DRAM or SRAM all other transistors
|
|
dt = &(g_tp.peri_global);
|
|
}
|
|
return nWidth*dt->I_g_on_n;
|
|
}
|
|
|
|
double cmos_Ig_p(
|
|
double pWidth,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr) {
|
|
TechnologyParameter::DeviceType * dt;
|
|
|
|
if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor
|
|
dt = &(g_tp.sram_cell);
|
|
} else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor
|
|
dt = &(g_tp.dram_wl);
|
|
} else { //DRAM or SRAM all other transistors
|
|
dt = &(g_tp.peri_global);
|
|
}
|
|
return pWidth*dt->I_g_on_p;
|
|
}
|
|
|
|
double cmos_Isub_leakage(
|
|
double nWidth,
|
|
double pWidth,
|
|
int fanin,
|
|
enum Gate_type g_type,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr,
|
|
enum Half_net_topology topo) {
|
|
assert (fanin >= 1);
|
|
double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr);
|
|
double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr);
|
|
double Isub = 0;
|
|
int num_states;
|
|
int num_off_tx;
|
|
|
|
num_states = int(pow(2.0, fanin));
|
|
|
|
switch (g_type) {
|
|
case nmos:
|
|
if (fanin == 1) {
|
|
Isub = nmos_leak / num_states;
|
|
} else {
|
|
if (topo == parallel) {
|
|
//only when all tx are off, leakage power is non-zero.
|
|
//The possibility of this state is 1/num_states
|
|
Isub = nmos_leak * fanin / num_states;
|
|
} else {
|
|
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
|
//when num_off_tx ==0 there is no leakage power
|
|
Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
|
(num_off_tx - 1)) *
|
|
combination(fanin, num_off_tx);
|
|
}
|
|
Isub /= num_states;
|
|
}
|
|
|
|
}
|
|
break;
|
|
case pmos:
|
|
if (fanin == 1) {
|
|
Isub = pmos_leak / num_states;
|
|
} else {
|
|
if (topo == parallel) {
|
|
//only when all tx are off, leakage power is non-zero.
|
|
//The possibility of this state is 1/num_states
|
|
Isub = pmos_leak * fanin / num_states;
|
|
} else {
|
|
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
|
//when num_off_tx ==0 there is no leakage power
|
|
Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
|
(num_off_tx - 1)) *
|
|
combination(fanin, num_off_tx);
|
|
}
|
|
Isub /= num_states;
|
|
}
|
|
|
|
}
|
|
break;
|
|
case inv:
|
|
Isub = (nmos_leak + pmos_leak) / 2;
|
|
break;
|
|
case nand:
|
|
Isub += fanin * pmos_leak;//the pullup network
|
|
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
|
// the pulldown network
|
|
Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
|
(num_off_tx - 1)) *
|
|
combination(fanin, num_off_tx);
|
|
}
|
|
Isub /= num_states;
|
|
break;
|
|
case nor:
|
|
for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) {
|
|
// the pullup network
|
|
Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR,
|
|
(num_off_tx - 1)) *
|
|
combination(fanin, num_off_tx);
|
|
}
|
|
Isub += fanin * nmos_leak;//the pulldown network
|
|
Isub /= num_states;
|
|
break;
|
|
case tri:
|
|
Isub += (nmos_leak + pmos_leak) / 2;//enabled
|
|
//disabled upper bound of leakage power
|
|
Isub += nmos_leak * UNI_LEAK_STACK_FACTOR;
|
|
Isub /= 2;
|
|
break;
|
|
case tg:
|
|
Isub = (nmos_leak + pmos_leak) / 2;
|
|
break;
|
|
default:
|
|
assert(0);
|
|
break;
|
|
}
|
|
|
|
return Isub;
|
|
}
|
|
|
|
|
|
double cmos_Ig_leakage(
|
|
double nWidth,
|
|
double pWidth,
|
|
int fanin,
|
|
enum Gate_type g_type,
|
|
bool _is_dram,
|
|
bool _is_cell,
|
|
bool _is_wl_tr,
|
|
enum Half_net_topology topo) {
|
|
assert (fanin >= 1);
|
|
double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr);
|
|
double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr);
|
|
double Ig_on = 0;
|
|
int num_states;
|
|
int num_on_tx;
|
|
|
|
num_states = int(pow(2.0, fanin));
|
|
|
|
switch (g_type) {
|
|
case nmos:
|
|
if (fanin == 1) {
|
|
Ig_on = nmos_leak / num_states;
|
|
} else {
|
|
if (topo == parallel) {
|
|
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
|
Ig_on += nmos_leak * combination(fanin, num_on_tx) *
|
|
num_on_tx;
|
|
}
|
|
} else {
|
|
//pull down network when all TXs are on.
|
|
Ig_on += nmos_leak * fanin;
|
|
//num_on_tx is the number of on tx
|
|
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
|
//when num_on_tx=[1,n-1]
|
|
//TODO: this is a approximation now, a precise computation
|
|
//will be very complicated.
|
|
Ig_on += nmos_leak * combination(fanin, num_on_tx) *
|
|
num_on_tx / 2;
|
|
}
|
|
Ig_on /= num_states;
|
|
}
|
|
}
|
|
break;
|
|
case pmos:
|
|
if (fanin == 1) {
|
|
Ig_on = pmos_leak / num_states;
|
|
} else {
|
|
if (topo == parallel) {
|
|
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
|
Ig_on += pmos_leak * combination(fanin, num_on_tx) *
|
|
num_on_tx;
|
|
}
|
|
} else {
|
|
//pull down network when all TXs are on.
|
|
Ig_on += pmos_leak * fanin;
|
|
//num_on_tx is the number of on tx
|
|
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
|
//when num_on_tx=[1,n-1]
|
|
//TODO: this is a approximation now, a precise computation
|
|
//will be very complicated.
|
|
Ig_on += pmos_leak * combination(fanin, num_on_tx) *
|
|
num_on_tx / 2;
|
|
}
|
|
Ig_on /= num_states;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case inv:
|
|
Ig_on = (nmos_leak + pmos_leak) / 2;
|
|
break;
|
|
case nand:
|
|
//pull up network
|
|
//when num_on_tx=[1,n]
|
|
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
|
Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx;
|
|
}
|
|
|
|
//pull down network
|
|
Ig_on += nmos_leak * fanin;//pull down network when all TXs are on.
|
|
//num_on_tx is the number of on tx
|
|
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
|
//when num_on_tx=[1,n-1]
|
|
//TODO: this is a approximation now, a precise computation will be
|
|
//very complicated.
|
|
Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
|
|
}
|
|
Ig_on /= num_states;
|
|
break;
|
|
case nor:
|
|
// num_on_tx is the number of on tx in pull up network
|
|
Ig_on += pmos_leak * fanin;//pull up network when all TXs are on.
|
|
for (num_on_tx = 1; num_on_tx < fanin; num_on_tx++) {
|
|
Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2;
|
|
|
|
}
|
|
//pull down network
|
|
for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) {
|
|
//when num_on_tx=[1,n]
|
|
Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx;
|
|
}
|
|
Ig_on /= num_states;
|
|
break;
|
|
case tri:
|
|
Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2;//enabled
|
|
//disabled upper bound of leakage power
|
|
Ig_on += (nmos_leak + pmos_leak) / 2;
|
|
Ig_on /= 2;
|
|
break;
|
|
case tg:
|
|
Ig_on = (nmos_leak + pmos_leak) / 2;
|
|
break;
|
|
default:
|
|
assert(0);
|
|
break;
|
|
}
|
|
|
|
return Ig_on;
|
|
}
|
|
|
|
double shortcircuit_simple(
|
|
double vt,
|
|
double velocity_index,
|
|
double c_in,
|
|
double c_out,
|
|
double w_nmos,
|
|
double w_pmos,
|
|
double i_on_n,
|
|
double i_on_p,
|
|
double i_on_n_in,
|
|
double i_on_p_in,
|
|
double vdd) {
|
|
|
|
double p_short_circuit, p_short_circuit_discharge, p_short_circuit_charge, p_short_circuit_discharge_low, p_short_circuit_discharge_high, p_short_circuit_charge_low, p_short_circuit_charge_high; //this is actually energy
|
|
double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
|
|
|
|
fo_n = i_on_n / i_on_n_in;
|
|
fo_p = i_on_p / i_on_p_in;
|
|
fanout = c_out / c_in;
|
|
beta_ratio = i_on_p / i_on_n;
|
|
vt_to_vdd_ratio = vt / vdd;
|
|
|
|
//p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
|
p_short_circuit_discharge_low =
|
|
10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
|
|
pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
|
|
vt_to_vdd_ratio)) * c_in *
|
|
vdd * vdd * fo_p * fo_p / fanout / beta_ratio;
|
|
p_short_circuit_charge_low =
|
|
10 / 3 * (pow(((vdd - vt) - vt_to_vdd_ratio), 3.0) /
|
|
pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio *
|
|
vt_to_vdd_ratio)) * c_in *
|
|
vdd * vdd * fo_n * fo_n / fanout * beta_ratio;
|
|
// double t1, t2, t3, t4, t5;
|
|
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
|
|
// t2=pow(velocity_index,2.0);
|
|
// t3=pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio);
|
|
// t4=t1/t2/t3;
|
|
// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
|
|
|
|
p_short_circuit_discharge_high =
|
|
pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) * c_in * vdd * vdd *
|
|
fo_p / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 * velocity_index);
|
|
p_short_circuit_charge_high = pow(((vdd - vt) - vt_to_vdd_ratio), 1.5) *
|
|
c_in * vdd * vdd * fo_n / 10 / pow(2, 3 * vt_to_vdd_ratio + 2 *
|
|
velocity_index);
|
|
|
|
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),1.5);
|
|
// t2=pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
|
|
// t3=t1/t2;
|
|
// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
|
|
// p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
|
|
// p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high); //harmmoic mean cannot be applied simple formulas.
|
|
|
|
p_short_circuit_discharge = p_short_circuit_discharge_low;
|
|
p_short_circuit_charge = p_short_circuit_charge_low;
|
|
p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge) / 2;
|
|
|
|
return (p_short_circuit);
|
|
}
|
|
|
|
double shortcircuit(
|
|
double vt,
|
|
double velocity_index,
|
|
double c_in,
|
|
double c_out,
|
|
double w_nmos,
|
|
double w_pmos,
|
|
double i_on_n,
|
|
double i_on_p,
|
|
double i_on_n_in,
|
|
double i_on_p_in,
|
|
double vdd) {
|
|
|
|
//this is actually energy
|
|
double p_short_circuit = 0, p_short_circuit_discharge;
|
|
double fo_n, fo_p, fanout, beta_ratio, vt_to_vdd_ratio;
|
|
double f_alpha, k_v, e, g_v_alpha, h_v_alpha;
|
|
|
|
fo_n = i_on_n / i_on_n_in;
|
|
fo_p = i_on_p / i_on_p_in;
|
|
fanout = 1;
|
|
beta_ratio = i_on_p / i_on_n;
|
|
vt_to_vdd_ratio = vt / vdd;
|
|
e = 2.71828;
|
|
f_alpha = 1 / (velocity_index + 2) - velocity_index /
|
|
(2 * (velocity_index + 3)) + velocity_index / (velocity_index + 4) *
|
|
(velocity_index / 2 - 1);
|
|
k_v = 0.9 / 0.8 + (vdd - vt) / 0.8 * log(10 * (vdd - vt) / e);
|
|
g_v_alpha = (velocity_index + 1) *
|
|
pow((1 - velocity_index), velocity_index) *
|
|
pow((1 - velocity_index), velocity_index / 2) / f_alpha /
|
|
pow((1 - velocity_index - velocity_index),
|
|
(velocity_index / 2 + velocity_index + 2));
|
|
h_v_alpha = pow(2, velocity_index) * (velocity_index + 1) *
|
|
pow((1 - velocity_index), velocity_index) /
|
|
pow((1 - velocity_index - velocity_index), (velocity_index + 1));
|
|
|
|
//p_short_circuit_discharge_low = 10/3*(pow(0.5-vt_to_vdd_ratio,3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
|
// p_short_circuit_discharge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_p*fo_p/fanout/beta_ratio;
|
|
// p_short_circuit_charge_low = 10/3*(pow(((vdd-vt)-vt_to_vdd_ratio),3.0)/pow(velocity_index,2.0)/pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio))*c_in*vdd*vdd*fo_n*fo_n/fanout*beta_ratio;
|
|
// double t1, t2, t3, t4, t5;
|
|
// t1=pow(((vdd-vt)-vt_to_vdd_ratio),3);
|
|
// t2=pow(velocity_index,2.0);
|
|
// t3=pow(2.0,3*vt_to_vdd_ratio*vt_to_vdd_ratio);
|
|
// t4=t1/t2/t3;
|
|
//
|
|
// cout <<t1<<"t1\n"<<t2<<"t2\n"<<t3<<"t3\n"<<t4<<"t4\n"<<fanout<<endl;
|
|
//
|
|
//
|
|
// p_short_circuit_discharge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_p/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
|
|
// p_short_circuit_charge_high = pow(((vdd-vt)-vt_to_vdd_ratio),1.5)*c_in*vdd*vdd*fo_n/10/pow(2, 3*vt_to_vdd_ratio+2*velocity_index);
|
|
//
|
|
// p_short_circuit_discharge = 1.0/(1.0/p_short_circuit_discharge_low + 1.0/p_short_circuit_discharge_high);
|
|
// p_short_circuit_charge = 1/(1/p_short_circuit_charge_low + 1/p_short_circuit_charge_high);
|
|
//
|
|
// p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge)/2;
|
|
//
|
|
// p_short_circuit = p_short_circuit_discharge;
|
|
|
|
p_short_circuit_discharge = k_v * vdd * vdd * c_in * fo_p * fo_p /
|
|
((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v + h_v_alpha *
|
|
fo_p);
|
|
return (p_short_circuit);
|
|
}
|