diff --git a/src/arch/alpha/tlb.cc b/src/arch/alpha/tlb.cc index 3360b34c5..fcd2b518b 100644 --- a/src/arch/alpha/tlb.cc +++ b/src/arch/alpha/tlb.cc @@ -76,6 +76,8 @@ TLB::~TLB() void TLB::regStats() { + BaseTLB::regStats(); + fetch_hits .name(name() + ".fetch_hits") .desc("ITB hits"); diff --git a/src/arch/mips/tlb.cc b/src/arch/mips/tlb.cc index d2aa5ad70..340c83021 100644 --- a/src/arch/mips/tlb.cc +++ b/src/arch/mips/tlb.cc @@ -226,6 +226,8 @@ TLB::unserialize(CheckpointIn &cp) void TLB::regStats() { + BaseTLB::regStats(); + read_hits .name(name() + ".read_hits") .desc("DTB read hits") diff --git a/src/arch/power/tlb.cc b/src/arch/power/tlb.cc index edfb4f453..90a341d85 100644 --- a/src/arch/power/tlb.cc +++ b/src/arch/power/tlb.cc @@ -223,6 +223,8 @@ TLB::unserialize(CheckpointIn &cp) void TLB::regStats() { + BaseTLB::regStats(); + read_hits .name(name() + ".read_hits") .desc("DTB read hits") diff --git a/src/cpu/testers/memtest/memtest.cc b/src/cpu/testers/memtest/memtest.cc index b0dde6d27..223532088 100644 --- a/src/cpu/testers/memtest/memtest.cc +++ b/src/cpu/testers/memtest/memtest.cc @@ -197,6 +197,8 @@ MemTest::completeRequest(PacketPtr pkt, bool functional) void MemTest::regStats() { + MemObject::regStats(); + using namespace Stats; numReadsStat diff --git a/src/dev/arm/flash_device.cc b/src/dev/arm/flash_device.cc index 60c910626..297354b65 100644 --- a/src/dev/arm/flash_device.cc +++ b/src/dev/arm/flash_device.cc @@ -472,6 +472,8 @@ FlashDevice::getUnknownPages(uint32_t index) void FlashDevice::regStats() { + AbstractNVM::regStats(); + using namespace Stats; std::string fd_name = name() + ".FlashDevice"; diff --git a/src/dev/arm/hdlcd.cc b/src/dev/arm/hdlcd.cc index b04de21bf..0f63f23fc 100644 --- a/src/dev/arm/hdlcd.cc +++ b/src/dev/arm/hdlcd.cc @@ -97,6 +97,8 @@ HDLcd::~HDLcd() void HDLcd::regStats() { + AmbaDmaDevice::regStats(); + using namespace Stats; stats.underruns diff --git a/src/dev/arm/ufs_device.cc b/src/dev/arm/ufs_device.cc index 
07d50903b..fe05b3279 100644 --- a/src/dev/arm/ufs_device.cc +++ b/src/dev/arm/ufs_device.cc @@ -774,6 +774,8 @@ UFSHostDeviceParams::create() void UFSHostDevice::regStats() { + DmaDevice::regStats(); + using namespace Stats; std::string UFSHost_name = name() + ".UFSDiskHost"; diff --git a/src/mem/probes/stack_dist.cc b/src/mem/probes/stack_dist.cc index a447f49e5..b12c81e2c 100644 --- a/src/mem/probes/stack_dist.cc +++ b/src/mem/probes/stack_dist.cc @@ -57,6 +57,8 @@ StackDistProbe::StackDistProbe(StackDistProbeParams *p) void StackDistProbe::regStats() { + BaseMemProbe::regStats(); + const StackDistProbeParams *p( dynamic_cast<const StackDistProbeParams *>(params())); assert(p); diff --git a/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc b/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc index 1213073e9..2bd2acb9f 100644 --- a/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc +++ b/src/mem/ruby/network/garnet/BaseGarnetNetwork.cc @@ -69,6 +69,8 @@ BaseGarnetNetwork::init() void BaseGarnetNetwork::regStats() { + Network::regStats(); + m_flits_received .init(m_virtual_networks) .name(name() + ".flits_received") diff --git a/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc b/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc index 97bc1abdd..dab9b7dda 100644 --- a/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc +++ b/src/mem/ruby/network/garnet/fixed-pipeline/Router_d.cc @@ -158,6 +158,8 @@ Router_d::update_sw_winner(int inport, flit_d *t_flit) void Router_d::regStats() { + BasicRouter::regStats(); + m_buffer_reads .name(name() + ".buffer_reads") .flags(Stats::nozero) diff --git a/src/mem/ruby/network/simple/SimpleNetwork.cc b/src/mem/ruby/network/simple/SimpleNetwork.cc index 25d0b6f4b..2fc7b6440 100644 --- a/src/mem/ruby/network/simple/SimpleNetwork.cc +++ b/src/mem/ruby/network/simple/SimpleNetwork.cc @@ -132,6 +132,8 @@ SimpleNetwork::makeInternalLink(SwitchID src, SwitchID dest, BasicLink* link, void SimpleNetwork::regStats() { + Network::regStats(); + for (MessageSizeType 
type = MessageSizeType_FIRST; type < MessageSizeType_NUM; ++type) { m_msg_counts[(unsigned int) type] diff --git a/src/mem/ruby/network/simple/Switch.cc b/src/mem/ruby/network/simple/Switch.cc index 747884f16..78f5b609c 100644 --- a/src/mem/ruby/network/simple/Switch.cc +++ b/src/mem/ruby/network/simple/Switch.cc @@ -112,6 +112,8 @@ Switch::getThrottle(LinkID link_number) const void Switch::regStats() { + BasicRouter::regStats(); + for (int link = 0; link < m_throttles.size(); link++) { m_throttles[link]->regStats(name()); } diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 2a53e53be..be48628e9 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -76,6 +76,8 @@ AbstractController::resetStats() void AbstractController::regStats() { + MemObject::regStats(); + m_fully_busy_cycles .name(name() + ".fully_busy_cycles") .desc("cycles for which number of transistions == max transitions") diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index f7c196119..36d109769 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -488,6 +488,8 @@ CacheMemory::isLocked(Addr address, int context) void CacheMemory::regStats() { + SimObject::regStats(); + m_demand_hits .name(name() + ".demand_hits") .desc("Number of cache demand hits") diff --git a/src/mem/ruby/structures/Prefetcher.cc b/src/mem/ruby/structures/Prefetcher.cc index ce6d36c04..eef51dcf7 100644 --- a/src/mem/ruby/structures/Prefetcher.cc +++ b/src/mem/ruby/structures/Prefetcher.cc @@ -86,6 +86,8 @@ Prefetcher::~Prefetcher() void Prefetcher::regStats() { + SimObject::regStats(); + numMissObserved .name(name() + ".miss_observed") .desc("number of misses observed") diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index 62330e19d..8ebd3494a 100644 --- 
a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -89,7 +89,10 @@ class RubySystem : public ClockedObject return m_profiler; } - void regStats() override { m_profiler->regStats(name()); } + void regStats() override { + ClockedObject::regStats(); + m_profiler->regStats(name()); + } void collateStats() { m_profiler->collateStats(); } void resetStats() override; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index dedade3cf..fbaad8407 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -688,6 +688,8 @@ Sequencer::evictionCallback(Addr address) void Sequencer::regStats() { + RubyPort::regStats(); + m_store_waiting_on_load .name(name() + ".store_waiting_on_load") .desc("Number of times a store aliased with a pending load") diff --git a/src/mem/snoop_filter.cc b/src/mem/snoop_filter.cc index 9d02ed249..9e8f8afb8 100755 --- a/src/mem/snoop_filter.cc +++ b/src/mem/snoop_filter.cc @@ -351,6 +351,8 @@ SnoopFilter::updateResponse(const Packet* cpkt, const SlavePort& slave_port) void SnoopFilter::regStats() { + SimObject::regStats(); + totRequests .name(name() + ".tot_requests") .desc("Total number of requests made to the snoop filter."); diff --git a/src/sim/ClockedObject.py b/src/sim/ClockedObject.py index 2562f1f01..b933ea07a 100644 --- a/src/sim/ClockedObject.py +++ b/src/sim/ClockedObject.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012 ARM Limited +# Copyright (c) 2012, 2015 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -39,6 +39,24 @@ from m5.SimObject import SimObject from m5.params import * from m5.proxy import * +# Enumerate set of allowed power states that can be used by a clocked object. +# The list is kept generic to express a base minimal set. +# State definition :- +# Undefined: Invalid state, no power state derived information is available. 
+# On: The logic block is actively running and consuming dynamic and leakage +# energy depending on the amount of processing required. +# Clk_gated: The clock circuitry within the block is gated to save dynamic +# energy, the power supply to the block is still on and leakage +# energy is being consumed by the block. +# Sram_retention: The SRAMs within the logic blocks are pulled into retention +# state to reduce leakage energy further. +# Off: The logic block is power gated and is not consuming any energy. +class PwrState(Enum): vals = ['UNDEFINED', + 'ON', + 'CLK_GATED', + 'SRAM_RETENTION', + 'OFF'] + class ClockedObject(SimObject): type = 'ClockedObject' abstract = True @@ -47,3 +65,12 @@ class ClockedObject(SimObject): # The clock domain this clocked object belongs to, inheriting the # parent's clock domain by default clk_domain = Param.ClockDomain(Parent.clk_domain, "Clock domain") + + # Provide initial power state, should ideally get redefined in startup + # routine + default_p_state = Param.PwrState("UNDEFINED", "Default Power State") + + p_state_clk_gate_min = Param.Latency('1ns', "Min value of the distribution") + p_state_clk_gate_max = Param.Latency('1s', "Max value of the distribution") + p_state_clk_gate_bins = Param.Unsigned('20', + "# bins in clk gated distribution") diff --git a/src/sim/SConscript b/src/sim/SConscript index 3238301ed..e40c43f0c 100644 --- a/src/sim/SConscript +++ b/src/sim/SConscript @@ -69,6 +69,7 @@ Source('voltage_domain.cc') Source('linear_solver.cc') Source('system.cc') Source('dvfs_handler.cc') +Source('clocked_object.cc') if env['TARGET_ISA'] != 'null': SimObject('InstTracer.py') diff --git a/src/sim/clock_domain.cc b/src/sim/clock_domain.cc index 1ccee7f1d..9865c4d11 100644 --- a/src/sim/clock_domain.cc +++ b/src/sim/clock_domain.cc @@ -56,6 +56,8 @@ void ClockDomain::regStats() { + SimObject::regStats(); + using namespace Stats; // Expose the current clock period as a stat for observability in diff --git 
a/src/sim/clocked_object.cc b/src/sim/clocked_object.cc new file mode 100644 index 000000000..9a682a4ce --- /dev/null +++ b/src/sim/clocked_object.cc @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Akash Bagdia + * David Guillen Fandos + */ + +#include "sim/clocked_object.hh" + +#include "base/misc.hh" + +void +ClockedObject::serialize(CheckpointOut &cp) const +{ + unsigned int currPwrState = (unsigned int)_currPwrState; + + SERIALIZE_SCALAR(currPwrState); + SERIALIZE_SCALAR(prvEvalTick); +} + +void +ClockedObject::unserialize(CheckpointIn &cp) +{ + unsigned int currPwrState; + + UNSERIALIZE_SCALAR(currPwrState); + UNSERIALIZE_SCALAR(prvEvalTick); + + _currPwrState = Enums::PwrState(currPwrState); +} + +void +ClockedObject::pwrState(Enums::PwrState p) +{ + // Function should ideally be called only when there is a state change + if (_currPwrState == p) { + warn("ClockedObject: Already in the requested power state, request "\ + "ignored"); + return; + } + + // No need to compute stats if in the same tick, update state + // though. This can happen in cases like a) during start of the + // simulation multiple state changes happen in init/startup phase, + // b) one takes a decision to migrate state but decides to revert + // back to the original state in the same tick if other conditions + // are not met elsewhere. Any state change related stats would have + // been recorded on previous call to the pwrState() function. 
+ if (prvEvalTick == curTick()) { + warn("ClockedObject: More than one power state change request "\ + "encountered within the same simulation tick"); + _currPwrState = p; + return; + } + + // Record stats for previous state. + computeStats(); + + _currPwrState = p; + + numPwrStateTransitions++; +} + +void +ClockedObject::computeStats() +{ + // Calculate time elapsed from last (valid) state change + Tick elapsed_time = curTick() - prvEvalTick; + + pwrStateResidencyTicks[_currPwrState] += elapsed_time; + + // Time spent in CLK_GATED state, this might change depending on + // transition to other low power states in respective simulation + // objects. + if (_currPwrState == Enums::PwrState::CLK_GATED) { + pwrStateClkGateDist.sample(elapsed_time); + } + + prvEvalTick = curTick(); +} + +std::vector<double> +ClockedObject::pwrStateWeights() const +{ + // Get residency stats + std::vector<double> ret; + Stats::VCounter residencies; + pwrStateResidencyTicks.value(residencies); + + // Account for current state too! 
+ Tick elapsed_time = curTick() - prvEvalTick; + residencies[_currPwrState] += elapsed_time; + + ret.resize(Enums::PwrState::Num_PwrState); + for (unsigned i = 0; i < Enums::PwrState::Num_PwrState; i++) + ret[i] = residencies[i] / + (pwrStateResidencyTicks.total() + elapsed_time); + + return ret; +} + +void +ClockedObject::regStats() +{ + SimObject::regStats(); + + using namespace Stats; + + numPwrStateTransitions + .name(params()->name + ".numPwrStateTransitions") + .desc("Number of power state transitions") + ; + + // Each sample is time in ticks + unsigned num_bins = std::max(params()->p_state_clk_gate_bins, 10U); + pwrStateClkGateDist + .init(params()->p_state_clk_gate_min, params()->p_state_clk_gate_max, + (params()->p_state_clk_gate_max / num_bins)) + .name(params()->name + ".pwrStateClkGateDist") + .desc("Distribution of time spent in the clock gated state") + .flags(pdf) + ; + + pwrStateResidencyTicks + .init(Enums::PwrState::Num_PwrState) + .name(params()->name + ".pwrStateResidencyTicks") + .desc("Cumulative time (in ticks) in various power states") + ; + for (int i = 0; i < Enums::PwrState::Num_PwrState; i++) { + pwrStateResidencyTicks.subname(i, Enums::PwrStateStrings[i]); + } + + numPwrStateTransitions = 0; + + /** + * For every stats dump, the power state residency and other distribution + * stats should be computed just before the dump to ensure correct stats + * value being reported for current dump window. It avoids things like + * having any unreported time spent in a power state to be forwarded to the + * next dump window which might have rather unpleasant effects (like + * perturbing the distribution stats). 
+ */ + registerDumpCallback(new ClockedObjectDumpCallback(this)); +} diff --git a/src/sim/clocked_object.hh b/src/sim/clocked_object.hh index b9a2481ec..1ba5ca617 100644 --- a/src/sim/clocked_object.hh +++ b/src/sim/clocked_object.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 ARM Limited + * Copyright (c) 2012-2013, 2015 ARM Limited * Copyright (c) 2013 Cornell University * All rights reserved * @@ -37,6 +37,8 @@ * * Authors: Andreas Hansson * Christopher Torng + * Akash Bagdia + * David Guillen Fandos */ /** @@ -47,8 +49,10 @@ #ifndef __SIM_CLOCKED_OBJECT_HH__ #define __SIM_CLOCKED_OBJECT_HH__ +#include "base/callback.hh" #include "base/intmath.hh" #include "base/misc.hh" +#include "enums/PwrState.hh" #include "params/ClockedObject.hh" #include "sim/core.hh" #include "sim/clock_domain.hh" @@ -233,7 +237,58 @@ class ClockedObject { public: ClockedObject(const ClockedObjectParams *p) - : SimObject(p), Clocked(*p->clk_domain) { } + : SimObject(p), Clocked(*p->clk_domain), + _currPwrState(p->default_p_state), + prvEvalTick(0) + { } + + /** Parameters of ClockedObject */ + typedef ClockedObjectParams Params; + const Params* params() const + { return reinterpret_cast<const Params*>(_params); } + + void serialize(CheckpointOut &cp) const override; + void unserialize(CheckpointIn &cp) override; + + inline Enums::PwrState pwrState() const + { return _currPwrState; } + + inline std::string pwrStateName() const + { return Enums::PwrStateStrings[_currPwrState]; } + + /** Returns the fraction of time spent in each power state */ + std::vector<double> pwrStateWeights() const; + + /** + * Record stats values like state residency by computing the time + * difference from previous update. Also, updates the previous + * evaluation tick once all stats are recorded. + * Usually called on power state change and stats dump callback. 
+ */ + void computeStats(); + + void pwrState(Enums::PwrState); + void regStats() override; + + protected: + + /** To keep track of the current power state */ + Enums::PwrState _currPwrState; + + Tick prvEvalTick; + + Stats::Scalar numPwrStateTransitions; + Stats::Distribution pwrStateClkGateDist; + Stats::Vector pwrStateResidencyTicks; + +}; + +class ClockedObjectDumpCallback : public Callback +{ + ClockedObject *co; + public: + ClockedObjectDumpCallback(ClockedObject *co_t) : co(co_t) {} + void process() override { co->computeStats(); } }; #endif //__SIM_CLOCKED_OBJECT_HH__ diff --git a/src/sim/voltage_domain.cc b/src/sim/voltage_domain.cc index b82efda33..61715dfbc 100644 --- a/src/sim/voltage_domain.cc +++ b/src/sim/voltage_domain.cc @@ -128,6 +128,8 @@ VoltageDomain::startup() { void VoltageDomain::regStats() { + SimObject::regStats(); + using namespace Stats; currentVoltage .method(this, &VoltageDomain::voltage) .name(params()->name + ".voltage")