From 1b370431d0ac51eb54bfbf17247f935d48995a34 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 5 Jun 2012 01:23:08 -0400 Subject: [PATCH] sim: Remove FastAlloc While FastAlloc provides a small performance increase (~1.5%) over regular malloc it isn't thread safe. After removing FastAlloc and using tcmalloc I've seen a performance increase of 12% over libc malloc when running twolf for ARM. --- SConstruct | 8 +- src/arch/x86/pagetable_walker.hh | 3 +- src/base/SConscript | 1 - src/base/fast_alloc.cc | 72 -------- src/base/fast_alloc.hh | 190 --------------------- src/cpu/base_dyn_inst.hh | 3 +- src/cpu/inorder/inorder_dyn_inst.hh | 3 +- src/cpu/o3/lsq_unit.hh | 3 +- src/cpu/ozone/lw_lsq.hh | 3 +- src/cpu/testers/memtest/memtest.hh | 3 +- src/cpu/testers/networktest/networktest.hh | 3 +- src/dev/dma_device.hh | 2 +- src/mem/bridge.hh | 3 +- src/mem/cache/cache_impl.hh | 3 +- src/mem/packet.hh | 5 +- src/mem/request.hh | 5 +- src/python/swig/event.i | 1 - src/sim/eventq.hh | 3 +- 18 files changed, 16 insertions(+), 298 deletions(-) delete mode 100644 src/base/fast_alloc.cc delete mode 100644 src/base/fast_alloc.hh diff --git a/SConstruct b/SConstruct index 415af6ca7..2b3f6a8d8 100755 --- a/SConstruct +++ b/SConstruct @@ -833,11 +833,6 @@ sticky_vars.AddVariables( ListVariable('CPU_MODELS', 'CPU models', sorted(n for n,m in CpuModel.dict.iteritems() if m.default), sorted(CpuModel.list)), - BoolVariable('NO_FAST_ALLOC', 'Disable fast object allocator', False), - BoolVariable('FORCE_FAST_ALLOC', - 'Enable fast object allocator, even for gem5.debug', False), - BoolVariable('FAST_ALLOC_STATS', 'Enable fast object allocator statistics', - False), BoolVariable('EFENCE', 'Link with Electric Fence malloc debugger', False), BoolVariable('SS_COMPATIBLE_FP', @@ -852,8 +847,7 @@ sticky_vars.AddVariables( ) # These variables get exported to #defines in config/*.hh (see src/SConscript). -export_vars += ['USE_FENV', 'NO_FAST_ALLOC', 'FORCE_FAST_ALLOC', - 'FAST_ALLOC_STATS', 'SS_COMPATIBLE_FP', +export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP', 'TARGET_ISA', 'CP_ANNOTATE', 'USE_POSIX_CLOCK' ] ################################################### diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh index 9392290c7..c59661619 100644 --- a/src/arch/x86/pagetable_walker.hh +++ b/src/arch/x86/pagetable_walker.hh @@ -44,7 +44,6 @@ #include "arch/x86/pagetable.hh" #include "arch/x86/tlb.hh" -#include "base/fast_alloc.hh" #include "base/types.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" @@ -86,7 +85,7 @@ namespace X86ISA WalkerPort port; // State to track each walk of the page table - class WalkerState : public FastAlloc + class WalkerState { private: enum State { diff --git a/src/base/SConscript b/src/base/SConscript index 7c3b0786b..8790942cd 100644 --- a/src/base/SConscript +++ b/src/base/SConscript @@ -40,7 +40,6 @@ Source('callback.cc') Source('circlebuf.cc') Source('cprintf.cc') Source('debug.cc') -Source('fast_alloc.cc') if env['USE_FENV']: Source('fenv.c') Source('hostinfo.cc') diff --git a/src/base/fast_alloc.cc b/src/base/fast_alloc.cc deleted file mode 100644 index d370b93e8..000000000 --- a/src/base/fast_alloc.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2000-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Steve Reinhardt - */ - -/* - * This code was originally written by Steve Reinhardt as part of - * the Wisconsin Wind Tunnel simulator. Relicensed as part of M5 - * by permission. - */ - -#include - -#include "base/fast_alloc.hh" - -#if USE_FAST_ALLOC - -void *FastAlloc::freeLists[Num_Buckets]; - -#if FAST_ALLOC_STATS -unsigned FastAlloc::newCount[Num_Buckets]; -unsigned FastAlloc::deleteCount[Num_Buckets]; -unsigned FastAlloc::allocCount[Num_Buckets]; -#endif - -void * -FastAlloc::moreStructs(int bucket) -{ - assert(bucket > 0 && bucket < Num_Buckets); - - int sz = bucket * Alloc_Quantum; - const int nstructs = Num_Structs_Per_New; // how many to allocate? - char *p = ::new char[nstructs * sz]; - -#if FAST_ALLOC_STATS - ++allocCount[bucket]; -#endif - - freeLists[bucket] = p; - for (int i = 0; i < (nstructs-2); ++i, p += sz) - *(void **)p = p + sz; - *(void **)p = 0; - - return (p + sz); -} - -#endif // USE_FAST_ALLOC diff --git a/src/base/fast_alloc.hh b/src/base/fast_alloc.hh deleted file mode 100644 index d6dc593d4..000000000 --- a/src/base/fast_alloc.hh +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2000-2001, 2003-2005 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Authors: Steve Reinhardt - */ - -/* - * This code was originally written by Steve Reinhardt as part of - * the Wisconsin Wind Tunnel simulator. Relicensed as part of M5 - * by permission. - */ - -#ifndef __BASE_FAST_ALLOC_HH__ -#define __BASE_FAST_ALLOC_HH__ - -#include - -// Fast structure allocator. Designed for small objects that are -// frequently allocated and deallocated. This code is derived from the -// 'alloc_struct' package used in WWT and Blizzard. C++ provides a -// much nicer framework for the same optimization. The package is -// implemented as a class, FastAlloc. Allocation and deletion are -// performed using FastAlloc's new and delete operators. Any object -// that derives from the FastAlloc class will transparently use this -// allocation package. - -// The static allocate() and deallocate() methods can also be called -// directly if desired. - -// In order for derived classes to call delete with the correct -// structure size even when they are deallocated via a base-type -// pointer, they must have a virtual destructor. It is sufficient for -// FastAlloc to declare a virtual destructor (as it does); it is not -// required for derived classes to declare their own destructor. The -// compiler will automatically generate a virtual destructor for each -// derived class. However, it is more efficient if each derived class -// defines an inline destructor, so that the compiler can statically -// collapse the destructor call chain back up the inheritance -// hierarchy. - -#include "config/fast_alloc_stats.hh" -#include "config/force_fast_alloc.hh" -#include "config/no_fast_alloc.hh" - -// By default, we want to enable FastAlloc in any build other than -// m5.debug. (FastAlloc's reuse policies can mask allocation bugs, so -// we typically want it disabled when debugging.) Set -// FORCE_FAST_ALLOC to enable even when debugging, and set -// NO_FAST_ALLOC to disable even in non-debug builds. -#define USE_FAST_ALLOC \ - (FORCE_FAST_ALLOC || (!defined(DEBUG) && !NO_FAST_ALLOC)) - -#if !USE_FAST_ALLOC - -class FastAlloc -{ -}; - -#else - -class FastAlloc -{ - public: - static void *allocate(size_t); - static void deallocate(void *, size_t); - - void *operator new(size_t); - void operator delete(void *, size_t); - - virtual ~FastAlloc() {} - - private: - - // Max_Alloc_Size is the largest object that can be allocated with - // this class. There's no fundamental limit, but this limits the - // size of the freeLists array. Let's not make this really huge - // like in Blizzard. - static const size_t Max_Alloc_Size = 512; - - // Alloc_Quantum is the difference in size between adjacent - // buckets in the free list array. - static const int Log2_Alloc_Quantum = 3; - static const int Alloc_Quantum = (1 << Log2_Alloc_Quantum); - - // Num_Buckets = bucketFor(Max_Alloc_Size) + 1 - static const int Num_Buckets = - ((Max_Alloc_Size + Alloc_Quantum - 1) >> Log2_Alloc_Quantum) + 1; - - // when we call new() for more structures, how many should we get? - static const int Num_Structs_Per_New = 20; - - static int bucketFor(size_t); - static void *moreStructs(int bucket); - - static void *freeLists[Num_Buckets]; - -#if FAST_ALLOC_STATS - static unsigned newCount[Num_Buckets]; - static unsigned deleteCount[Num_Buckets]; - static unsigned allocCount[Num_Buckets]; -#endif -}; - -inline int -FastAlloc::bucketFor(size_t sz) -{ - return (sz + Alloc_Quantum - 1) >> Log2_Alloc_Quantum; -} - -inline void * -FastAlloc::allocate(size_t sz) -{ - int b; - void *p; - - if (sz > Max_Alloc_Size) - return (void *)::new char[sz]; - - b = bucketFor(sz); - p = freeLists[b]; - - if (p) - freeLists[b] = *(void **)p; - else - p = moreStructs(b); - -#if FAST_ALLOC_STATS - ++newCount[b]; -#endif - - return p; -} - -inline void -FastAlloc::deallocate(void *p, size_t sz) -{ - int b; - - if (sz > Max_Alloc_Size) { - ::delete [] (char *)p; - return; - } - - b = bucketFor(sz); - *(void **)p = freeLists[b]; - freeLists[b] = p; -#if FAST_ALLOC_STATS - ++deleteCount[b]; -#endif -} - -inline void * -FastAlloc::operator new(size_t sz) -{ - return allocate(sz); -} - -inline void -FastAlloc::operator delete(void *p, size_t sz) -{ - deallocate(p, sz); -} - -#endif // USE_FAST_ALLOC - -#endif // __BASE_FAST_ALLOC_HH__ diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index 900a98aa0..a9cb60070 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -51,7 +51,6 @@ #include #include "arch/utility.hh" -#include "base/fast_alloc.hh" #include "base/trace.hh" #include "config/the_isa.hh" #include "cpu/checker/cpu.hh" @@ -73,7 +72,7 @@ */ template -class BaseDynInst : public FastAlloc, public RefCounted +class BaseDynInst : public RefCounted { public: // Typedef for the CPU. diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh index 4b48a157b..afd137a2e 100644 --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -41,7 +41,6 @@ #include "arch/mt.hh" #include "arch/types.hh" #include "arch/utility.hh" -#include "base/fast_alloc.hh" #include "base/trace.hh" #include "base/types.hh" #include "config/the_isa.hh" @@ -73,7 +72,7 @@ class ResourceRequest; class Packet; -class InOrderDynInst : public FastAlloc, public RefCounted +class InOrderDynInst : public RefCounted { public: // Binary machine instruction type. diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index ad1e26d2f..c3bb8f7cd 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -41,7 +41,6 @@ #include "arch/isa_traits.hh" #include "arch/locked_mem.hh" #include "arch/mmapped_ipr.hh" -#include "base/fast_alloc.hh" #include "base/hashmap.hh" #include "config/the_isa.hh" #include "cpu/inst_seq.hh" @@ -271,7 +270,7 @@ class LSQUnit { MasterPort *dcachePort; /** Derived class to hold any sender state the LSQ needs. */ - class LSQSenderState : public Packet::SenderState, public FastAlloc + class LSQSenderState : public Packet::SenderState { public: /** Default constructor. */ diff --git a/src/cpu/ozone/lw_lsq.hh b/src/cpu/ozone/lw_lsq.hh index db8e53b43..855e5a97c 100644 --- a/src/cpu/ozone/lw_lsq.hh +++ b/src/cpu/ozone/lw_lsq.hh @@ -37,7 +37,6 @@ #include #include "arch/types.hh" -#include "base/fast_alloc.hh" #include "base/hashmap.hh" #include "config/the_isa.hh" #include "cpu/inst_seq.hh" @@ -301,7 +300,7 @@ class OzoneLWLSQ { }; /** Derived class to hold any sender state the LSQ needs. */ - class LSQSenderState : public Packet::SenderState, public FastAlloc + class LSQSenderState : public Packet::SenderState { public: /** Default constructor. */ diff --git a/src/cpu/testers/memtest/memtest.hh b/src/cpu/testers/memtest/memtest.hh index 450a3e4f1..52e32d72d 100644 --- a/src/cpu/testers/memtest/memtest.hh +++ b/src/cpu/testers/memtest/memtest.hh @@ -34,7 +34,6 @@ #include -#include "base/fast_alloc.hh" #include "base/statistics.hh" #include "mem/mem_object.hh" #include "mem/port.hh" @@ -112,7 +111,7 @@ class MemTest : public MemObject CpuPort funcPort; PortProxy funcProxy; - class MemTestSenderState : public Packet::SenderState, public FastAlloc + class MemTestSenderState : public Packet::SenderState { public: /** Constructor. */ diff --git a/src/cpu/testers/networktest/networktest.hh b/src/cpu/testers/networktest/networktest.hh index 8b7a89d6f..aec74a484 100644 --- a/src/cpu/testers/networktest/networktest.hh +++ b/src/cpu/testers/networktest/networktest.hh @@ -33,7 +33,6 @@ #include -#include "base/fast_alloc.hh" #include "base/statistics.hh" #include "mem/mem_object.hh" #include "mem/port.hh" @@ -99,7 +98,7 @@ class NetworkTest : public MemObject CpuPort cachePort; - class NetworkTestSenderState : public Packet::SenderState, public FastAlloc + class NetworkTestSenderState : public Packet::SenderState { public: /** Constructor. */ diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh index 8fc4e664c..8b40cc7e4 100644 --- a/src/dev/dma_device.hh +++ b/src/dev/dma_device.hh @@ -50,7 +50,7 @@ class DmaPort : public MasterPort { protected: - struct DmaReqState : public Packet::SenderState, public FastAlloc + struct DmaReqState : public Packet::SenderState { /** Event to call on the device when this transaction (all packets) * complete. */ diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh index 7342f4a9f..4595cf516 100644 --- a/src/mem/bridge.hh +++ b/src/mem/bridge.hh @@ -55,7 +55,6 @@ #include #include -#include "base/fast_alloc.hh" #include "base/types.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" @@ -85,7 +84,7 @@ class Bridge : public MemObject * state and original source. It has enough information to also * restore the response once it comes back to the bridge. */ - class RequestState : public Packet::SenderState, public FastAlloc + class RequestState : public Packet::SenderState { public: diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3b9bfd35a..942ac59ec 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -50,7 +50,6 @@ * Cache definitions. */ -#include "base/fast_alloc.hh" #include "base/misc.hh" #include "base/range.hh" #include "base/types.hh" @@ -349,7 +348,7 @@ Cache::access(PacketPtr pkt, BlkType *&blk, } -class ForwardResponseRecord : public Packet::SenderState, public FastAlloc +class ForwardResponseRecord : public Packet::SenderState { Packet::SenderState *prevSenderState; PortID prevSrc; diff --git a/src/mem/packet.hh b/src/mem/packet.hh index 8e3ef9456..cdcefcadb 100644 --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -58,7 +58,6 @@ #include "base/cast.hh" #include "base/compiler.hh" -#include "base/fast_alloc.hh" #include "base/flags.hh" #include "base/misc.hh" #include "base/printable.hh" @@ -227,7 +226,7 @@ class MemCmd * ultimate destination and back, possibly being conveyed by several * different Packets along the way.) */ -class Packet : public FastAlloc, public Printable +class Packet : public Printable { public: typedef uint32_t FlagsType; @@ -358,7 +357,7 @@ class Packet : public FastAlloc, public Printable * Object used to maintain state of a PrintReq. The senderState * field of a PrintReq should always be of this type. */ - class PrintReqState : public SenderState, public FastAlloc + class PrintReqState : public SenderState { private: /** diff --git a/src/mem/request.hh b/src/mem/request.hh index 68ef0540a..f6406e2c5 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -42,7 +42,6 @@ #include #include -#include "base/fast_alloc.hh" #include "base/flags.hh" #include "base/misc.hh" #include "base/types.hh" @@ -53,7 +52,7 @@ class Request; typedef Request* RequestPtr; typedef uint16_t MasterID; -class Request : public FastAlloc +class Request { public: typedef uint32_t FlagsType; @@ -229,7 +228,7 @@ class Request : public FastAlloc setThreadContext(cid, tid); } - ~Request() {} // for FastAlloc + ~Request() {} /** * Set up CPU and thread numbers. diff --git a/src/python/swig/event.i b/src/python/swig/event.i index f8c37bb4f..0af29e449 100644 --- a/src/python/swig/event.i +++ b/src/python/swig/event.i @@ -73,7 +73,6 @@ %include %include -%import "base/fast_alloc.hh" %import "sim/serialize.hh" %include "base/types.hh" diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh index e7d088e80..b04b43702 100644 --- a/src/sim/eventq.hh +++ b/src/sim/eventq.hh @@ -42,7 +42,6 @@ #include #include -#include "base/fast_alloc.hh" #include "base/flags.hh" #include "base/misc.hh" #include "base/trace.hh" @@ -61,7 +60,7 @@ extern EventQueue mainEventQueue; * * Caution, the order of members is chosen to maximize data packing. */ -class Event : public Serializable, public FastAlloc +class Event : public Serializable { friend class EventQueue;