sim: Remove FastAlloc

While FastAlloc provides a small performance increase (~1.5%) over regular malloc, it isn't thread safe.
After removing FastAlloc and using tcmalloc, I've seen a performance increase of 12% over libc malloc
when running twolf for ARM.
This commit is contained in:
Ali Saidi 2012-06-05 01:23:08 -04:00
parent d6997777be
commit 1b370431d0
18 changed files with 16 additions and 298 deletions

View file

@ -833,11 +833,6 @@ sticky_vars.AddVariables(
ListVariable('CPU_MODELS', 'CPU models', ListVariable('CPU_MODELS', 'CPU models',
sorted(n for n,m in CpuModel.dict.iteritems() if m.default), sorted(n for n,m in CpuModel.dict.iteritems() if m.default),
sorted(CpuModel.list)), sorted(CpuModel.list)),
BoolVariable('NO_FAST_ALLOC', 'Disable fast object allocator', False),
BoolVariable('FORCE_FAST_ALLOC',
'Enable fast object allocator, even for gem5.debug', False),
BoolVariable('FAST_ALLOC_STATS', 'Enable fast object allocator statistics',
False),
BoolVariable('EFENCE', 'Link with Electric Fence malloc debugger', BoolVariable('EFENCE', 'Link with Electric Fence malloc debugger',
False), False),
BoolVariable('SS_COMPATIBLE_FP', BoolVariable('SS_COMPATIBLE_FP',
@ -852,8 +847,7 @@ sticky_vars.AddVariables(
) )
# These variables get exported to #defines in config/*.hh (see src/SConscript). # These variables get exported to #defines in config/*.hh (see src/SConscript).
export_vars += ['USE_FENV', 'NO_FAST_ALLOC', 'FORCE_FAST_ALLOC', export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP',
'FAST_ALLOC_STATS', 'SS_COMPATIBLE_FP',
'TARGET_ISA', 'CP_ANNOTATE', 'USE_POSIX_CLOCK' ] 'TARGET_ISA', 'CP_ANNOTATE', 'USE_POSIX_CLOCK' ]
################################################### ###################################################

View file

@ -44,7 +44,6 @@
#include "arch/x86/pagetable.hh" #include "arch/x86/pagetable.hh"
#include "arch/x86/tlb.hh" #include "arch/x86/tlb.hh"
#include "base/fast_alloc.hh"
#include "base/types.hh" #include "base/types.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"
#include "mem/packet.hh" #include "mem/packet.hh"
@ -86,7 +85,7 @@ namespace X86ISA
WalkerPort port; WalkerPort port;
// State to track each walk of the page table // State to track each walk of the page table
class WalkerState : public FastAlloc class WalkerState
{ {
private: private:
enum State { enum State {

View file

@ -40,7 +40,6 @@ Source('callback.cc')
Source('circlebuf.cc') Source('circlebuf.cc')
Source('cprintf.cc') Source('cprintf.cc')
Source('debug.cc') Source('debug.cc')
Source('fast_alloc.cc')
if env['USE_FENV']: if env['USE_FENV']:
Source('fenv.c') Source('fenv.c')
Source('hostinfo.cc') Source('hostinfo.cc')

View file

@ -1,72 +0,0 @@
/*
* Copyright (c) 2000-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Steve Reinhardt
*/
/*
* This code was originally written by Steve Reinhardt as part of
* the Wisconsin Wind Tunnel simulator. Relicensed as part of M5
* by permission.
*/
#include <cassert>
#include "base/fast_alloc.hh"
#if USE_FAST_ALLOC
// Out-of-class definitions for FastAlloc's static data members.
//
// freeLists holds, for each size bucket, the head of that bucket's
// singly linked list of free structures (allocate() pops from it,
// deallocate() pushes onto it, moreStructs() refills it).
void *FastAlloc::freeLists[Num_Buckets];
#if FAST_ALLOC_STATS
// Per-bucket event counters, only compiled in when FAST_ALLOC_STATS is
// set: newCount is bumped by allocate(), deleteCount by deallocate(),
// and allocCount by moreStructs() each time it requests a new batch.
unsigned FastAlloc::newCount[Num_Buckets];
unsigned FastAlloc::deleteCount[Num_Buckets];
unsigned FastAlloc::allocCount[Num_Buckets];
#endif
// Carve a fresh batch of fixed-size structures for one bucket.
//
// A single ::new char[] request obtains room for Num_Structs_Per_New
// structures of (bucket * Alloc_Quantum) bytes each.  All but the last
// structure are threaded onto freeLists[bucket] (the first word of each
// one points at the next, and the final link is null); the last
// structure is handed straight back to the caller.
void *
FastAlloc::moreStructs(int bucket)
{
    assert(bucket > 0 && bucket < Num_Buckets);

    const int structBytes = bucket * Alloc_Quantum;
    const int batch = Num_Structs_Per_New;

    char *cursor = ::new char[batch * structBytes];

#if FAST_ALLOC_STATS
    ++allocCount[bucket];
#endif

    // The first structure of the batch becomes the head of the free list.
    freeLists[bucket] = cursor;

    // Chain every structure to its successor, stopping on the
    // second-to-last one so that the last stays out of the list.
    int linked = 0;
    while (linked < batch - 2) {
        char *const successor = cursor + structBytes;
        *reinterpret_cast<void **>(cursor) = successor;
        cursor = successor;
        ++linked;
    }

    // 'cursor' now sits on the second-to-last structure: end the list.
    *reinterpret_cast<void **>(cursor) = 0;

    // The one structure that was never linked goes to the caller.
    return cursor + structBytes;
}
#endif // USE_FAST_ALLOC

View file

@ -1,190 +0,0 @@
/*
* Copyright (c) 2000-2001, 2003-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Steve Reinhardt
*/
/*
* This code was originally written by Steve Reinhardt as part of
* the Wisconsin Wind Tunnel simulator. Relicensed as part of M5
* by permission.
*/
#ifndef __BASE_FAST_ALLOC_HH__
#define __BASE_FAST_ALLOC_HH__
#include <cstddef>
// Fast structure allocator. Designed for small objects that are
// frequently allocated and deallocated. This code is derived from the
// 'alloc_struct' package used in WWT and Blizzard. C++ provides a
// much nicer framework for the same optimization. The package is
// implemented as a class, FastAlloc. Allocation and deletion are
// performed using FastAlloc's new and delete operators. Any object
// that derives from the FastAlloc class will transparently use this
// allocation package.
// The static allocate() and deallocate() methods can also be called
// directly if desired.
// In order for derived classes to call delete with the correct
// structure size even when they are deallocated via a base-type
// pointer, they must have a virtual destructor. It is sufficient for
// FastAlloc to declare a virtual destructor (as it does); it is not
// required for derived classes to declare their own destructor. The
// compiler will automatically generate a virtual destructor for each
// derived class. However, it is more efficient if each derived class
// defines an inline destructor, so that the compiler can statically
// collapse the destructor call chain back up the inheritance
// hierarchy.
#include "config/fast_alloc_stats.hh"
#include "config/force_fast_alloc.hh"
#include "config/no_fast_alloc.hh"
// By default, we want to enable FastAlloc in any build other than
// m5.debug.  (FastAlloc's reuse policies can mask allocation bugs, so
// we typically want it disabled when debugging.)  Set
// FORCE_FAST_ALLOC to enable even when debugging, and set
// NO_FAST_ALLOC to disable even in non-debug builds.
//
// The condition is evaluated once, right here, and USE_FAST_ALLOC is
// defined as a plain 0/1 constant.  Keeping 'defined(DEBUG)' inside the
// macro body would make 'defined' appear as the result of macro
// expansion in every later '#if USE_FAST_ALLOC', which the C++ standard
// leaves undefined (and which compilers warn about).
#if FORCE_FAST_ALLOC || (!defined(DEBUG) && !NO_FAST_ALLOC)
#define USE_FAST_ALLOC 1
#else
#define USE_FAST_ALLOC 0
#endif
#if !USE_FAST_ALLOC
// Placeholder used when USE_FAST_ALLOC is false: an empty class, so
// that types naming FastAlloc as a base still compile while picking up
// no members (and no class-specific operator new/delete) from it.
class FastAlloc
{
};
#else
// Size-bucketed free-list allocator.  Classes that derive from
// FastAlloc inherit the class-specific operator new/delete declared
// below, which route through allocate()/deallocate().
class FastAlloc
{
  public:
    // Class-specific allocation operators picked up by derived classes.
    void *operator new(size_t);
    void operator delete(void *, size_t);

    // The same machinery, callable directly with an explicit size.
    static void *allocate(size_t);
    static void deallocate(void *, size_t);

    // Virtual so that deleting through a base-type pointer reports the
    // size of the most-derived object to operator delete.
    virtual ~FastAlloc() {}

  private:
    // Requests larger than Max_Alloc_Size bypass the buckets entirely.
    // The limit is not fundamental; it only bounds the length of the
    // freeLists array, so it is kept modest.
    static const size_t Max_Alloc_Size = 512;

    // Adjacent buckets differ in size by Alloc_Quantum bytes, which is
    // kept as a power of two so bucket lookup is a shift.
    static const int Log2_Alloc_Quantum = 3;
    static const int Alloc_Quantum = 1 << Log2_Alloc_Quantum;

    // One more than the bucket index of the largest bucketed request,
    // i.e. bucketFor(Max_Alloc_Size) + 1.
    static const int Num_Buckets =
        ((Max_Alloc_Size + Alloc_Quantum - 1) >> Log2_Alloc_Quantum) + 1;

    // Number of structures obtained per refill of an empty bucket.
    static const int Num_Structs_Per_New = 20;

    // Bucket index serving a request of the given size.
    static int bucketFor(size_t);

    // Refill an empty bucket and return one structure from the batch.
    static void *moreStructs(int bucket);

    // Head of the free list for each bucket.
    static void *freeLists[Num_Buckets];

#if FAST_ALLOC_STATS
    // Optional per-bucket event counters.
    static unsigned newCount[Num_Buckets];
    static unsigned deleteCount[Num_Buckets];
    static unsigned allocCount[Num_Buckets];
#endif
};
// Map a request size in bytes to the index of the bucket that serves
// it: the size rounded up to a whole number of Alloc_Quantum units.
//
// A zero-byte request would round to bucket 0, but moreStructs() never
// populates that bucket (it asserts bucket > 0, and without the assert
// it would write link words into a zero-length array).  Such requests
// are therefore sent to bucket 1, the smallest real bucket.  Because
// allocate() and deallocate() both come through here, a zero-byte
// block is taken from and returned to the same bucket.
inline int
FastAlloc::bucketFor(size_t sz)
{
    if (sz == 0)
        return 1;

    return (sz + Alloc_Quantum - 1) >> Log2_Alloc_Quantum;
}
inline void *
FastAlloc::allocate(size_t sz)
{
int b;
void *p;
if (sz > Max_Alloc_Size)
return (void *)::new char[sz];
b = bucketFor(sz);
p = freeLists[b];
if (p)
freeLists[b] = *(void **)p;
else
p = moreStructs(b);
#if FAST_ALLOC_STATS
++newCount[b];
#endif
return p;
}
inline void
FastAlloc::deallocate(void *p, size_t sz)
{
int b;
if (sz > Max_Alloc_Size) {
::delete [] (char *)p;
return;
}
b = bucketFor(sz);
*(void **)p = freeLists[b];
freeLists[b] = p;
#if FAST_ALLOC_STATS
++deleteCount[b];
#endif
}
// Class-specific operator new: forwards the requested size to
// allocate() and returns whatever block it provides.
inline void *
FastAlloc::operator new(size_t sz)
{
    void *const storage = FastAlloc::allocate(sz);
    return storage;
}
// Class-specific sized operator delete: passes the block and the size
// it is given on to deallocate().
inline void
FastAlloc::operator delete(void *p, size_t sz)
{
    FastAlloc::deallocate(p, sz);
}
#endif // USE_FAST_ALLOC
#endif // __BASE_FAST_ALLOC_HH__

View file

@ -51,7 +51,6 @@
#include <queue> #include <queue>
#include "arch/utility.hh" #include "arch/utility.hh"
#include "base/fast_alloc.hh"
#include "base/trace.hh" #include "base/trace.hh"
#include "config/the_isa.hh" #include "config/the_isa.hh"
#include "cpu/checker/cpu.hh" #include "cpu/checker/cpu.hh"
@ -73,7 +72,7 @@
*/ */
template <class Impl> template <class Impl>
class BaseDynInst : public FastAlloc, public RefCounted class BaseDynInst : public RefCounted
{ {
public: public:
// Typedef for the CPU. // Typedef for the CPU.

View file

@ -41,7 +41,6 @@
#include "arch/mt.hh" #include "arch/mt.hh"
#include "arch/types.hh" #include "arch/types.hh"
#include "arch/utility.hh" #include "arch/utility.hh"
#include "base/fast_alloc.hh"
#include "base/trace.hh" #include "base/trace.hh"
#include "base/types.hh" #include "base/types.hh"
#include "config/the_isa.hh" #include "config/the_isa.hh"
@ -73,7 +72,7 @@
class ResourceRequest; class ResourceRequest;
class Packet; class Packet;
class InOrderDynInst : public FastAlloc, public RefCounted class InOrderDynInst : public RefCounted
{ {
public: public:
// Binary machine instruction type. // Binary machine instruction type.

View file

@ -41,7 +41,6 @@
#include "arch/isa_traits.hh" #include "arch/isa_traits.hh"
#include "arch/locked_mem.hh" #include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh" #include "arch/mmapped_ipr.hh"
#include "base/fast_alloc.hh"
#include "base/hashmap.hh" #include "base/hashmap.hh"
#include "config/the_isa.hh" #include "config/the_isa.hh"
#include "cpu/inst_seq.hh" #include "cpu/inst_seq.hh"
@ -271,7 +270,7 @@ class LSQUnit {
MasterPort *dcachePort; MasterPort *dcachePort;
/** Derived class to hold any sender state the LSQ needs. */ /** Derived class to hold any sender state the LSQ needs. */
class LSQSenderState : public Packet::SenderState, public FastAlloc class LSQSenderState : public Packet::SenderState
{ {
public: public:
/** Default constructor. */ /** Default constructor. */

View file

@ -37,7 +37,6 @@
#include <queue> #include <queue>
#include "arch/types.hh" #include "arch/types.hh"
#include "base/fast_alloc.hh"
#include "base/hashmap.hh" #include "base/hashmap.hh"
#include "config/the_isa.hh" #include "config/the_isa.hh"
#include "cpu/inst_seq.hh" #include "cpu/inst_seq.hh"
@ -301,7 +300,7 @@ class OzoneLWLSQ {
}; };
/** Derived class to hold any sender state the LSQ needs. */ /** Derived class to hold any sender state the LSQ needs. */
class LSQSenderState : public Packet::SenderState, public FastAlloc class LSQSenderState : public Packet::SenderState
{ {
public: public:
/** Default constructor. */ /** Default constructor. */

View file

@ -34,7 +34,6 @@
#include <set> #include <set>
#include "base/fast_alloc.hh"
#include "base/statistics.hh" #include "base/statistics.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"
#include "mem/port.hh" #include "mem/port.hh"
@ -112,7 +111,7 @@ class MemTest : public MemObject
CpuPort funcPort; CpuPort funcPort;
PortProxy funcProxy; PortProxy funcProxy;
class MemTestSenderState : public Packet::SenderState, public FastAlloc class MemTestSenderState : public Packet::SenderState
{ {
public: public:
/** Constructor. */ /** Constructor. */

View file

@ -33,7 +33,6 @@
#include <set> #include <set>
#include "base/fast_alloc.hh"
#include "base/statistics.hh" #include "base/statistics.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"
#include "mem/port.hh" #include "mem/port.hh"
@ -99,7 +98,7 @@ class NetworkTest : public MemObject
CpuPort cachePort; CpuPort cachePort;
class NetworkTestSenderState : public Packet::SenderState, public FastAlloc class NetworkTestSenderState : public Packet::SenderState
{ {
public: public:
/** Constructor. */ /** Constructor. */

View file

@ -50,7 +50,7 @@
class DmaPort : public MasterPort class DmaPort : public MasterPort
{ {
protected: protected:
struct DmaReqState : public Packet::SenderState, public FastAlloc struct DmaReqState : public Packet::SenderState
{ {
/** Event to call on the device when this transaction (all packets) /** Event to call on the device when this transaction (all packets)
* complete. */ * complete. */

View file

@ -55,7 +55,6 @@
#include <queue> #include <queue>
#include <string> #include <string>
#include "base/fast_alloc.hh"
#include "base/types.hh" #include "base/types.hh"
#include "mem/mem_object.hh" #include "mem/mem_object.hh"
#include "mem/packet.hh" #include "mem/packet.hh"
@ -85,7 +84,7 @@ class Bridge : public MemObject
* state and original source. It has enough information to also * state and original source. It has enough information to also
* restore the response once it comes back to the bridge. * restore the response once it comes back to the bridge.
*/ */
class RequestState : public Packet::SenderState, public FastAlloc class RequestState : public Packet::SenderState
{ {
public: public:

View file

@ -50,7 +50,6 @@
* Cache definitions. * Cache definitions.
*/ */
#include "base/fast_alloc.hh"
#include "base/misc.hh" #include "base/misc.hh"
#include "base/range.hh" #include "base/range.hh"
#include "base/types.hh" #include "base/types.hh"
@ -349,7 +348,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
} }
class ForwardResponseRecord : public Packet::SenderState, public FastAlloc class ForwardResponseRecord : public Packet::SenderState
{ {
Packet::SenderState *prevSenderState; Packet::SenderState *prevSenderState;
PortID prevSrc; PortID prevSrc;

View file

@ -58,7 +58,6 @@
#include "base/cast.hh" #include "base/cast.hh"
#include "base/compiler.hh" #include "base/compiler.hh"
#include "base/fast_alloc.hh"
#include "base/flags.hh" #include "base/flags.hh"
#include "base/misc.hh" #include "base/misc.hh"
#include "base/printable.hh" #include "base/printable.hh"
@ -227,7 +226,7 @@ class MemCmd
* ultimate destination and back, possibly being conveyed by several * ultimate destination and back, possibly being conveyed by several
* different Packets along the way.) * different Packets along the way.)
*/ */
class Packet : public FastAlloc, public Printable class Packet : public Printable
{ {
public: public:
typedef uint32_t FlagsType; typedef uint32_t FlagsType;
@ -358,7 +357,7 @@ class Packet : public FastAlloc, public Printable
* Object used to maintain state of a PrintReq. The senderState * Object used to maintain state of a PrintReq. The senderState
* field of a PrintReq should always be of this type. * field of a PrintReq should always be of this type.
*/ */
class PrintReqState : public SenderState, public FastAlloc class PrintReqState : public SenderState
{ {
private: private:
/** /**

View file

@ -42,7 +42,6 @@
#include <cassert> #include <cassert>
#include <climits> #include <climits>
#include "base/fast_alloc.hh"
#include "base/flags.hh" #include "base/flags.hh"
#include "base/misc.hh" #include "base/misc.hh"
#include "base/types.hh" #include "base/types.hh"
@ -53,7 +52,7 @@ class Request;
typedef Request* RequestPtr; typedef Request* RequestPtr;
typedef uint16_t MasterID; typedef uint16_t MasterID;
class Request : public FastAlloc class Request
{ {
public: public:
typedef uint32_t FlagsType; typedef uint32_t FlagsType;
@ -229,7 +228,7 @@ class Request : public FastAlloc
setThreadContext(cid, tid); setThreadContext(cid, tid);
} }
~Request() {} // for FastAlloc ~Request() {}
/** /**
* Set up CPU and thread numbers. * Set up CPU and thread numbers.

View file

@ -73,7 +73,6 @@
%include <std_string.i> %include <std_string.i>
%include <stdint.i> %include <stdint.i>
%import "base/fast_alloc.hh"
%import "sim/serialize.hh" %import "sim/serialize.hh"
%include "base/types.hh" %include "base/types.hh"

View file

@ -42,7 +42,6 @@
#include <iosfwd> #include <iosfwd>
#include <string> #include <string>
#include "base/fast_alloc.hh"
#include "base/flags.hh" #include "base/flags.hh"
#include "base/misc.hh" #include "base/misc.hh"
#include "base/trace.hh" #include "base/trace.hh"
@ -61,7 +60,7 @@ extern EventQueue mainEventQueue;
* *
* Caution, the order of members is chosen to maximize data packing. * Caution, the order of members is chosen to maximize data packing.
*/ */
class Event : public Serializable, public FastAlloc class Event : public Serializable
{ {
friend class EventQueue; friend class EventQueue;