sim: Remove FastAlloc
While FastAlloc provides a small performance increase (~1.5%) over regular malloc, it isn't thread safe. After removing FastAlloc and using tcmalloc, I've seen a performance increase of 12% over libc malloc when running twolf for ARM.
This commit is contained in:
parent
d6997777be
commit
1b370431d0
18 changed files with 16 additions and 298 deletions
|
@ -833,11 +833,6 @@ sticky_vars.AddVariables(
|
|||
ListVariable('CPU_MODELS', 'CPU models',
|
||||
sorted(n for n,m in CpuModel.dict.iteritems() if m.default),
|
||||
sorted(CpuModel.list)),
|
||||
BoolVariable('NO_FAST_ALLOC', 'Disable fast object allocator', False),
|
||||
BoolVariable('FORCE_FAST_ALLOC',
|
||||
'Enable fast object allocator, even for gem5.debug', False),
|
||||
BoolVariable('FAST_ALLOC_STATS', 'Enable fast object allocator statistics',
|
||||
False),
|
||||
BoolVariable('EFENCE', 'Link with Electric Fence malloc debugger',
|
||||
False),
|
||||
BoolVariable('SS_COMPATIBLE_FP',
|
||||
|
@ -852,8 +847,7 @@ sticky_vars.AddVariables(
|
|||
)
|
||||
|
||||
# These variables get exported to #defines in config/*.hh (see src/SConscript).
|
||||
export_vars += ['USE_FENV', 'NO_FAST_ALLOC', 'FORCE_FAST_ALLOC',
|
||||
'FAST_ALLOC_STATS', 'SS_COMPATIBLE_FP',
|
||||
export_vars += ['USE_FENV', 'SS_COMPATIBLE_FP',
|
||||
'TARGET_ISA', 'CP_ANNOTATE', 'USE_POSIX_CLOCK' ]
|
||||
|
||||
###################################################
|
||||
|
|
|
@ -44,7 +44,6 @@
|
|||
|
||||
#include "arch/x86/pagetable.hh"
|
||||
#include "arch/x86/tlb.hh"
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/types.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/packet.hh"
|
||||
|
@ -86,7 +85,7 @@ namespace X86ISA
|
|||
WalkerPort port;
|
||||
|
||||
// State to track each walk of the page table
|
||||
class WalkerState : public FastAlloc
|
||||
class WalkerState
|
||||
{
|
||||
private:
|
||||
enum State {
|
||||
|
|
|
@ -40,7 +40,6 @@ Source('callback.cc')
|
|||
Source('circlebuf.cc')
|
||||
Source('cprintf.cc')
|
||||
Source('debug.cc')
|
||||
Source('fast_alloc.cc')
|
||||
if env['USE_FENV']:
|
||||
Source('fenv.c')
|
||||
Source('hostinfo.cc')
|
||||
|
|
|
@ -1,72 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2000-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Steve Reinhardt
|
||||
*/
|
||||
|
||||
/*
|
||||
* This code was originally written by Steve Reinhardt as part of
|
||||
* the Wisconsin Wind Tunnel simulator. Relicensed as part of M5
|
||||
* by permission.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
|
||||
#if USE_FAST_ALLOC
|
||||
|
||||
// Storage for FastAlloc's static members.
// freeLists holds, per size bucket, the head of a singly linked list of
// free chunks; allocate() pops from it and deallocate() pushes onto it.
void *FastAlloc::freeLists[Num_Buckets];
|
||||
|
||||
// Optional per-bucket counters, compiled in only when FAST_ALLOC_STATS is
// non-zero: newCount is bumped in allocate(), deleteCount in deallocate(),
// and allocCount each time moreStructs() allocates a new batch.
#if FAST_ALLOC_STATS
|
||||
unsigned FastAlloc::newCount[Num_Buckets];
|
||||
unsigned FastAlloc::deleteCount[Num_Buckets];
|
||||
unsigned FastAlloc::allocCount[Num_Buckets];
|
||||
#endif
|
||||
|
||||
void *
|
||||
FastAlloc::moreStructs(int bucket)
|
||||
{
|
||||
assert(bucket > 0 && bucket < Num_Buckets);
|
||||
|
||||
int sz = bucket * Alloc_Quantum;
|
||||
const int nstructs = Num_Structs_Per_New; // how many to allocate?
|
||||
char *p = ::new char[nstructs * sz];
|
||||
|
||||
#if FAST_ALLOC_STATS
|
||||
++allocCount[bucket];
|
||||
#endif
|
||||
|
||||
freeLists[bucket] = p;
|
||||
for (int i = 0; i < (nstructs-2); ++i, p += sz)
|
||||
*(void **)p = p + sz;
|
||||
*(void **)p = 0;
|
||||
|
||||
return (p + sz);
|
||||
}
|
||||
|
||||
#endif // USE_FAST_ALLOC
|
|
@ -1,190 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2000-2001, 2003-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Steve Reinhardt
|
||||
*/
|
||||
|
||||
/*
|
||||
* This code was originally written by Steve Reinhardt as part of
|
||||
* the Wisconsin Wind Tunnel simulator. Relicensed as part of M5
|
||||
* by permission.
|
||||
*/
|
||||
|
||||
#ifndef __BASE_FAST_ALLOC_HH__
|
||||
#define __BASE_FAST_ALLOC_HH__
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
// Fast structure allocator. Designed for small objects that are
|
||||
// frequently allocated and deallocated. This code is derived from the
|
||||
// 'alloc_struct' package used in WWT and Blizzard. C++ provides a
|
||||
// much nicer framework for the same optimization. The package is
|
||||
// implemented as a class, FastAlloc. Allocation and deletion are
|
||||
// performed using FastAlloc's new and delete operators. Any object
|
||||
// that derives from the FastAlloc class will transparently use this
|
||||
// allocation package.
|
||||
|
||||
// The static allocate() and deallocate() methods can also be called
|
||||
// directly if desired.
|
||||
|
||||
// In order for derived classes to call delete with the correct
|
||||
// structure size even when they are deallocated via a base-type
|
||||
// pointer, they must have a virtual destructor. It is sufficient for
|
||||
// FastAlloc to declare a virtual destructor (as it does); it is not
|
||||
// required for derived classes to declare their own destructor. The
|
||||
// compiler will automatically generate a virtual destructor for each
|
||||
// derived class. However, it is more efficient if each derived class
|
||||
// defines an inline destructor, so that the compiler can statically
|
||||
// collapse the destructor call chain back up the inheritance
|
||||
// hierarchy.
|
||||
|
||||
#include "config/fast_alloc_stats.hh"
|
||||
#include "config/force_fast_alloc.hh"
|
||||
#include "config/no_fast_alloc.hh"
|
||||
|
||||
// By default, we want to enable FastAlloc in any build other than
|
||||
// m5.debug. (FastAlloc's reuse policies can mask allocation bugs, so
|
||||
// we typically want it disabled when debugging.) Set
|
||||
// FORCE_FAST_ALLOC to enable even when debugging, and set
|
||||
// NO_FAST_ALLOC to disable even in non-debug builds.
|
||||
#define USE_FAST_ALLOC \
|
||||
(FORCE_FAST_ALLOC || (!defined(DEBUG) && !NO_FAST_ALLOC))
|
||||
|
||||
#if !USE_FAST_ALLOC
|
||||
|
||||
// Stand-in used when USE_FAST_ALLOC evaluates to false: an empty class, so
// types that list FastAlloc as a base still compile but get no custom
// operator new/delete from it.
class FastAlloc {};
|
||||
|
||||
#else
|
||||
|
||||
// Free-list allocator exposed as a base class: a type deriving from
// FastAlloc picks up the operator new / operator delete declared here.
// Requests are grouped into size buckets that are Alloc_Quantum bytes
// apart, with one free list per bucket.
class FastAlloc
{
    // Largest request served from the free lists. This bounds the size of
    // the freeLists array; allocate() and deallocate() send anything
    // larger to the global array new / delete instead.
    static const size_t Max_Alloc_Size = 512;

    // Spacing, in bytes, between the sizes served by adjacent buckets,
    // kept as a power of two so a bucket index is a shift away.
    static const int Log2_Alloc_Quantum = 3;
    static const int Alloc_Quantum = (1 << Log2_Alloc_Quantum);

    // One slot for every index bucketFor() can produce for a size up to
    // Max_Alloc_Size, i.e. bucketFor(Max_Alloc_Size) + 1.
    static const int Num_Buckets =
        ((Max_Alloc_Size + Alloc_Quantum - 1) >> Log2_Alloc_Quantum) + 1;

    // Number of chunks requested at once when a bucket needs refilling.
    static const int Num_Structs_Per_New = 20;

    // Map a byte count to its bucket index.
    static int bucketFor(size_t);

    // Allocate a new batch of chunks for the given bucket.
    static void *moreStructs(int bucket);

    // Per-bucket free-list heads.
    static void *freeLists[Num_Buckets];

#if FAST_ALLOC_STATS
    // Per-bucket event counters, present only in statistics builds.
    static unsigned newCount[Num_Buckets];
    static unsigned deleteCount[Num_Buckets];
    static unsigned allocCount[Num_Buckets];
#endif

  public:
    // Direct entry points, usable without going through new/delete.
    static void *allocate(size_t);
    static void deallocate(void *, size_t);

    // Class-specific allocation functions inherited by derived types.
    void *operator new(size_t);
    void operator delete(void *, size_t);

    // Virtual so that deleting through a base pointer passes the derived
    // object's size to operator delete.
    virtual ~FastAlloc() {}
};
|
||||
|
||||
inline int
|
||||
FastAlloc::bucketFor(size_t sz)
|
||||
{
|
||||
return (sz + Alloc_Quantum - 1) >> Log2_Alloc_Quantum;
|
||||
}
|
||||
|
||||
inline void *
|
||||
FastAlloc::allocate(size_t sz)
|
||||
{
|
||||
int b;
|
||||
void *p;
|
||||
|
||||
if (sz > Max_Alloc_Size)
|
||||
return (void *)::new char[sz];
|
||||
|
||||
b = bucketFor(sz);
|
||||
p = freeLists[b];
|
||||
|
||||
if (p)
|
||||
freeLists[b] = *(void **)p;
|
||||
else
|
||||
p = moreStructs(b);
|
||||
|
||||
#if FAST_ALLOC_STATS
|
||||
++newCount[b];
|
||||
#endif
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
inline void
|
||||
FastAlloc::deallocate(void *p, size_t sz)
|
||||
{
|
||||
int b;
|
||||
|
||||
if (sz > Max_Alloc_Size) {
|
||||
::delete [] (char *)p;
|
||||
return;
|
||||
}
|
||||
|
||||
b = bucketFor(sz);
|
||||
*(void **)p = freeLists[b];
|
||||
freeLists[b] = p;
|
||||
#if FAST_ALLOC_STATS
|
||||
++deleteCount[b];
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void *
|
||||
FastAlloc::operator new(size_t sz)
|
||||
{
|
||||
return allocate(sz);
|
||||
}
|
||||
|
||||
inline void
|
||||
FastAlloc::operator delete(void *p, size_t sz)
|
||||
{
|
||||
deallocate(p, sz);
|
||||
}
|
||||
|
||||
#endif // USE_FAST_ALLOC
|
||||
|
||||
#endif // __BASE_FAST_ALLOC_HH__
|
|
@ -51,7 +51,6 @@
|
|||
#include <queue>
|
||||
|
||||
#include "arch/utility.hh"
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/checker/cpu.hh"
|
||||
|
@ -73,7 +72,7 @@
|
|||
*/
|
||||
|
||||
template <class Impl>
|
||||
class BaseDynInst : public FastAlloc, public RefCounted
|
||||
class BaseDynInst : public RefCounted
|
||||
{
|
||||
public:
|
||||
// Typedef for the CPU.
|
||||
|
|
|
@ -41,7 +41,6 @@
|
|||
#include "arch/mt.hh"
|
||||
#include "arch/types.hh"
|
||||
#include "arch/utility.hh"
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "base/types.hh"
|
||||
#include "config/the_isa.hh"
|
||||
|
@ -73,7 +72,7 @@
|
|||
class ResourceRequest;
|
||||
class Packet;
|
||||
|
||||
class InOrderDynInst : public FastAlloc, public RefCounted
|
||||
class InOrderDynInst : public RefCounted
|
||||
{
|
||||
public:
|
||||
// Binary machine instruction type.
|
||||
|
|
|
@ -41,7 +41,6 @@
|
|||
#include "arch/isa_traits.hh"
|
||||
#include "arch/locked_mem.hh"
|
||||
#include "arch/mmapped_ipr.hh"
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/hashmap.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
|
@ -271,7 +270,7 @@ class LSQUnit {
|
|||
MasterPort *dcachePort;
|
||||
|
||||
/** Derived class to hold any sender state the LSQ needs. */
|
||||
class LSQSenderState : public Packet::SenderState, public FastAlloc
|
||||
class LSQSenderState : public Packet::SenderState
|
||||
{
|
||||
public:
|
||||
/** Default constructor. */
|
||||
|
|
|
@ -37,7 +37,6 @@
|
|||
#include <queue>
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/hashmap.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
|
@ -301,7 +300,7 @@ class OzoneLWLSQ {
|
|||
};
|
||||
|
||||
/** Derived class to hold any sender state the LSQ needs. */
|
||||
class LSQSenderState : public Packet::SenderState, public FastAlloc
|
||||
class LSQSenderState : public Packet::SenderState
|
||||
{
|
||||
public:
|
||||
/** Default constructor. */
|
||||
|
|
|
@ -34,7 +34,6 @@
|
|||
|
||||
#include <set>
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/port.hh"
|
||||
|
@ -112,7 +111,7 @@ class MemTest : public MemObject
|
|||
CpuPort funcPort;
|
||||
PortProxy funcProxy;
|
||||
|
||||
class MemTestSenderState : public Packet::SenderState, public FastAlloc
|
||||
class MemTestSenderState : public Packet::SenderState
|
||||
{
|
||||
public:
|
||||
/** Constructor. */
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
|
||||
#include <set>
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/port.hh"
|
||||
|
@ -99,7 +98,7 @@ class NetworkTest : public MemObject
|
|||
|
||||
CpuPort cachePort;
|
||||
|
||||
class NetworkTestSenderState : public Packet::SenderState, public FastAlloc
|
||||
class NetworkTestSenderState : public Packet::SenderState
|
||||
{
|
||||
public:
|
||||
/** Constructor. */
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
class DmaPort : public MasterPort
|
||||
{
|
||||
protected:
|
||||
struct DmaReqState : public Packet::SenderState, public FastAlloc
|
||||
struct DmaReqState : public Packet::SenderState
|
||||
{
|
||||
/** Event to call on the device when this transaction (all packets)
|
||||
* complete. */
|
||||
|
|
|
@ -55,7 +55,6 @@
|
|||
#include <queue>
|
||||
#include <string>
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/types.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/packet.hh"
|
||||
|
@ -85,7 +84,7 @@ class Bridge : public MemObject
|
|||
* state and original source. It has enough information to also
|
||||
* restore the response once it comes back to the bridge.
|
||||
*/
|
||||
class RequestState : public Packet::SenderState, public FastAlloc
|
||||
class RequestState : public Packet::SenderState
|
||||
{
|
||||
|
||||
public:
|
||||
|
|
3
src/mem/cache/cache_impl.hh
vendored
3
src/mem/cache/cache_impl.hh
vendored
|
@ -50,7 +50,6 @@
|
|||
* Cache definitions.
|
||||
*/
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/range.hh"
|
||||
#include "base/types.hh"
|
||||
|
@ -349,7 +348,7 @@ Cache<TagStore>::access(PacketPtr pkt, BlkType *&blk,
|
|||
}
|
||||
|
||||
|
||||
class ForwardResponseRecord : public Packet::SenderState, public FastAlloc
|
||||
class ForwardResponseRecord : public Packet::SenderState
|
||||
{
|
||||
Packet::SenderState *prevSenderState;
|
||||
PortID prevSrc;
|
||||
|
|
|
@ -58,7 +58,6 @@
|
|||
|
||||
#include "base/cast.hh"
|
||||
#include "base/compiler.hh"
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/flags.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/printable.hh"
|
||||
|
@ -227,7 +226,7 @@ class MemCmd
|
|||
* ultimate destination and back, possibly being conveyed by several
|
||||
* different Packets along the way.)
|
||||
*/
|
||||
class Packet : public FastAlloc, public Printable
|
||||
class Packet : public Printable
|
||||
{
|
||||
public:
|
||||
typedef uint32_t FlagsType;
|
||||
|
@ -358,7 +357,7 @@ class Packet : public FastAlloc, public Printable
|
|||
* Object used to maintain state of a PrintReq. The senderState
|
||||
* field of a PrintReq should always be of this type.
|
||||
*/
|
||||
class PrintReqState : public SenderState, public FastAlloc
|
||||
class PrintReqState : public SenderState
|
||||
{
|
||||
private:
|
||||
/**
|
||||
|
|
|
@ -42,7 +42,6 @@
|
|||
#include <cassert>
|
||||
#include <climits>
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/flags.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/types.hh"
|
||||
|
@ -53,7 +52,7 @@ class Request;
|
|||
typedef Request* RequestPtr;
|
||||
typedef uint16_t MasterID;
|
||||
|
||||
class Request : public FastAlloc
|
||||
class Request
|
||||
{
|
||||
public:
|
||||
typedef uint32_t FlagsType;
|
||||
|
@ -229,7 +228,7 @@ class Request : public FastAlloc
|
|||
setThreadContext(cid, tid);
|
||||
}
|
||||
|
||||
~Request() {} // for FastAlloc
|
||||
~Request() {}
|
||||
|
||||
/**
|
||||
* Set up CPU and thread numbers.
|
||||
|
|
|
@ -73,7 +73,6 @@
|
|||
%include <std_string.i>
|
||||
%include <stdint.i>
|
||||
|
||||
%import "base/fast_alloc.hh"
|
||||
%import "sim/serialize.hh"
|
||||
|
||||
%include "base/types.hh"
|
||||
|
|
|
@ -42,7 +42,6 @@
|
|||
#include <iosfwd>
|
||||
#include <string>
|
||||
|
||||
#include "base/fast_alloc.hh"
|
||||
#include "base/flags.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/trace.hh"
|
||||
|
@ -61,7 +60,7 @@ extern EventQueue mainEventQueue;
|
|||
*
|
||||
* Caution, the order of members is chosen to maximize data packing.
|
||||
*/
|
||||
class Event : public Serializable, public FastAlloc
|
||||
class Event : public Serializable
|
||||
{
|
||||
friend class EventQueue;
|
||||
|
||||
|
|
Loading…
Reference in a new issue