gcc: Enable Link-Time Optimization for gcc >= 4.6
This patch adds Link-Time Optimization when building the fast target using gcc >= 4.6, and adds a scons flag to disable it (--no-lto). No check is performed to guarantee that the linker supports LTO and use of the linker plugin, so the user has to ensure that binutils GNU ld >= 2.21 or the gold linker is available. Typically, if gcc >= 4.6 is available, the latter should not be a problem. Currently the LTO option is only useful for gcc >= 4.6, due to the limited support in clang and earlier versions of gcc. The intention is to also add support for clang once the LTO integration matures.

The same number of jobs is used for the parallel phase of LTO as the number of jobs specified on the scons command line, using the -flto=n flag that was introduced with gcc 4.6. The gold linker also supports concurrent and incremental linking, but this is not used at this point.

The compilation and linking time is increased by almost 50% on average, although ARM seems to be particularly demanding with an increase of almost 100%. Also beware when using this, as gcc uses a tremendous amount of memory and temp space in the process. You have been warned.

After some careful consideration, and plenty of discussions, the flag is only added to the fast target, and the warning that was issued in an earlier version of this patch is now removed. Similarly, in the earlier version the flag was used to enable LTO; now LTO is enabled by default, and the flag has been changed to disable it. The rationale behind this decision is that opt is used for development, whereas fast is only used for long runs, e.g. regressions or more elaborate experiments where the additional compile and link time is amortized by a much larger run time.

When it comes to the return on investment, the regression seems to be roughly 15% faster with LTO. For a bit more detail, I ran twolf on ARM.fast, with three repeated runs, and they all finish within 42 minutes (+- 25 seconds) without LTO and 31 minutes (+- 25 seconds) with LTO, i.e. LTO gives an impressive >25% speed-up for this case.

Without LTO (ARM.fast twolf)
real 42m37.632s user 42m34.448s sys 0m0.390s
real 41m51.793s user 41m50.384s sys 0m0.131s
real 41m45.491s user 41m39.791s sys 0m0.139s

With LTO (ARM.fast twolf)
real 30m33.588s user 30m5.701s sys 0m0.141s
real 31m27.791s user 31m24.674s sys 0m0.111s
real 31m25.500s user 31m16.731s sys 0m0.106s
This commit is contained in:
parent
a57eda0843
commit
d1f3a3b91a
2 changed files with 36 additions and 1 deletions
24
SConstruct
24
SConstruct
|
@ -165,6 +165,8 @@ AddLocalOption('--default', dest='default', type='string', action='store',
|
||||||
help='Override which build_opts file to use for defaults')
|
help='Override which build_opts file to use for defaults')
|
||||||
AddLocalOption('--ignore-style', dest='ignore_style', action='store_true',
|
AddLocalOption('--ignore-style', dest='ignore_style', action='store_true',
|
||||||
help='Disable style checking hooks')
|
help='Disable style checking hooks')
|
||||||
|
AddLocalOption('--no-lto', dest='no_lto', action='store_true',
|
||||||
|
help='Disable Link-Time Optimization for fast')
|
||||||
AddLocalOption('--update-ref', dest='update_ref', action='store_true',
|
AddLocalOption('--update-ref', dest='update_ref', action='store_true',
|
||||||
help='Update test reference outputs')
|
help='Update test reference outputs')
|
||||||
AddLocalOption('--verbose', dest='verbose', action='store_true',
|
AddLocalOption('--verbose', dest='verbose', action='store_true',
|
||||||
|
@ -477,6 +479,10 @@ else:
|
||||||
main['SHCXXCOMSTR'] = Transform("SHCXX")
|
main['SHCXXCOMSTR'] = Transform("SHCXX")
|
||||||
Export('MakeAction')
|
Export('MakeAction')
|
||||||
|
|
||||||
|
# Initialize the Link-Time Optimization (LTO) flags
|
||||||
|
main['LTO_CCFLAGS'] = []
|
||||||
|
main['LTO_LDFLAGS'] = []
|
||||||
|
|
||||||
CXX_version = readCommand([main['CXX'],'--version'], exception=False)
|
CXX_version = readCommand([main['CXX'],'--version'], exception=False)
|
||||||
CXX_V = readCommand([main['CXX'],'-V'], exception=False)
|
CXX_V = readCommand([main['CXX'],'-V'], exception=False)
|
||||||
|
|
||||||
|
@ -506,6 +512,24 @@ if main['GCC']:
|
||||||
# http://gcc.gnu.org/projects/cxx0x.html for details
|
# http://gcc.gnu.org/projects/cxx0x.html for details
|
||||||
if compareVersions(gcc_version, '4.4') >= 0:
|
if compareVersions(gcc_version, '4.4') >= 0:
|
||||||
main.Append(CXXFLAGS=['-std=c++0x'])
|
main.Append(CXXFLAGS=['-std=c++0x'])
|
||||||
|
|
||||||
|
# LTO support is only really working properly from 4.6 and beyond
|
||||||
|
if compareVersions(gcc_version, '4.6') >= 0:
|
||||||
|
# Add the appropriate Link-Time Optimization (LTO) flags
|
||||||
|
# unless LTO is explicitly turned off. Note that these flags
|
||||||
|
# are only used by the fast target.
|
||||||
|
if not GetOption('no_lto'):
|
||||||
|
# Pass the LTO flag when compiling to produce GIMPLE
|
||||||
|
# output, we merely create the flags here and only append
|
||||||
|
# them later.
|
||||||
|
main['LTO_CCFLAGS'] = ['-flto=%d' % GetOption('num_jobs')]
|
||||||
|
|
||||||
|
# Use the same amount of jobs for LTO as we are running
|
||||||
|
# scons with, we hardcode the use of the linker plugin
|
||||||
|
# which requires either gold or GNU ld >= 2.21
|
||||||
|
main['LTO_LDFLAGS'] = ['-flto=%d' % GetOption('num_jobs'),
|
||||||
|
'-fuse-linker-plugin']
|
||||||
|
|
||||||
elif main['ICC']:
|
elif main['ICC']:
|
||||||
pass #Fix me... add warning flags once we clean up icc warnings
|
pass #Fix me... add warning flags once we clean up icc warnings
|
||||||
elif main['SUNCC']:
|
elif main['SUNCC']:
|
||||||
|
|
|
@ -943,15 +943,26 @@ ccflags = {'debug' : [], 'opt' : ['-g'], 'fast' : [], 'prof' : ['-g', '-pg'],
|
||||||
ldflags = {'debug' : [], 'opt' : [], 'fast' : [], 'prof' : ['-pg'],
|
ldflags = {'debug' : [], 'opt' : [], 'fast' : [], 'prof' : ['-pg'],
|
||||||
'perf' : ['-Wl,--no-as-needed', '-lprofiler', '-Wl,--as-needed']}
|
'perf' : ['-Wl,--no-as-needed', '-lprofiler', '-Wl,--as-needed']}
|
||||||
|
|
||||||
|
# For Link Time Optimization, the optimisation flags used to compile
|
||||||
|
# individual files are decoupled from those used at link time
|
||||||
|
# (i.e. you can compile with -O3 and perform LTO with -O0), so we need
|
||||||
|
# to also update the linker flags based on the target.
|
||||||
if env['GCC']:
|
if env['GCC']:
|
||||||
if sys.platform == 'sunos5':
|
if sys.platform == 'sunos5':
|
||||||
ccflags['debug'] += ['-gstabs+']
|
ccflags['debug'] += ['-gstabs+']
|
||||||
else:
|
else:
|
||||||
ccflags['debug'] += ['-ggdb3']
|
ccflags['debug'] += ['-ggdb3']
|
||||||
ldflags['debug'] += ['-O0']
|
ldflags['debug'] += ['-O0']
|
||||||
# opt, fast, prof and perf all share the same cc flags
|
# opt, fast, prof and perf all share the same cc flags, also add
|
||||||
|
# the optimization to the ldflags as LTO defers the optimization
|
||||||
|
# to link time
|
||||||
for target in ['opt', 'fast', 'prof', 'perf']:
|
for target in ['opt', 'fast', 'prof', 'perf']:
|
||||||
ccflags[target] += ['-O3']
|
ccflags[target] += ['-O3']
|
||||||
|
ldflags[target] += ['-O3']
|
||||||
|
|
||||||
|
ccflags['fast'] += env['LTO_CCFLAGS']
|
||||||
|
ldflags['fast'] += env['LTO_LDFLAGS']
|
||||||
|
|
||||||
elif env['SUNCC']:
|
elif env['SUNCC']:
|
||||||
ccflags['debug'] += ['-g0']
|
ccflags['debug'] += ['-g0']
|
||||||
ccflags['opt'] += ['-O']
|
ccflags['opt'] += ['-O']
|
||||||
|
|
Loading…
Reference in a new issue