ANDROID: kbuild: add support for clang LTO

This change adds the configuration option CONFIG_LTO_CLANG, and
build system support for clang's Link Time Optimization (LTO). In
preparation for LTO support for other compilers, potentially common
parts of the changes are gated behind CONFIG_LTO instead.

With -flto, instead of object files, clang produces LLVM bitcode,
which is compiled into a native object at link time, allowing the
final binary to be optimized globally. For more details, see:

  https://llvm.org/docs/LinkTimeOptimization.html

While the kernel normally uses GNU ld for linking, LLVM supports LTO
only with lld or GNU gold linkers. This patch set assumes lld will
be used.

Bug: 62093296
Bug: 67506682
Bug: 133186739
Change-Id: Ibcd9fc7ec501b4f30b43b4877897615645f8655f
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
This commit is contained in:
Sami Tolvanen 2017-11-28 08:48:49 -08:00 committed by Alistair Strachan
parent 319e1fdbd7
commit 6b12b774bb
7 changed files with 246 additions and 22 deletions

View file

@ -608,6 +608,16 @@ ifdef CONFIG_FUNCTION_TRACER
CC_FLAGS_FTRACE := -pg
endif
# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
# ar/cc/ld-* macros return correct values.
ifdef CONFIG_LTO_CLANG
# LTO builds produce LLVM IR files: llvm-ar is used to build archive symbol
# tables from them, and llvm-nm replaces objdump when processing symbol
# versions and exports. Both tool names are exported to the environment.
export LLVM_AR := llvm-ar
export LLVM_NM := llvm-nm
endif
# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
# values of the respective KBUILD_* variables
ARCH_CPPFLAGS :=
@ -795,6 +805,22 @@ KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections
LDFLAGS_vmlinux += --gc-sections
endif
ifdef CONFIG_LTO_CLANG
# compiler flags used when building with clang LTO
lto-clang-flags := -flto -fvisibility=hidden

# allow disabling only clang LTO where needed
export DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
endif

ifdef CONFIG_LTO
# Compiler-independent names for the LTO flags: LTO_CFLAGS is appended to
# KBUILD_CFLAGS, and DISABLE_LTO carries the flags that switch LTO off again.
export LTO_CFLAGS := $(lto-clang-flags)
export DISABLE_LTO := $(DISABLE_LTO_CLANG)
KBUILD_CFLAGS += $(LTO_CFLAGS)
endif
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
@ -1583,7 +1609,8 @@ clean: $(clean-dirs)
-o -name modules.builtin -o -name '.tmp_*.o.*' \
-o -name '*.c.[012]*.*' \
-o -name '*.ll' \
-o -name '*.gcno' \) -type f -print | xargs rm -f
-o -name '*.gcno' \
-o -name '*.*.symversions' \) -type f -print | xargs rm -f
# Generate tags for editors
# ---------------------------------------------------------------------------

View file

@ -474,6 +474,44 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.
# Common LTO symbol: it has no prompt and defaults to n, so it is only turned
# on when another option selects it (LTO_CLANG below does).
config LTO
def_bool n
# Capability symbol without a prompt; the help text lists what an architecture
# is expected to support before selecting it.
config ARCH_SUPPORTS_LTO_CLANG
bool
help
An architecture should select this option if it supports:
- compiling with clang,
- compiling inline assembly with clang's integrated assembler,
- and linking with LLD.
# User-visible choice of the LTO mode; LTO_NONE is the default entry.
choice
prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
default LTO_NONE
help
This option turns on Link-Time Optimization (LTO).
config LTO_NONE
bool "None"
# Only offered when the architecture declares support, FTRACE_MCOUNT_RECORD is
# off, and the toolchain is clang + LLD; choosing it selects the common LTO
# symbol.
config LTO_CLANG
bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
depends on ARCH_SUPPORTS_LTO_CLANG
depends on !FTRACE_MCOUNT_RECORD
depends on CC_IS_CLANG && LD_IS_LLD
select LTO
help
This option enables clang's Link Time Optimization (LTO), which allows
the compiler to optimize the kernel globally at link time. If you
enable this option, the compiler generates LLVM IR instead of object
files, and the actual compilation from IR occurs at the LTO link step,
which may take several minutes.
If you select this option, you must compile the kernel with clang and
LLD.
endchoice
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help

View file

@ -66,7 +66,7 @@
* RODATA_MAIN is not used because existing code already defines .rodata.x
* sections to be brought in with rodata.
*/
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*

View file

@ -185,6 +185,23 @@ else
cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
ifdef CONFIG_LTO_CLANG
# Generate .o.symversions files for each .o with exported symbols, and link these
# to the kernel and/or modules at the end.
#
# The freshly compiled file is $(@D)/.tmp_$(@F). `objdump -h` is probed first:
# if it succeeds, objdump is used to look for __ksymtab; otherwise llvm-nm is
# used instead. In either case, when __ksymtab is found, the output of
# cmd_gensymtypes_c is written to $(@D)/$(@F).symversions. The temporary file
# is then moved to $@ unconditionally.
cmd_modversions_c = \
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
> $(@D)/$(@F).symversions; \
fi; \
else \
if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
> $(@D)/$(@F).symversions; \
fi; \
fi; \
mv -f $(@D)/.tmp_$(@F) $@;
else
cmd_modversions_c = \
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
@ -197,6 +214,7 @@ cmd_modversions_c = \
mv -f $(@D)/.tmp_$(@F) $@; \
fi;
endif
endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
ifndef CC_USING_RECORD_MCOUNT
@ -429,6 +447,26 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
# To build objects in subdirs, we need to descend into the directories
$(sort $(subdir-obj-y)): $(subdir-ym) ;
ifdef CONFIG_LTO_CLANG
  ifdef CONFIG_MODVERSIONS
# Recipe fragment: start $@.symversions from scratch, then append the
# .symversions file of every prerequisite (FORCE excluded) that has one, so the
# combined file can be processed later.
update_lto_symversions =						\
	rm -f $@.symversions;						\
	for obj in $(filter-out FORCE,$^); do				\
		if [ -f $$obj.symversions ]; then			\
			cat $$obj.symversions >> $@.symversions;	\
		fi;							\
	done;
  endif

# Recipe fragment meant to be appended to an archive command (hence the
# leading ';'): move the archive aside and re-create it with llvm-ar from the
# same member list, so that the symbol table also covers IR files.
update_lto_symtable = ;							\
	mv -f $@ $@.tmp;						\
	$(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ $$($(AR) t $@.tmp);		\
	rm -f $@.tmp
endif
#
# Rule to compile a set of .o files into one .o file
#
@ -439,7 +477,8 @@ ifdef builtin-target
# scripts/link-vmlinux.sh builds an aggregate built-in.a with a symbol
# table and index.
quiet_cmd_ar_builtin = AR $@
cmd_ar_builtin = rm -f $@; \
cmd_ar_builtin = $(update_lto_symversions) \
rm -f $@; \
$(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(filter $(real-obj-y), $^)
$(builtin-target): $(real-obj-y) FORCE
@ -468,7 +507,11 @@ ifdef lib-target
quiet_cmd_link_l_target = AR $@
# lib target archives do get a symbol table and index
cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
cmd_link_l_target = \
$(update_lto_symversions) \
rm -f $@; \
$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \
$(update_lto_symtable)
$(lib-target): $(lib-y) FORCE
$(call if_changed,link_l_target)
@ -479,13 +522,34 @@ dummy-object = $(obj)/.lib_exports.o
ksyms-lds = $(dot-target).lds
quiet_cmd_export_list = EXPORTS $@
cmd_export_list = $(OBJDUMP) -h $< | \
sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
rm -f $(dummy-object);\
filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p'
link_export_list = rm -f $(dummy-object); \
echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
rm $(dummy-object) $(ksyms-lds)
ifdef CONFIG_LTO_CLANG
# objdump cannot read IR files and llvm-nm cannot read archives, so every
# archive member is examined on its own: objdump is used when it accepts the
# member, llvm-nm otherwise. Whichever tool runs, its output goes through
# filter_export_list and is appended to the linker script.
cmd_export_list =							\
	rm -f $(ksyms-lds);						\
	for member in $$($(AR) t $<); do				\
		if $(OBJDUMP) -h $$member >/dev/null 2>/dev/null; then	\
			$(OBJDUMP) -h $$member;				\
		else							\
			$(LLVM_NM) $$member;				\
		fi | $(filter_export_list) >>$(ksyms-lds);		\
	done;								\
	$(link_export_list)
else
# Without LTO the whole archive can be handed to objdump in one go.
cmd_export_list =							\
	$(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds);		\
	$(link_export_list)
endif
$(obj)/lib-ksyms.o: $(lib-target) FORCE
$(call if_changed,export_list)
@ -509,13 +573,31 @@ $($(subst $(obj)/,,$(@:.o=-objs))) \
$($(subst $(obj)/,,$(@:.o=-y))) \
$($(subst $(obj)/,,$(@:.o=-m)))), $^)
cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
quiet_cmd_link_multi-y = AR $@
cmd_link_multi-y = $(update_lto_symversions) \
rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \
$(update_lto_symtable)
quiet_cmd_link_multi-m = LD [M] $@
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
ifdef CONFIG_LTO_CLANG
# don't compile IR until needed
cmd_link_multi-m = $(cmd_link_multi-y)
else
cmd_link_multi-m = $(cmd_link_multi-link)
endif
$(multi-used-y): FORCE
$(call if_changed,link_multi-y)
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
@{ echo $(@:.o=.ko); echo $(link_multi_deps); \
$(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
$(call multi_depend, $(multi-used-y), .o, -objs -y)
$(call multi_depend, $(multi-used-m), .o, -objs -y -m)
targets += $(multi-used-m)

View file

@ -83,12 +83,28 @@ modpost = scripts/mod/modpost \
MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we
# need to link them into actual objects before passing them to modpost
#
# modpost-ext is the extra suffix placed before ".o" for those linked objects:
# ".lto" when CONFIG_LTO_CLANG is set, empty otherwise.
modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
ifdef CONFIG_LTO_CLANG
# Relocatable link of a module's %.o into %$(modpost-ext).o. When a non-empty
# %.o.symversions file exists for the module it is passed to the linker with
# -T; all prerequisites except FORCE are linked with --whole-archive.
# NOTE(review): the existence check uses $(shell ...), so it is evaluated when
# make expands this variable rather than by the recipe's own shell -- confirm
# that this timing is intended.
quiet_cmd_cc_lto_link_modules = LD [M] $@
cmd_cc_lto_link_modules = \
$(LD) $(ld_flags) -r -o $(@) \
$(shell [ -s $(@:$(modpost-ext).o=.o.symversions) ] && \
echo -T $(@:$(modpost-ext).o=.o.symversions)) \
--whole-archive $(filter-out FORCE,$^)
# One linked object per module, built from the module's original .o
$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
$(call if_changed,cc_lto_link_modules)
endif
# We can go over command line length here, so be careful.
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T -
PHONY += __modpost
__modpost: $(modules:.ko=.o) FORCE
__modpost: $(modules:.ko=$(modpost-ext).o) FORCE
$(call cmd,modpost) $(wildcard vmlinux)
quiet_cmd_kernel-mod = MODPOST $@
@ -100,7 +116,6 @@ vmlinux.o: FORCE
# Declare generated files as targets for modpost
$(modules:.ko=.mod.c): __modpost ;
# Step 5), compile all *.mod.c files
# modname is set to make c_flags define KBUILD_MODNAME
@ -119,13 +134,24 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
# Step 6), final link of the modules with optional arch pass after final link
quiet_cmd_ld_ko_o = LD [M] $@
ifdef CONFIG_LTO_CLANG
# Final module link for LTO builds. The %$(modpost-ext).o prerequisite is
# mapped back to the original %.o, which is linked with --whole-archive
# together with %.mod.o. A non-empty %.o.symversions file, if present, is
# passed to the linker with -T.
#
# The kernel-wide linker flags live in KBUILD_LDFLAGS (as used by the non-LTO
# branch below); plain LDFLAGS is not the kbuild variable and would drop them.
# NOTE(review): unlike the non-LTO branch, no ARCH_POSTLINK pass is run here --
# confirm whether that is intentional.
cmd_ld_ko_o =								\
	$(LD) -r $(KBUILD_LDFLAGS)					\
		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
		$(shell [ -s $(@:.ko=.o.symversions) ] &&		\
			echo -T $(@:.ko=.o.symversions))		\
		-o $@ --whole-archive					\
		$(filter-out FORCE,$(^:$(modpost-ext).o=.o))
else
# Regular module link, followed by the optional arch post-link pass.
cmd_ld_ko_o =								\
	$(LD) -r $(KBUILD_LDFLAGS)					\
		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
		-o $@ $(filter-out FORCE,$^) ;				\
	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
endif
$(modules): %.ko :%.o %.mod.o FORCE
$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE
+$(call if_changed,ld_ko_o)
targets += $(modules)

View file

@ -60,6 +60,38 @@ archive_builtin()
${AR} rcsTP${KBUILD_ARFLAGS} built-in.a \
${KBUILD_VMLINUX_INIT} \
${KBUILD_VMLINUX_MAIN}
# rebuild with llvm-ar to update the symbol table
if [ -n "${CONFIG_LTO_CLANG}" ]; then
mv -f built-in.a built-in.a.tmp
${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.a $(${AR} t built-in.a.tmp)
rm -f built-in.a.tmp
fi
}
# If both CONFIG_LTO_CLANG and CONFIG_MODVERSIONS are selected, collect the
# generated symbol versions of every object in built-in.a and
# ${KBUILD_VMLINUX_LIBS} into .tmp_symversions, and print the linker option
# that includes that file ("-T .tmp_symversions"). Prints nothing when either
# option is off or when no symbol versions were collected, so the caller can
# splice the output straight into the linker command line.
modversions()
{
	if [ -z "${CONFIG_LTO_CLANG}" ] || [ -z "${CONFIG_MODVERSIONS}" ]; then
		return
	fi

	rm -f .tmp_symversions

	# ${KBUILD_VMLINUX_LIBS} and the member list are deliberately left
	# unquoted: both are whitespace-separated lists.
	for a in built-in.a ${KBUILD_VMLINUX_LIBS}; do
		for o in $(${AR} t "${a}"); do
			if [ -f "${o}.symversions" ]; then
				cat "${o}.symversions" >> .tmp_symversions
			fi
		done
	done

	# Only pass the script to the linker if something was collected;
	# otherwise the file does not exist and the link would fail.
	if [ -s .tmp_symversions ]; then
		echo "-T .tmp_symversions"
	fi
}
# Link of vmlinux.o used for section mismatch analysis
@ -75,7 +107,13 @@ modpost_link()
${KBUILD_VMLINUX_LIBS} \
--end-group"
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
if [ -n "${CONFIG_LTO_CLANG}" ]; then
# This might take a while, so indicate that we're doing
# an LTO link
info LTO vmlinux.o
fi
${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
}
# Link of vmlinux
@ -87,13 +125,20 @@ vmlinux_link()
local objects
if [ "${SRCARCH}" != "um" ]; then
objects="--whole-archive \
built-in.a \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${1}"
if [ -z "${CONFIG_LTO_CLANG}" ]; then
objects="--whole-archive \
built-in.a \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${1}"
else
objects="--start-group \
vmlinux.o \
--end-group \
${1}"
fi
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
-T ${lds} ${objects}
@ -114,7 +159,6 @@ vmlinux_link()
fi
}
# Create ${2} .o file with all symbols from the ${1} object file
kallsyms()
{
@ -159,6 +203,7 @@ cleanup()
{
rm -f .tmp_System.map
rm -f .tmp_kallsyms*
rm -f .tmp_symversions
rm -f .tmp_vmlinux*
rm -f built-in.a
rm -f System.map
@ -220,7 +265,6 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
archive_builtin
#link vmlinux.o
info LD vmlinux.o
modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches

View file

@ -145,6 +145,9 @@ static struct module *new_module(const char *modname)
p[strlen(p) - 2] = '\0';
mod->is_dot_o = 1;
}
/* strip trailing .lto */
if (strends(p, ".lto"))
p[strlen(p) - 4] = '\0';
/* add to list */
mod->name = p;
@ -1927,6 +1930,10 @@ static char *remove_dot(char *s)
size_t m = strspn(s + n + 1, "0123456789");
if (m && (s[n + m] == '.' || s[n + m] == 0))
s[n] = 0;
/* strip trailing .lto */
if (strends(s, ".lto"))
s[strlen(s) - 4] = '\0';
}
return s;
}