ANDROID: kbuild: add support for clang LTO
This change adds the configuration option CONFIG_LTO_CLANG, and build system support for clang's Link Time Optimization (LTO). In preparation for LTO support for other compilers, potentially common parts of the changes are gated behind CONFIG_LTO instead. With -flto, instead of object files, clang produces LLVM bitcode, which is compiled into a native object at link time, allowing the final binary to be optimized globally. For more details, see: https://llvm.org/docs/LinkTimeOptimization.html While the kernel normally uses GNU ld for linking, LLVM supports LTO only with lld or GNU gold linkers. This patch set assumes lld will be used. Bug: 62093296 Bug: 67506682 Bug: 133186739 Change-Id: Ibcd9fc7ec501b4f30b43b4877897615645f8655f Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
This commit is contained in:
parent
319e1fdbd7
commit
6b12b774bb
7 changed files with 246 additions and 22 deletions
29
Makefile
29
Makefile
|
@ -608,6 +608,16 @@ ifdef CONFIG_FUNCTION_TRACER
|
|||
CC_FLAGS_FTRACE := -pg
|
||||
endif
|
||||
|
||||
# Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure
|
||||
# ar/cc/ld-* macros return correct values.
|
||||
ifdef CONFIG_LTO_CLANG
# With clang LTO the build needs two LLVM binutils in addition to the regular
# ones: llvm-ar, to build archive symbol tables from IR files, and llvm-nm,
# used in place of objdump when processing symbol versions and exports.
# Both are exported so that sub-makes and scripts can see them.
export LLVM_AR := llvm-ar
export LLVM_NM := llvm-nm
endif
|
||||
|
||||
# The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default
|
||||
# values of the respective KBUILD_* variables
|
||||
ARCH_CPPFLAGS :=
|
||||
|
@ -795,6 +805,22 @@ KBUILD_CFLAGS_KERNEL += -ffunction-sections -fdata-sections
|
|||
LDFLAGS_vmlinux += --gc-sections
|
||||
endif
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
# Flags that turn clang LTO on for a compilation unit.
lto-clang-flags := -flto -fvisibility=hidden

# Counterpart flags for code that must opt out of clang LTO only; exported so
# that Makefiles further down the tree can use them.
export DISABLE_LTO_CLANG := -fno-lto -fvisibility=default
endif
|
||||
|
||||
ifdef CONFIG_LTO
# Compiler-independent LTO knobs. Both are currently fed from the
# clang-specific variables and exported for use by other Makefiles.
export LTO_CFLAGS := $(lto-clang-flags)
export DISABLE_LTO := $(DISABLE_LTO_CLANG)

# Build the kernel with LTO enabled by default.
KBUILD_CFLAGS += $(LTO_CFLAGS)
endif
|
||||
|
||||
# arch Makefile may override CC so keep this after arch Makefile is included
|
||||
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
|
||||
|
||||
|
@ -1583,7 +1609,8 @@ clean: $(clean-dirs)
|
|||
-o -name modules.builtin -o -name '.tmp_*.o.*' \
|
||||
-o -name '*.c.[012]*.*' \
|
||||
-o -name '*.ll' \
|
||||
-o -name '*.gcno' \) -type f -print | xargs rm -f
|
||||
-o -name '*.gcno' \
|
||||
-o -name '*.*.symversions' \) -type f -print | xargs rm -f
|
||||
|
||||
# Generate tags for editors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
38
arch/Kconfig
38
arch/Kconfig
|
@ -474,6 +474,44 @@ config STACKPROTECTOR_STRONG
|
|||
about 20% of all kernel functions, which increases the kernel code
|
||||
size by about 2%.
|
||||
|
||||
# Internal symbol: set (via select) by whichever LTO flavour is chosen below.
config LTO
	bool
	default n

# Capability symbol selected by architectures that can be built with clang LTO.
config ARCH_SUPPORTS_LTO_CLANG
	bool
	help
	  An architecture should select this option if it supports:
	  - compiling with clang,
	  - compiling inline assembly with clang's integrated assembler,
	  - and linking with LLD.

# User-visible selection of the LTO mode; defaults to no LTO.
choice
	prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)"
	default LTO_NONE
	help
	  This option turns on Link-Time Optimization (LTO).

config LTO_NONE
	bool "None"

config LTO_CLANG
	bool "Use clang Link Time Optimization (LTO) (EXPERIMENTAL)"
	depends on ARCH_SUPPORTS_LTO_CLANG && !FTRACE_MCOUNT_RECORD
	depends on CC_IS_CLANG && LD_IS_LLD
	select LTO
	help
	  This option enables clang's Link Time Optimization (LTO), which allows
	  the compiler to optimize the kernel globally at link time. If you
	  enable this option, the compiler generates LLVM IR instead of object
	  files, and the actual compilation from IR occurs at the LTO link step,
	  which may take several minutes.

	  If you select this option, you must compile the kernel with clang and
	  LLD.

endchoice
|
||||
|
||||
config HAVE_ARCH_WITHIN_STACK_FRAMES
|
||||
bool
|
||||
help
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
* RODATA_MAIN is not used because existing code already defines .rodata.x
|
||||
* sections to be brought in with rodata.
|
||||
*/
|
||||
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
|
||||
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
|
||||
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
|
||||
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX*
|
||||
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
|
||||
|
|
|
@ -185,6 +185,23 @@ else
|
|||
|
||||
cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# Generate .o.symversions files for each .o with exported symbols, and link these
|
||||
# to the kernel and/or modules at the end.
|
||||
cmd_modversions_c = \
|
||||
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) >/dev/null 2>/dev/null; then \
|
||||
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
|
||||
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
|
||||
> $(@D)/$(@F).symversions; \
|
||||
fi; \
|
||||
else \
|
||||
if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
|
||||
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
|
||||
> $(@D)/$(@F).symversions; \
|
||||
fi; \
|
||||
fi; \
|
||||
mv -f $(@D)/.tmp_$(@F) $@;
|
||||
else
|
||||
cmd_modversions_c = \
|
||||
if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \
|
||||
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
|
||||
|
@ -197,6 +214,7 @@ cmd_modversions_c = \
|
|||
mv -f $(@D)/.tmp_$(@F) $@; \
|
||||
fi;
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef CONFIG_FTRACE_MCOUNT_RECORD
|
||||
ifndef CC_USING_RECORD_MCOUNT
|
||||
|
@ -429,6 +447,26 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
|
|||
# To build objects in subdirs, we need to descend into the directories
|
||||
$(sort $(subdir-obj-y)): $(subdir-ym) ;
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
  ifdef CONFIG_MODVERSIONS
    # Start from a clean $@.symversions, then append the .symversions file of
    # every prerequisite (FORCE excluded) that has one, for later processing.
    # The definition ends in ';' so that further shell commands can follow it.
    update_lto_symversions =						\
	rm -f $@.symversions;						\
	for obj in $(filter-out FORCE,$^); do				\
		if test -f $$obj.symversions; then			\
			cat $$obj.symversions >> $@.symversions;	\
		fi;							\
	done;
  endif

  # Re-create the archive $@ with llvm-ar, from the member list of the archive
  # just built, so that its symbol table also covers IR files. The definition
  # starts with ';' because users append it directly after the preceding
  # $(AR) command.
  update_lto_symtable = ;						\
	mv -f $@ $@.tmp;						\
	$(LLVM_AR) rcsT$(KBUILD_ARFLAGS) $@ $$($(AR) t $@.tmp);		\
	rm -f $@.tmp
endif
|
||||
|
||||
#
|
||||
# Rule to compile a set of .o files into one .o file
|
||||
#
|
||||
|
@ -439,7 +477,8 @@ ifdef builtin-target
|
|||
# scripts/link-vmlinux.sh builds an aggregate built-in.a with a symbol
|
||||
# table and index.
|
||||
quiet_cmd_ar_builtin = AR $@
|
||||
cmd_ar_builtin = rm -f $@; \
|
||||
cmd_ar_builtin = $(update_lto_symversions) \
|
||||
rm -f $@; \
|
||||
$(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(filter $(real-obj-y), $^)
|
||||
|
||||
$(builtin-target): $(real-obj-y) FORCE
|
||||
|
@ -468,7 +507,11 @@ ifdef lib-target
|
|||
quiet_cmd_link_l_target = AR $@
|
||||
|
||||
# lib target archives do get a symbol table and index
|
||||
cmd_link_l_target = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y)
|
||||
cmd_link_l_target = \
|
||||
$(update_lto_symversions) \
|
||||
rm -f $@; \
|
||||
$(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(lib-y) \
|
||||
$(update_lto_symtable)
|
||||
|
||||
$(lib-target): $(lib-y) FORCE
|
||||
$(call if_changed,link_l_target)
|
||||
|
@ -479,13 +522,34 @@ dummy-object = $(obj)/.lib_exports.o
|
|||
ksyms-lds = $(dot-target).lds
|
||||
|
||||
quiet_cmd_export_list = EXPORTS $@
|
||||
cmd_export_list = $(OBJDUMP) -h $< | \
|
||||
sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
|
||||
rm -f $(dummy-object);\
|
||||
filter_export_list = sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p'
|
||||
link_export_list = rm -f $(dummy-object); \
|
||||
echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
|
||||
$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
|
||||
rm $(dummy-object) $(ksyms-lds)
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
  # Archive members may be LLVM IR, which objdump cannot read, and llvm-nm
  # does not handle archives. So visit every member on its own: dump it with
  # objdump when objdump accepts it, otherwise with llvm-nm, and append the
  # filtered result to the generated linker script.
  cmd_export_list =							\
	rm -f $(ksyms-lds);						\
	for member in $$($(AR) t $<); do				\
		if $(OBJDUMP) -h $$member >/dev/null 2>/dev/null; then	\
			$(OBJDUMP) -h $$member;				\
		else							\
			$(LLVM_NM) $$member;				\
		fi | $(filter_export_list) >>$(ksyms-lds);		\
	done;								\
	$(link_export_list)
else
  # Without LTO the whole archive can be handed to objdump in one go.
  cmd_export_list =							\
	$(OBJDUMP) -h $< | $(filter_export_list) >$(ksyms-lds);		\
	$(link_export_list)
endif
|
||||
|
||||
$(obj)/lib-ksyms.o: $(lib-target) FORCE
|
||||
$(call if_changed,export_list)
|
||||
|
||||
|
@ -509,13 +573,31 @@ $($(subst $(obj)/,,$(@:.o=-objs))) \
|
|||
$($(subst $(obj)/,,$(@:.o=-y))) \
|
||||
$($(subst $(obj)/,,$(@:.o=-m)))), $^)
|
||||
|
||||
cmd_link_multi-link = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
|
||||
|
||||
quiet_cmd_link_multi-y = AR $@
|
||||
cmd_link_multi-y = $(update_lto_symversions) \
|
||||
rm -f $@; $(AR) rcSTP$(KBUILD_ARFLAGS) $@ $(link_multi_deps) \
|
||||
$(update_lto_symtable)
|
||||
|
||||
quiet_cmd_link_multi-m = LD [M] $@
|
||||
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalysis)
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
|
||||
# don't compile IR until needed
|
||||
cmd_link_multi-m = $(cmd_link_multi-y)
|
||||
else
|
||||
cmd_link_multi-m = $(cmd_link_multi-link)
|
||||
endif
|
||||
|
||||
$(multi-used-y): FORCE
|
||||
$(call if_changed,link_multi-y)
|
||||
|
||||
$(multi-used-m): FORCE
|
||||
$(call if_changed,link_multi-m)
|
||||
@{ echo $(@:.o=.ko); echo $(link_multi_deps); \
|
||||
$(cmd_undef_syms); } > $(MODVERDIR)/$(@F:.o=.mod)
|
||||
|
||||
$(call multi_depend, $(multi-used-y), .o, -objs -y)
|
||||
$(call multi_depend, $(multi-used-m), .o, -objs -y -m)
|
||||
|
||||
targets += $(multi-used-m)
|
||||
|
|
|
@ -83,12 +83,28 @@ modpost = scripts/mod/modpost \
|
|||
|
||||
MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS)))
|
||||
|
||||
# If CONFIG_LTO_CLANG is enabled, .o files are either LLVM IR, or empty, so we
|
||||
# need to link them into actual objects before passing them to modpost
|
||||
modpost-ext = $(if $(CONFIG_LTO_CLANG),.lto,)
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
# Relocatable (-r) link that turns a module's %.o into %$(modpost-ext).o, the
# object handed to modpost. If a non-empty %.o.symversions file exists it is
# passed to the linker with -T.
quiet_cmd_cc_lto_link_modules = LD [M] $@
cmd_cc_lto_link_modules =						\
	$(LD) $(ld_flags) -r -o $@					\
		$(shell test -s $(patsubst %$(modpost-ext).o,%.o.symversions,$@) && \
			echo -T $(patsubst %$(modpost-ext).o,%.o.symversions,$@)) \
		--whole-archive $(filter-out FORCE,$^)

$(modules:.ko=$(modpost-ext).o): %$(modpost-ext).o: %.o FORCE
	$(call if_changed,cc_lto_link_modules)
endif
|
||||
|
||||
# We can go over command line length here, so be careful.
|
||||
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
|
||||
cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/.o/' | $(modpost) $(MODPOST_OPT) -s -T -
|
||||
cmd_modpost = $(MODLISTCMD) | sed 's/\.ko$$/$(modpost-ext)\.o/' | $(modpost) $(MODPOST_OPT) -s -T -
|
||||
|
||||
PHONY += __modpost
|
||||
__modpost: $(modules:.ko=.o) FORCE
|
||||
__modpost: $(modules:.ko=$(modpost-ext).o) FORCE
|
||||
$(call cmd,modpost) $(wildcard vmlinux)
|
||||
|
||||
quiet_cmd_kernel-mod = MODPOST $@
|
||||
|
@ -100,7 +116,6 @@ vmlinux.o: FORCE
|
|||
# Declare generated files as targets for modpost
|
||||
$(modules:.ko=.mod.c): __modpost ;
|
||||
|
||||
|
||||
# Step 5), compile all *.mod.c files
|
||||
|
||||
# modname is set to make c_flags define KBUILD_MODNAME
|
||||
|
@ -119,13 +134,24 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
|
|||
|
||||
# Step 6), final link of the modules with optional arch pass after final link
|
||||
quiet_cmd_ld_ko_o = LD [M] $@
|
||||
|
||||
ifdef CONFIG_LTO_CLANG
# Final module link with clang LTO: link the original %.o (not the
# %$(modpost-ext).o that was produced for modpost) together with %.mod.o, and
# pass the module's .o.symversions file with -T when it exists and is
# non-empty.
#
# Use $(KBUILD_LDFLAGS), exactly like the non-LTO command below and the other
# link commands of the build; plain $(LDFLAGS) does not carry the kernel's
# linker flags.
#
# NOTE(review): unlike the non-LTO command, this one does not run the
# $(ARCH_POSTLINK) pass - confirm that this is intentional.
cmd_ld_ko_o =								\
	$(LD) -r $(KBUILD_LDFLAGS)					\
		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
		$(shell [ -s $(@:.ko=.o.symversions) ] &&		\
			echo -T $(@:.ko=.o.symversions))		\
		-o $@ --whole-archive					\
		$(filter-out FORCE,$(^:$(modpost-ext).o=.o))
else
# Regular module link, followed by the optional arch post-link pass.
cmd_ld_ko_o =								\
	$(LD) -r $(KBUILD_LDFLAGS)					\
		$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)		\
		-o $@ $(filter-out FORCE,$^) ;				\
	$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
endif
|
||||
|
||||
$(modules): %.ko :%.o %.mod.o FORCE
|
||||
$(modules): %.ko: %$(modpost-ext).o %.mod.o FORCE
|
||||
+$(call if_changed,ld_ko_o)
|
||||
|
||||
targets += $(modules)
|
||||
|
|
|
@ -60,6 +60,38 @@ archive_builtin()
|
|||
${AR} rcsTP${KBUILD_ARFLAGS} built-in.a \
|
||||
${KBUILD_VMLINUX_INIT} \
|
||||
${KBUILD_VMLINUX_MAIN}
|
||||
|
||||
# rebuild with llvm-ar to update the symbol table
|
||||
if [ -n "${CONFIG_LTO_CLANG}" ]; then
|
||||
mv -f built-in.a built-in.a.tmp
|
||||
${LLVM_AR} rcsT${KBUILD_ARFLAGS} built-in.a $(${AR} t built-in.a.tmp)
|
||||
rm -f built-in.a.tmp
|
||||
fi
|
||||
}
|
||||
|
||||
# If both CONFIG_LTO_CLANG and CONFIG_MODVERSIONS are set, collect the
# generated symbol versions of every object in built-in.a and
# ${KBUILD_VMLINUX_LIBS} into .tmp_symversions.
#
# Prints "-T .tmp_symversions" for the caller to splice into the linker
# command line, but only when something was actually collected: the file is
# removed up front, so pointing the linker at it unconditionally would name a
# missing script. Prints nothing when either option is unset.
modversions()
{
	if [ -z "${CONFIG_LTO_CLANG}" ] || [ -z "${CONFIG_MODVERSIONS}" ]; then
		return
	fi

	rm -f .tmp_symversions

	# ${KBUILD_VMLINUX_LIBS} and the archive listing are deliberately left
	# unquoted: both are whitespace-separated lists of paths.
	for a in built-in.a ${KBUILD_VMLINUX_LIBS}; do
		for o in $(${AR} t "$a"); do
			if [ -f "${o}.symversions" ]; then
				cat "${o}.symversions" >> .tmp_symversions
			fi
		done
	done

	if [ -s .tmp_symversions ]; then
		echo "-T .tmp_symversions"
	fi
}
|
||||
|
||||
# Link of vmlinux.o used for section mismatch analysis
|
||||
|
@ -75,7 +107,13 @@ modpost_link()
|
|||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group"
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
|
||||
if [ -n "${CONFIG_LTO_CLANG}" ]; then
|
||||
# This might take a while, so indicate that we're doing
|
||||
# an LTO link
|
||||
info LTO vmlinux.o
|
||||
fi
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} -r -o ${1} $(modversions) ${objects}
|
||||
}
|
||||
|
||||
# Link of vmlinux
|
||||
|
@ -87,13 +125,20 @@ vmlinux_link()
|
|||
local objects
|
||||
|
||||
if [ "${SRCARCH}" != "um" ]; then
|
||||
objects="--whole-archive \
|
||||
built-in.a \
|
||||
--no-whole-archive \
|
||||
--start-group \
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group \
|
||||
${1}"
|
||||
if [ -z "${CONFIG_LTO_CLANG}" ]; then
|
||||
objects="--whole-archive \
|
||||
built-in.a \
|
||||
--no-whole-archive \
|
||||
--start-group \
|
||||
${KBUILD_VMLINUX_LIBS} \
|
||||
--end-group \
|
||||
${1}"
|
||||
else
|
||||
objects="--start-group \
|
||||
vmlinux.o \
|
||||
--end-group \
|
||||
${1}"
|
||||
fi
|
||||
|
||||
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \
|
||||
-T ${lds} ${objects}
|
||||
|
@ -114,7 +159,6 @@ vmlinux_link()
|
|||
fi
|
||||
}
|
||||
|
||||
|
||||
# Create ${2} .o file with all symbols from the ${1} object file
|
||||
kallsyms()
|
||||
{
|
||||
|
@ -159,6 +203,7 @@ cleanup()
|
|||
{
|
||||
rm -f .tmp_System.map
|
||||
rm -f .tmp_kallsyms*
|
||||
rm -f .tmp_symversions
|
||||
rm -f .tmp_vmlinux*
|
||||
rm -f built-in.a
|
||||
rm -f System.map
|
||||
|
@ -220,7 +265,6 @@ ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init
|
|||
archive_builtin
|
||||
|
||||
#link vmlinux.o
|
||||
info LD vmlinux.o
|
||||
modpost_link vmlinux.o
|
||||
|
||||
# modpost vmlinux.o to check for section mismatches
|
||||
|
|
|
@ -145,6 +145,9 @@ static struct module *new_module(const char *modname)
|
|||
p[strlen(p) - 2] = '\0';
|
||||
mod->is_dot_o = 1;
|
||||
}
|
||||
/* strip trailing .lto */
|
||||
if (strends(p, ".lto"))
|
||||
p[strlen(p) - 4] = '\0';
|
||||
|
||||
/* add to list */
|
||||
mod->name = p;
|
||||
|
@ -1927,6 +1930,10 @@ static char *remove_dot(char *s)
|
|||
size_t m = strspn(s + n + 1, "0123456789");
|
||||
if (m && (s[n + m] == '.' || s[n + m] == 0))
|
||||
s[n] = 0;
|
||||
|
||||
/* strip trailing .lto */
|
||||
if (strends(s, ".lto"))
|
||||
s[strlen(s) - 4] = '\0';
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue