i18n: Only include editor translations above a threshold

This reduces the size of the editor binaries significantly, as we otherwise
embed all WIP translations, including ones with very low completion ratios,
and end up paying for the size of all `msgid`s for each locale.

Cf. https://github.com/godotengine/godot-proposals/issues/3421 for details.

The thresholds used are:
- 30% for the editor interface (should already include most common strings
  while more obscure ones like UndoRedo action names might be untranslated).
- 10% for the class reference: this is a HUGE resource and 10% is already
  a lot of useful content, especially if focused on the most used APIs.

For 3.x, we also exclude languages that require complex text layout support
to be displayed properly.

This currently reduces the size of the editor binary by 17% on Linux.

The list will be synced manually every now and then.
This commit is contained in:
Rémi Verschelde 2021-10-20 13:47:50 +02:00
parent 485d9c3054
commit 8425c58991
No known key found for this signature in database
GPG key ID: C3336907360768E1
3 changed files with 62 additions and 7 deletions

View file

@ -21,3 +21,26 @@ merge:
# Validate every catalog with `msgfmt -c`.
# A failure is remembered in `status` instead of aborting right away, so all
# broken files get reported in one run while the target still exits non-zero
# (a bare `for` loop would only return the status of the last file checked).
.PHONY: check
check:
	@status=0; \
	for po in $(POFILES); do \
		msgfmt -c "$$po" -o /dev/null || status=1; \
	done; \
	exit $$status
# Print a comma-separated list of the languages whose completion ratio is
# strictly above `threshold`, skipping the languages listed in `exclude_ctl`
# (those that need complex text layout support to be displayed properly).
#
# The ratio is computed from the `msgfmt --statistics` summary, such as:
# 2775 translated messages, 272 fuzzy translations, 151 untranslated messages.
# First number can be 0, second and third numbers are only present if non-zero.
# After stripping everything but digits and commas, awk splits the summary on
# commas; absent fields evaluate to 0, so a fully translated catalog ("2775")
# is counted as 100% complete. An empty catalog (0 messages) is never listed.
#
# Notes:
# - LC_ALL=C pins the summary to the untranslated format documented above.
# - The recipe only uses POSIX sh constructs, as make runs recipes with
#   /bin/sh by default, which is not necessarily bash.
# - Exclusion is an exact match on the language code.
.PHONY: include-list
include-list:
	@list=""; \
	threshold=0.10; \
	exclude_ctl="ar bn fa he hi ml si ta te ur"; \
	for po in $(POFILES); do \
		lang=`basename "$$po" .po`; \
		case " $$exclude_ctl " in *" $$lang "*) continue ;; esac; \
		res=`LC_ALL=C msgfmt --statistics "$$po" -o /dev/null 2>&1 | sed 's/[^0-9,]*//g'`; \
		if echo "$$res" | awk -F, -v t="$$threshold" \
			'{ total = $$1 + $$2 + $$3; exit !(total > 0 && $$1 / total > t) }'; then \
			list="$$list$$lang,"; \
		fi; \
	done; \
	echo "$$list"

View file

@ -5,7 +5,6 @@ Import("env")
env.editor_sources = []
import os
import os.path
import glob
from platform_methods import run_in_subprocess
from compat import open_utf8
@ -61,31 +60,41 @@ if env["tools"]:
else:
docs += Glob(d + "/*.xml") # Custom.
_make_doc_data_class_path(os.path.join(env.Dir("#").abspath, "editor/doc"))
_make_doc_data_class_path(env.Dir("#editor/doc").abspath)
docs = sorted(docs)
env.Depends("#editor/doc_data_compressed.gen.h", docs)
env.CommandNoCache("#editor/doc_data_compressed.gen.h", docs, run_in_subprocess(editor_builders.make_doc_header))
path = env.Dir(".").abspath
# Editor interface and class reference translations incur a significant size
# cost for the editor binary (see godot-proposals#3421).
# To limit it, we only include translations with a high enough completion
# ratio (30% for the editor UI, 10% for the class reference).
# Generated with `make include-list` for each resource.
# Note: In 3.x, we also exclude languages that depend on complex text
# layouts to be displayed properly: ar,bn,fa,he,hi,ml,si,ta,te,ur.
# Editor translations
tlist = glob.glob(path + "/translations/*.po")
to_include = (
"bg,ca,cs,de,el,eo,es_AR,es,fi,fr,gl,hu,id,it,ja,ko,ms,nb,nl,pl,pt_BR,pt,ro,ru,sk,sv,th,tr,uk,vi,zh_CN,zh_TW"
).split(",")
tlist = [env.Dir("#editor/translations").abspath + "/" + f + ".po" for f in to_include]
env.Depends("#editor/editor_translations.gen.h", tlist)
env.CommandNoCache(
"#editor/editor_translations.gen.h", tlist, run_in_subprocess(editor_builders.make_editor_translations_header)
)
# Documentation translations
tlist = glob.glob(env.Dir("#doc").abspath + "/translations/*.po")
to_include = "es,fr,ja,zh_CN".split(",")
tlist = [env.Dir("#doc/translations").abspath + "/" + f + ".po" for f in to_include]
env.Depends("#editor/doc_translations.gen.h", tlist)
env.CommandNoCache(
"#editor/doc_translations.gen.h", tlist, run_in_subprocess(editor_builders.make_doc_translations_header)
)
# Fonts
flist = glob.glob(path + "/../thirdparty/fonts/*.ttf")
flist.extend(glob.glob(path + "/../thirdparty/fonts/*.otf"))
flist = glob.glob(env.Dir("#thirdparty").abspath + "/fonts/*.ttf")
flist.extend(glob.glob(env.Dir("#thirdparty").abspath + "/fonts/*.otf"))
flist.sort()
env.Depends("#editor/builtin_fonts.gen.h", flist)
env.CommandNoCache("#editor/builtin_fonts.gen.h", flist, run_in_subprocess(editor_builders.make_fonts_header))

View file

@ -18,3 +18,26 @@ merge:
# Validate every catalog with `msgfmt -c`.
# A failure is remembered in `status` instead of aborting right away, so all
# broken files get reported in one run while the target still exits non-zero
# (a bare `for` loop would only return the status of the last file checked).
.PHONY: check
check:
	@status=0; \
	for po in $(POFILES); do \
		msgfmt -c "$$po" -o /dev/null || status=1; \
	done; \
	exit $$status
# Print a comma-separated list of the languages whose completion ratio is
# strictly above `threshold`, skipping the languages listed in `exclude_ctl`
# (those that need complex text layout support to be displayed properly).
#
# The ratio is computed from the `msgfmt --statistics` summary, such as:
# 2775 translated messages, 272 fuzzy translations, 151 untranslated messages.
# First number can be 0, second and third numbers are only present if non-zero.
# After stripping everything but digits and commas, awk splits the summary on
# commas; absent fields evaluate to 0, so a fully translated catalog ("2775")
# is counted as 100% complete. An empty catalog (0 messages) is never listed.
#
# Notes:
# - LC_ALL=C pins the summary to the untranslated format documented above.
# - The recipe only uses POSIX sh constructs, as make runs recipes with
#   /bin/sh by default, which is not necessarily bash.
# - Exclusion is an exact match on the language code.
.PHONY: include-list
include-list:
	@list=""; \
	threshold=0.30; \
	exclude_ctl="ar bn fa he hi ml si ta te ur"; \
	for po in $(POFILES); do \
		lang=`basename "$$po" .po`; \
		case " $$exclude_ctl " in *" $$lang "*) continue ;; esac; \
		res=`LC_ALL=C msgfmt --statistics "$$po" -o /dev/null 2>&1 | sed 's/[^0-9,]*//g'`; \
		if echo "$$res" | awk -F, -v t="$$threshold" \
			'{ total = $$1 + $$2 + $$3; exit !(total > 0 && $$1 / total > t) }'; then \
			list="$$list$$lang,"; \
		fi; \
	done; \
	echo "$$list"