diff --git a/modules/text_server_adv/SCsub b/modules/text_server_adv/SCsub
index 3c468e61d7c..09d9b2dd67d 100644
--- a/modules/text_server_adv/SCsub
+++ b/modules/text_server_adv/SCsub
@@ -401,6 +401,8 @@ if env["builtin_icu4c"]:
"common/uloc.cpp",
"common/uloc_keytype.cpp",
"common/uloc_tag.cpp",
+ "common/ulocale.cpp",
+ "common/ulocbuilder.cpp",
"common/umapfile.cpp",
"common/umath.cpp",
"common/umutablecptrie.cpp",
@@ -466,7 +468,7 @@ if env["builtin_icu4c"]:
]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
- icu_data_name = "icudt73l.dat"
+ icu_data_name = "icudt74l.dat"
if env.editor_build:
env_icu.Depends("#thirdparty/icu4c/icudata.gen.h", "#thirdparty/icu4c/" + icu_data_name)
diff --git a/modules/text_server_adv/gdextension_build/SConstruct b/modules/text_server_adv/gdextension_build/SConstruct
index 4093842650f..f6d3b24cee4 100644
--- a/modules/text_server_adv/gdextension_build/SConstruct
+++ b/modules/text_server_adv/gdextension_build/SConstruct
@@ -623,6 +623,8 @@ thirdparty_icu_sources = [
"common/uloc.cpp",
"common/uloc_keytype.cpp",
"common/uloc_tag.cpp",
+ "common/ulocale.cpp",
+ "common/ulocbuilder.cpp",
"common/umapfile.cpp",
"common/umath.cpp",
"common/umutablecptrie.cpp",
@@ -688,7 +690,7 @@ thirdparty_icu_sources = [
]
thirdparty_icu_sources = [thirdparty_icu_dir + file for file in thirdparty_icu_sources]
-icu_data_name = "icudt73l.dat"
+icu_data_name = "icudt74l.dat"
if env["static_icu_data"]:
env_icu.Depends("../../../thirdparty/icu4c/icudata.gen.h", "../../../thirdparty/icu4c/" + icu_data_name)
diff --git a/thirdparty/README.md b/thirdparty/README.md
index be0693bc34e..d908017ab19 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -353,7 +353,7 @@ Files extracted from upstream source:
## icu4c
- Upstream: https://github.com/unicode-org/icu
-- Version: 73.2 (680f521746a3bd6a86f25f25ee50a62d88b489cf, 2023)
+- Version: 74.1 (9edac7b78327a1cb58db29e2714b15f9fa14e4d7, 2023)
- License: Unicode
Files extracted from upstream source:
@@ -365,7 +365,7 @@ Files extracted from upstream source:
Files generated from upstream source:
-- The `icudt73l.dat` built with the provided `godot_data.json` config file (see
+- The `icudt74l.dat` built with the provided `godot_data.json` config file (see
https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md
for instructions).
@@ -375,7 +375,7 @@ Files generated from upstream source:
3. Reconfigure ICU with custom data config:
`ICU_DATA_FILTER_FILE={GODOT_SOURCE}/thirdparty/icu4c/godot_data.json ./runConfigureICU {PLATFORM} --with-data-packaging=common`
4. Delete `data/out` folder and rebuild data: `cd data && rm -rf ./out && make`
-5. Copy `source/data/out/icudt73l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt73l.dat`
+5. Copy `source/data/out/icudt74l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt74l.dat`
## jpeg-compressor
diff --git a/thirdparty/icu4c/LICENSE b/thirdparty/icu4c/LICENSE
index 22472dc2ec2..9f54372febc 100644
--- a/thirdparty/icu4c/LICENSE
+++ b/thirdparty/icu4c/LICENSE
@@ -1,49 +1,42 @@
-UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
-
-See Terms of Use LanguageBreakEngines should normally be implemented so as to
* be shared between threads without locking.
Find any breaks within a run in the supplied text.
@@ -80,6 +83,35 @@ class LanguageBreakEngine : public UMemory { }; +/******************************************************************* + * BreakEngineWrapper + */ + +/** + *BreakEngineWrapper implement LanguageBreakEngine by + * a thin wrapper that delegate the task to ExternalBreakEngine + *
+ */ +class BreakEngineWrapper : public LanguageBreakEngine { + public: + + BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status); + + virtual ~BreakEngineWrapper(); + + virtual UBool handles(UChar32 c, const char* locale) const override; + + virtual int32_t findBreaks( UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks, + UBool isPhraseBreaking, + UErrorCode &status) const override; + + private: + LocalPointerFind any breaks within a run in the supplied text.
@@ -247,9 +281,18 @@ class ICULanguageBreakFactory : public LanguageBreakFactory { * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. + * @param locale The locale. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override; + virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override; + + /** + * Add and adopt the engine and return an URegistryKey. + * @param engine The ExternalBreakEngine to be added and adopt. The caller + * pass the ownership and should not release the memory after this. + * @param status the error code. + */ + virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status); protected: /** @@ -258,9 +301,10 @@ protected: * * @param c A character that begins a run for which a LanguageBreakEngine is * sought. + * @param locale The locale. * @return A LanguageBreakEngine with the desired characteristics, or 0. */ - virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale); /** *Create a DictionaryMatcher for the specified script and break type.
@@ -269,6 +313,9 @@ protected: * @return A DictionaryMatcher with the desired characteristics, or nullptr. */ virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); + + private: + void ensureEngines(UErrorCode& status); }; U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/brkiter.cpp b/thirdparty/icu4c/common/brkiter.cpp index 41e4e0dff57..b452cf2c050 100644 --- a/thirdparty/icu4c/common/brkiter.cpp +++ b/thirdparty/icu4c/common/brkiter.cpp @@ -27,6 +27,7 @@ #include "unicode/rbbi.h" #include "unicode/brkiter.h" #include "unicode/udata.h" +#include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/ustring.h" #include "unicode/filteredbrk.h" @@ -121,8 +122,11 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st // If there is a result, set the valid locale and actual locale, and the kind if (U_SUCCESS(status) && result != nullptr) { U_LOCALE_BASED(locBased, *(BreakIterator*)result); + locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale.data()); + uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY); + result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate } ures_close(b); @@ -202,18 +206,20 @@ BreakIterator::getAvailableLocales(int32_t& count) BreakIterator::BreakIterator() { - *validLocale = *actualLocale = 0; + *validLocale = *actualLocale = *requestLocale = 0; } BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); + uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); } BreakIterator &BreakIterator::operator =(const BreakIterator &other) { if (this != &other) { uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); + uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); } return *this; } @@ -493,12 +499,18 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) Locale BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { + if (type == ULOC_REQUESTED_LOCALE) { + return Locale(requestLocale); + } U_LOCALE_BASED(locBased, *this); return locBased.getLocale(type, status); } const char * BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { + if (type == ULOC_REQUESTED_LOCALE) { + return requestLocale; + } U_LOCALE_BASED(locBased, *this); return locBased.getLocaleID(type, status); } diff --git a/thirdparty/icu4c/common/characterproperties.cpp b/thirdparty/icu4c/common/characterproperties.cpp index 978e6761cee..f1e15b488d5 100644 --- a/thirdparty/icu4c/common/characterproperties.cpp +++ b/thirdparty/icu4c/common/characterproperties.cpp @@ -169,7 +169,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { case UPROPS_SRC_INPC: case UPROPS_SRC_INSC: case UPROPS_SRC_VO: - uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); + uprops_addPropertyStarts(src, &sa, &errorCode); break; case UPROPS_SRC_EMOJI: { const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode); @@ -178,6 +178,14 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { } break; } + case UPROPS_SRC_IDSU: + // New in Unicode 15.1 for just two characters. + sa.add(sa.set, 0x2FFE); + sa.add(sa.set, 0x2FFF + 1); + break; + case UPROPS_SRC_ID_COMPAT_MATH: + uprops_addPropertyStarts(src, &sa, &errorCode); + break; default: errorCode = U_INTERNAL_PROGRAM_ERROR; break; diff --git a/thirdparty/icu4c/common/dictbe.cpp b/thirdparty/icu4c/common/dictbe.cpp index 0e420c67c5d..3d672c03bfb 100644 --- a/thirdparty/icu4c/common/dictbe.cpp +++ b/thirdparty/icu4c/common/dictbe.cpp @@ -42,7 +42,7 @@ DictionaryBreakEngine::~DictionaryBreakEngine() { } UBool -DictionaryBreakEngine::handles(UChar32 c) const { +DictionaryBreakEngine::handles(UChar32 c, const char*) const { return fSet.contains(c); } @@ -54,13 +54,13 @@ DictionaryBreakEngine::findBreaks( UText *text, UBool isPhraseBreaking, UErrorCode& status) const { if (U_FAILURE(status)) return 0; - (void)startPos; // TODO: remove this param? int32_t result = 0; // Find the span of characters included in the set. // The span to break begins at the current position in the text, and // extends towards the start or end of the text, depending on 'reverse'. + utext_setNativeIndex(text, startPos); int32_t start = (int32_t)utext_getNativeIndex(text); int32_t current; int32_t rangeStart; diff --git a/thirdparty/icu4c/common/dictbe.h b/thirdparty/icu4c/common/dictbe.h index a2c761bdc3a..e512071fa45 100644 --- a/thirdparty/icu4c/common/dictbe.h +++ b/thirdparty/icu4c/common/dictbe.h @@ -62,10 +62,11 @@ class DictionaryBreakEngine : public LanguageBreakEngine { * a particular kind of break. * * @param c A character which begins a run that the engine might handle + * @param locale The locale. * @return true if this engine handles the particular character and break * type. */ - virtual UBool handles(UChar32 c) const override; + virtual UBool handles(UChar32 c, const char* locale) const override; /** *Find any breaks within a run in the supplied text.
diff --git a/thirdparty/icu4c/common/loadednormalizer2impl.cpp b/thirdparty/icu4c/common/loadednormalizer2impl.cpp index 768564edc89..99b8f3e86c6 100644 --- a/thirdparty/icu4c/common/loadednormalizer2impl.cpp +++ b/thirdparty/icu4c/common/loadednormalizer2impl.cpp @@ -143,6 +143,9 @@ static icu::UInitOnce nfkcInitOnce {}; static Norm2AllModes *nfkc_cfSingleton; static icu::UInitOnce nfkc_cfInitOnce {}; +static Norm2AllModes *nfkc_scfSingleton; +static icu::UInitOnce nfkc_scfInitOnce {}; + static UHashtable *cache=nullptr; // UInitOnce singleton initialization function @@ -156,6 +159,8 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode); } else if (uprv_strcmp(what, "nfkc_cf") == 0) { nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode); + } else if (uprv_strcmp(what, "nfkc_scf") == 0) { + nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode); } else { UPRV_UNREACHABLE_EXIT; // Unknown singleton } @@ -183,6 +188,10 @@ static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { nfkc_cfSingleton = nullptr; nfkc_cfInitOnce.reset(); + delete nfkc_scfSingleton; + nfkc_scfSingleton = nullptr; + nfkc_scfInitOnce.reset(); + uhash_close(cache); cache=nullptr; return true; @@ -213,6 +222,13 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { return nfkc_cfSingleton; } +const Norm2AllModes * +Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return nullptr; } + umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode); + return nfkc_scfSingleton; +} + #if !NORM2_HARDCODE_NFC_DATA const Normalizer2 * Normalizer2::getNFCInstance(UErrorCode &errorCode) { @@ -261,6 +277,12 @@ Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { return allModes!=nullptr ? &allModes->comp : nullptr; } +const Normalizer2 * +Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode); + return allModes!=nullptr ? &allModes->comp : nullptr; +} + const Normalizer2 * Normalizer2::getInstance(const char *packageName, const char *name, @@ -281,6 +303,8 @@ Normalizer2::getInstance(const char *packageName, allModes=Norm2AllModes::getNFKCInstance(errorCode); } else if(0==uprv_strcmp(name, "nfkc_cf")) { allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); + } else if(0==uprv_strcmp(name, "nfkc_scf")) { + allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode); } } if(allModes==nullptr && U_SUCCESS(errorCode)) { @@ -393,6 +417,11 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); } +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode); +} + U_CAPI const UNormalizer2 * U_EXPORT2 unorm2_getInstance(const char *packageName, const char *name, diff --git a/thirdparty/icu4c/common/localefallback_data.h b/thirdparty/icu4c/common/localefallback_data.h index d2eb7eaafde..c847edefff5 100644 --- a/thirdparty/icu4c/common/localefallback_data.h +++ b/thirdparty/icu4c/common/localefallback_data.h @@ -18,94 +18,93 @@ const char scriptCodeChars[] = "Kana\0Kawi\0Khar\0Khmr\0Kits\0Knda\0Kore\0Lana\0Laoo\0Lepc\0Lina\0" "Linb\0Lisu\0Lyci\0Lydi\0Mand\0Mani\0Marc\0Medf\0Merc\0Mlym\0Modi\0" "Mong\0Mroo\0Mtei\0Mymr\0Narb\0Newa\0Nkoo\0Nshu\0Ogam\0Olck\0Orkh\0" - "Orya\0Osge\0Ougr\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0Rohg\0Runr\0" - "Samr\0Sarb\0Saur\0Sgnw\0Sinh\0Sogd\0Sora\0Soyo\0Syrc\0Tagb\0Takr\0" - "Tale\0Talu\0Taml\0Tang\0Tavt\0Telu\0Tfng\0Thaa\0Thai\0Tibt\0Tnsa\0" - "Toto\0Ugar\0Vaii\0Wcho\0Xpeo\0Xsux\0Yiii\0"; + "Orya\0Osge\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0Rohg\0Runr\0Samr\0" + "Sarb\0Saur\0Sgnw\0Sinh\0Sogd\0Sora\0Soyo\0Syrc\0Tagb\0Takr\0Tale\0" + "Talu\0Taml\0Tang\0Tavt\0Telu\0Tfng\0Thaa\0Thai\0Tibt\0Tnsa\0Toto\0" + "Ugar\0Vaii\0Wcho\0Xpeo\0Xsux\0Yiii\0"; const char dsLocaleIDChars[] = - "aaf\0aao\0aat\0ab\0abh\0abl\0abq\0abv\0acm\0acq\0acw\0acx\0adf\0" - "adp\0adx\0ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0" - "aho\0ahr\0aib\0aij\0ain\0aio\0aiq\0ajp\0ajt\0akk\0akv\0alk\0all\0" - "alr\0alt\0alw\0am\0ams\0amw\0ani\0anp\0anr\0anu\0aot\0apc\0apd\0" - "aph\0aqc\0ar\0arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0atn\0atv\0" - "auj\0auz\0av\0avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0" - "az_IR\0az_RU\0azb\0ba\0bal\0bap\0bax\0bbl\0bcq\0bdv\0bdz\0be\0" - "bee\0bej\0bfb\0bfq\0bft\0bfu\0bfw\0bfy\0bfz\0bg\0bgc\0bgd\0bgn\0" - "bgp\0bgq\0bgw\0bgx\0bha\0bhb\0bhd\0bhe\0bhh\0bhi\0bhj\0bhm\0bhn\0" - "bho\0bht\0bhu\0biy\0bjf\0bji\0bjj\0bjm\0blk\0blt\0bmj\0bn\0bns\0" - "bo\0bph\0bpx\0bpy\0bqi\0bra\0brb\0brd\0brh\0brk\0brv\0brx\0bsh\0" - "bsk\0bsq\0bst\0btd\0btm\0btv\0bua\0bwe\0bxm\0bxu\0byh\0byn\0byw\0" - "bzi\0cbn\0ccp\0cde\0cdh\0cdi\0cdj\0cdm\0cdo\0cdz\0ce\0cgk\0chg\0" - "chm\0chr\0chx\0cih\0cja\0cji\0cjm\0cjy\0ckb\0ckt\0clh\0clw\0cmg\0" - "cna\0cnp\0cog\0cop\0cpg\0cr\0crh\0crj\0crk\0crl\0crm\0csh\0csp\0" - "csw\0ctd\0ctg\0ctn\0ctt\0cu\0cuu\0cv\0czh\0czk\0daq\0dar\0dcc\0" - "ddo\0def\0deh\0der\0dgl\0dhi\0dhn\0dho\0dhw\0dka\0dlg\0dmf\0dmk\0" - "dml\0dng\0dnu\0dnv\0doi\0dox\0dre\0drh\0drq\0drs\0dry\0dso\0dty\0" - "dub\0duh\0dus\0dv\0dwk\0dwz\0dz\0dzl\0ecr\0ecy\0egy\0eky\0el\0" - "emg\0emu\0enf\0enh\0era\0esg\0esh\0ett\0eve\0evn\0fa\0fay\0faz\0" - "fia\0fmu\0fub\0gan\0gaq\0gas\0gau\0gbj\0gbk\0gbl\0gbm\0gbz\0gdb\0" - "gdo\0gdx\0gez\0ggg\0ggn\0gha\0ghe\0ghr\0ght\0gig\0gin\0gjk\0gju\0" - "gld\0glh\0glk\0gmv\0gmy\0goe\0gof\0gok\0gom\0gon\0got\0gra\0grc\0" - "grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0gyo\0gzi\0ha_CM\0ha_SD\0hac\0" - "hak\0har\0haz\0hbo\0hdy\0he\0hi\0hii\0hit\0hkh\0hlb\0hlu\0hmd\0" - "hmj\0hmq\0hnd\0hne\0hnj\0hnj_AU\0hnj_CN\0hnj_FR\0hnj_GF\0hnj_LA\0" - "hnj_MM\0hnj_SR\0hnj_TH\0hnj_US\0hnj_VN\0hno\0hoc\0hoh\0hoj\0how\0" - "hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0huy\0huz\0hy\0hyw\0ii\0" - "imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0iu\0iw\0ja\0jad\0jat\0" - "jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0ji\0jje\0jkm\0jml\0jna\0" - "jnd\0jnl\0jns\0jog\0jpa\0jpr\0jul\0jun\0juy\0jya\0jye\0ka\0kaa\0" - "kap\0kaw\0kbd\0kbu\0kby\0kca\0kdq\0kdt\0ket\0kex\0key\0kfa\0kfb\0" - "kfc\0kfd\0kfe\0kfh\0kfi\0kfk\0kfm\0kfp\0kfq\0kfr\0kfs\0kfx\0kfy\0" - "kgj\0kgy\0khb\0khf\0khg\0khn\0kht\0khv\0khw\0kif\0kim\0kip\0kjg\0" - "kjh\0kjl\0kjo\0kjp\0kjt\0kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0" - "kkh\0kkt\0kle\0klj\0klr\0km\0kmj\0kmz\0kn\0ko\0koi\0kok\0kpt\0" - "kpy\0kqd\0kqy\0kra\0krc\0krk\0krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0" - "ktb\0ktl\0ktp\0ku_LB\0kuf\0kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0" - "kxc\0kxf\0kxk\0kxl\0kxm\0kxp\0kxv\0ky\0ky_CN\0kyu\0kyv\0kyw\0" - "kzh\0lab\0lad\0lae\0lah\0lbc\0lbe\0lbf\0lbj\0lbm\0lbo\0lbr\0lcp\0" - "lep\0lez\0lhm\0lhs\0lif\0lis\0lkh\0lki\0lmh\0lmn\0lo\0loy\0lpo\0" - "lrc\0lrk\0lrl\0lsa\0lsd\0lss\0luk\0luu\0luv\0luz\0lwl\0lwm\0lya\0" - "lzh\0mag\0mai\0man_GN\0mby\0mde\0mdf\0mdx\0mdy\0mfa\0mfi\0mgp\0" - "mhj\0mid\0mjl\0mjq\0mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0mke\0mki\0" - "mkm\0ml\0mlf\0mn\0mn_CN\0mni\0mnj\0mns\0mnw\0mpz\0mr\0mra\0mrd\0" - "mrj\0mro\0mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0muz\0mvf\0" - "mvy\0mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzn\0nan\0nao\0ncd\0" - "ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0nit\0niv\0nli\0nlm\0" - "nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0npb\0nqo\0nsd\0nsf\0" - "nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0nyq\0oaa\0oac\0oar\0oav\0" - "obm\0obr\0odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0ole\0omk\0omp\0" - "omr\0oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0osi\0ota\0otb\0otk\0" - "oty\0oui\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0pce\0pcf\0pcg\0pch\0" - "pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0phk\0phl\0phn\0pho\0" - "phr\0pht\0phv\0phw\0pi\0pka\0pkr\0plk\0pll\0pmh\0pnt\0ppa\0pra\0" + "aaf\0aao\0aat\0ab\0abh\0abl\0abv\0acm\0acq\0acw\0acx\0adf\0adx\0" + "ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0aho\0ahr\0" + "aib\0aij\0ain\0aio\0aiq\0ajp\0akk\0akv\0alk\0all\0alr\0alt\0alw\0" + "am\0ams\0amw\0ani\0anp\0anr\0anu\0aot\0apc\0apd\0aph\0aqc\0ar\0" + "arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0atn\0atv\0auj\0auz\0av\0" + "avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0az_IR\0az_RU\0" + "azb\0ba\0bal\0bap\0bax\0bbl\0bcq\0bdv\0bdz\0be\0bee\0bej\0bfb\0" + "bfq\0bft\0bfu\0bfw\0bfy\0bfz\0bg\0bgc\0bgd\0bgn\0bgp\0bgq\0bgw\0" + "bgx\0bha\0bhb\0bhd\0bhe\0bhh\0bhi\0bhj\0bhm\0bhn\0bho\0bht\0bhu\0" + "biy\0bjf\0bjj\0bjm\0blk\0blt\0bmj\0bn\0bns\0bo\0bph\0bpx\0bpy\0" + "bqi\0bra\0brb\0brd\0brh\0brk\0brv\0brx\0bsh\0bsk\0bsq\0bst\0btd\0" + "btm\0btv\0bua\0bwe\0bxm\0bxu\0byh\0byn\0byw\0bzi\0cbn\0ccp\0cde\0" + "cdh\0cdi\0cdj\0cdm\0cdo\0cdz\0ce\0cgk\0chg\0chm\0chr\0chx\0cih\0" + "cja\0cji\0cjm\0cjy\0ckb\0ckt\0clh\0clw\0cmg\0cna\0cnp\0cog\0cop\0" + "cpg\0cr\0crh\0crj\0crk\0crl\0crm\0csh\0csp\0csw\0ctd\0ctg\0ctn\0" + "ctt\0cu\0cuu\0cv\0czh\0czk\0daq\0dar\0dcc\0ddo\0def\0deh\0der\0" + "dhi\0dhn\0dho\0dhw\0dka\0dlg\0dmf\0dmk\0dml\0dng\0dnu\0dnv\0doi\0" + "dox\0dre\0drq\0drs\0dry\0dso\0dty\0dub\0duh\0dus\0dv\0dwk\0dwz\0" + "dz\0dzl\0ecr\0ecy\0egy\0eky\0el\0emg\0emu\0enf\0enh\0era\0esg\0" + "esh\0ett\0eve\0evn\0fa\0fay\0faz\0fia\0fmu\0fub\0gan\0gaq\0gas\0" + "gau\0gbj\0gbk\0gbl\0gbm\0gbz\0gdb\0gdo\0gdx\0gez\0ggg\0gha\0ghe\0" + "ghr\0ght\0gig\0gin\0gjk\0gju\0gld\0glh\0glk\0gmv\0gmy\0goe\0gof\0" + "gok\0gom\0gon\0got\0gra\0grc\0grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0" + "gyo\0gzi\0ha_CM\0ha_SD\0hac\0hak\0har\0haz\0hbo\0hdy\0he\0hi\0" + "hii\0hit\0hkh\0hlb\0hlu\0hmd\0hmj\0hmq\0hnd\0hne\0hnj\0hnj_AU\0" + "hnj_CN\0hnj_FR\0hnj_GF\0hnj_LA\0hnj_MM\0hnj_SR\0hnj_TH\0hnj_VN\0" + "hno\0hoc\0hoh\0hoj\0how\0hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0" + "huy\0huz\0hy\0hyw\0ii\0imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0" + "iu\0iw\0ja\0jad\0jat\0jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0" + "ji\0jje\0jkm\0jml\0jna\0jnd\0jnl\0jns\0jog\0jpa\0jpr\0jrb\0jrb_MA\0" + "jul\0jun\0juy\0jya\0jye\0ka\0kaa\0kap\0kaw\0kbd\0kbu\0kby\0kca\0" + "kdq\0kdt\0ket\0kex\0key\0kfa\0kfb\0kfc\0kfd\0kfe\0kfh\0kfi\0kfk\0" + "kfm\0kfp\0kfq\0kfr\0kfs\0kfx\0kfy\0kgj\0kgy\0khb\0khf\0khg\0khn\0" + "kht\0khv\0khw\0kif\0kim\0kip\0kjg\0kjh\0kjl\0kjo\0kjp\0kjt\0kk\0" + "kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0kkh\0kkt\0kle\0klj\0klr\0km\0" + "kmj\0kmz\0kn\0ko\0koi\0kok\0kpt\0kpy\0kqd\0kqy\0kra\0krc\0krk\0" + "krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0ktb\0ktl\0ktp\0ku_LB\0kuf\0" + "kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0kxf\0kxk\0kxm\0kxp\0ky\0ky_CN\0" + "kyu\0kyv\0kyw\0lab\0lad\0lae\0lah\0lbc\0lbe\0lbf\0lbj\0lbm\0lbo\0" + "lbr\0lcp\0lep\0lez\0lhm\0lhs\0lif\0lis\0lkh\0lki\0lmh\0lmn\0lo\0" + "loy\0lpo\0lrc\0lrk\0lrl\0lsa\0lsd\0lss\0luk\0luu\0luv\0luz\0lwl\0" + "lwm\0lya\0lzh\0mag\0mai\0man_GN\0mby\0mde\0mdf\0mdx\0mdy\0mfa\0" + "mfi\0mgp\0mhj\0mid\0mjl\0mjq\0mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0" + "mke\0mki\0mkm\0ml\0mlf\0mn\0mn_CN\0mni\0mnj\0mns\0mnw\0mpz\0mr\0" + "mra\0mrd\0mrj\0mro\0mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0" + "muz\0mvf\0mvy\0mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzn\0nan\0" + "nao\0ncd\0ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0nit\0niv\0" + "nli\0nlm\0nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0npb\0nqo\0" + "nsd\0nsf\0nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0nyq\0oaa\0oac\0" + "oar\0oav\0obm\0obr\0odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0ole\0" + "omk\0omp\0omr\0oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0osi\0ota\0" + "otb\0otk\0oty\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0pce\0pcf\0pcg\0" + "pch\0pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0phk\0phl\0phn\0" + "pho\0phr\0pht\0phv\0phw\0pi\0pka\0pkr\0plk\0pll\0pmh\0pnt\0pra\0" "prc\0prd\0prp\0prt\0prx\0ps\0psh\0psi\0pst\0pum\0pwo\0pwr\0pww\0" "pyx\0qxq\0raa\0rab\0raf\0rah\0raj\0rav\0rbb\0rdb\0rei\0rhg\0rji\0" - "rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0rsk\0rtw\0ru\0rue\0rut\0rwr\0" - "ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scp\0sct\0scu\0" - "scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sds\0sel\0sfm\0sga\0sgh\0" - "sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0shv\0si\0sia\0" - "sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0slq\0smh\0smp\0smu\0" - "smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0sr\0srb\0srh\0" - "srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0swi\0swv\0syc\0" - "syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0tcx\0tcy\0tda\0" - "tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0th\0the\0thf\0" - "thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0tjl\0tjo\0tkb\0" - "tks\0tkt\0tmk\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0trw\0tsd\0tsf\0" - "tsj\0tt\0tth\0tto\0tts\0tvn\0twm\0txg\0txo\0tyr\0tyv\0ude\0udg\0" - "udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0ulc\0unr\0" - "unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0vaa\0vaf\0" - "vah\0vai\0vas\0vav\0vay\0vgr\0vmd\0vmh\0wal\0wbk\0wbq\0wbr\0wlo\0" - "wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xal\0xan\0xas\0xco\0xcr\0xdq\0" - "xhe\0xhm\0xis\0xka\0xkc\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0xmn\0xmr\0" - "xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xss\0xub\0xuj\0" - "xve\0xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0ygp\0yhd\0" - "yi\0yig\0yih\0yiv\0ykg\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0ysn\0ysp\0" - "ysr\0ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0zba\0zch\0" - "zdj\0zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0" - "zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0" - "zh_US\0zh_VN\0zhd\0zhx\0zkb\0zko\0zkt\0zkz\0zlj\0zln\0zlq\0zqe\0" - "zrp\0zum\0zyg\0zyn\0zzj\0"; + "rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0rom_BG\0rsk\0rtw\0ru\0rue\0" + "rut\0rwr\0ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scl_IN\0" + "scp\0sct\0scu\0scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sds\0sel\0" + "sfm\0sga\0sgh\0sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0" + "shv\0si\0sia\0sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0slq\0" + "smh\0smp\0smu\0smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0" + "sr\0srb\0srh\0srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0" + "swi\0swv\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0" + "tcx\0tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0" + "th\0the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0" + "tjl\0tjo\0tkb\0tks\0tkt\0tmk\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0" + "trw\0tsd\0tsj\0tt\0tth\0tto\0tts\0tvn\0twm\0txg\0txo\0tyr\0tyv\0" + "ude\0udg\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0" + "ulc\0unr\0unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0" + "vaa\0vaf\0vah\0vai\0vas\0vav\0vay\0vgr\0vmd\0vmh\0wal\0wbk\0wbq\0" + "wbr\0wlo\0wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xal\0xan\0xas\0xco\0" + "xcr\0xdq\0xhe\0xhm\0xis\0xka\0xkc\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0" + "xmn\0xmr\0xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xss\0" + "xub\0xuj\0xve\0xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0" + "ygp\0yhd\0yi\0yig\0yih\0yiv\0ykg\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0" + "ysn\0ysp\0ysr\0ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0" + "zba\0zch\0zdj\0zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0" + "zh_GB\0zh_GF\0zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0" + "zh_TH\0zh_TW\0zh_US\0zh_VN\0zhd\0zhx\0zkb\0zko\0zkt\0zkz\0zlj\0" + "zln\0zlq\0zqe\0zrp\0zum\0zyg\0zyn\0zzj\0"; const int32_t defaultScriptTable[] = { 0, 320, // aaf -> Mlym @@ -113,1033 +112,1022 @@ const int32_t defaultScriptTable[] = { 8, 150, // aat -> Grek 12, 100, // ab -> Cyrl 15, 10, // abh -> Arab - 19, 425, // abl -> Rjng - 23, 100, // abq -> Cyrl - 27, 10, // abv -> Arab - 31, 10, // acm -> Arab - 35, 10, // acq -> Arab - 39, 10, // acw -> Arab - 43, 10, // acx -> Arab - 47, 10, // adf -> Arab - 51, 540, // adp -> Tibt - 55, 540, // adx -> Tibt - 59, 100, // ady -> Cyrl - 63, 25, // ae -> Avst - 66, 10, // aeb -> Arab - 70, 10, // aec -> Arab - 74, 10, // aee -> Arab - 78, 10, // aeq -> Arab - 82, 10, // afb -> Arab - 86, 105, // agi -> Deva - 90, 120, // agj -> Ethi - 94, 100, // agx -> Cyrl - 98, 120, // ahg -> Ethi - 102, 5, // aho -> Ahom - 106, 105, // ahr -> Deva - 110, 10, // aib -> Arab - 114, 185, // aij -> Hebr - 118, 220, // ain -> Kana - 122, 345, // aio -> Mymr - 126, 10, // aiq -> Arab - 130, 10, // ajp -> Arab - 134, 10, // ajt -> Arab - 138, 575, // akk -> Xsux - 142, 100, // akv -> Cyrl - 146, 260, // alk -> Laoo - 150, 320, // all -> Mlym - 154, 100, // alr -> Cyrl - 158, 100, // alt -> Cyrl - 162, 120, // alw -> Ethi - 166, 120, // am -> Ethi - 169, 210, // ams -> Jpan - 173, 480, // amw -> Syrc - 177, 100, // ani -> Cyrl - 181, 105, // anp -> Deva - 185, 105, // anr -> Deva - 189, 120, // anu -> Ethi - 193, 45, // aot -> Beng - 197, 10, // apc -> Arab - 201, 10, // apd -> Arab - 205, 105, // aph -> Deva - 209, 100, // aqc -> Cyrl - 213, 10, // ar -> Arab - 216, 15, // arc -> Armi - 220, 10, // arq -> Arab - 224, 10, // ars -> Arab - 228, 10, // ary -> Arab - 232, 10, // arz -> Arab - 236, 45, // as -> Beng - 239, 455, // ase -> Sgnw - 243, 10, // ask -> Arab - 247, 10, // atn -> Arab - 251, 100, // atv -> Cyrl - 255, 10, // auj -> Arab - 259, 10, // auz -> Arab - 263, 100, // av -> Cyrl - 266, 10, // avd -> Arab - 270, 10, // avl -> Arab - 274, 105, // awa -> Deva - 278, 120, // awn -> Ethi - 282, 20, // axm -> Armn - 286, 10, // ayh -> Arab - 290, 10, // ayl -> Arab - 294, 10, // ayn -> Arab - 298, 10, // ayp -> Arab - 302, 10, // az_IQ -> Arab - 308, 10, // az_IR -> Arab - 314, 100, // az_RU -> Cyrl - 320, 10, // azb -> Arab - 324, 100, // ba -> Cyrl - 327, 10, // bal -> Arab - 331, 105, // bap -> Deva - 335, 30, // bax -> Bamu - 339, 125, // bbl -> Geor - 343, 120, // bcq -> Ethi - 347, 385, // bdv -> Orya - 351, 10, // bdz -> Arab - 355, 100, // be -> Cyrl - 358, 105, // bee -> Deva - 362, 10, // bej -> Arab - 366, 105, // bfb -> Deva - 370, 505, // bfq -> Taml - 374, 10, // bft -> Arab - 378, 540, // bfu -> Tibt - 382, 385, // bfw -> Orya - 386, 105, // bfy -> Deva - 390, 105, // bfz -> Deva - 394, 100, // bg -> Cyrl - 397, 105, // bgc -> Deva - 401, 105, // bgd -> Deva - 405, 10, // bgn -> Arab - 409, 10, // bgp -> Arab - 413, 105, // bgq -> Deva - 417, 105, // bgw -> Deva - 421, 150, // bgx -> Grek - 425, 105, // bha -> Deva - 429, 105, // bhb -> Deva - 433, 105, // bhd -> Deva - 437, 10, // bhe -> Arab - 441, 100, // bhh -> Cyrl - 445, 105, // bhi -> Deva - 449, 105, // bhj -> Deva - 453, 10, // bhm -> Arab - 457, 480, // bhn -> Syrc - 461, 105, // bho -> Deva - 465, 490, // bht -> Takr - 469, 105, // bhu -> Deva - 473, 105, // biy -> Deva - 477, 480, // bjf -> Syrc - 481, 120, // bji -> Ethi - 485, 105, // bjj -> Deva - 489, 10, // bjm -> Arab - 493, 345, // blk -> Mymr - 497, 515, // blt -> Tavt - 501, 105, // bmj -> Deva - 505, 45, // bn -> Beng - 508, 105, // bns -> Deva - 512, 540, // bo -> Tibt - 515, 100, // bph -> Cyrl - 519, 105, // bpx -> Deva - 523, 45, // bpy -> Beng - 527, 10, // bqi -> Arab - 531, 105, // bra -> Deva - 535, 235, // brb -> Khmr - 539, 105, // brd -> Deva - 543, 10, // brh -> Arab - 547, 10, // brk -> Arab - 551, 260, // brv -> Laoo - 555, 105, // brx -> Deva - 559, 10, // bsh -> Arab - 563, 10, // bsk -> Arab - 567, 35, // bsq -> Bass - 571, 120, // bst -> Ethi - 575, 40, // btd -> Batk - 579, 40, // btm -> Batk - 583, 105, // btv -> Deva - 587, 100, // bua -> Cyrl - 591, 345, // bwe -> Mymr - 595, 100, // bxm -> Cyrl - 599, 330, // bxu -> Mong - 603, 105, // byh -> Deva - 607, 120, // byn -> Ethi - 611, 105, // byw -> Deva - 615, 535, // bzi -> Thai - 619, 535, // cbn -> Thai - 623, 60, // ccp -> Cakm - 627, 520, // cde -> Telu - 631, 105, // cdh -> Deva - 635, 155, // cdi -> Gujr - 639, 105, // cdj -> Deva - 643, 105, // cdm -> Deva - 647, 175, // cdo -> Hans - 651, 45, // cdz -> Beng - 655, 100, // ce -> Cyrl - 658, 540, // cgk -> Tibt - 662, 10, // chg -> Arab - 666, 100, // chm -> Cyrl - 670, 80, // chr -> Cher - 674, 105, // chx -> Deva - 678, 105, // cih -> Deva - 682, 10, // cja -> Arab - 686, 100, // cji -> Cyrl - 690, 75, // cjm -> Cham - 694, 175, // cjy -> Hans - 698, 10, // ckb -> Arab - 702, 100, // ckt -> Cyrl - 706, 10, // clh -> Arab - 710, 100, // clw -> Cyrl - 714, 475, // cmg -> Soyo - 718, 540, // cna -> Tibt - 722, 175, // cnp -> Hans - 726, 535, // cog -> Thai - 730, 90, // cop -> Copt - 734, 150, // cpg -> Grek - 738, 65, // cr -> Cans - 741, 100, // crh -> Cyrl - 745, 65, // crj -> Cans - 749, 65, // crk -> Cans - 753, 65, // crl -> Cans - 757, 65, // crm -> Cans - 761, 345, // csh -> Mymr - 765, 175, // csp -> Hans - 769, 65, // csw -> Cans - 773, 400, // ctd -> Pauc - 777, 45, // ctg -> Beng - 781, 105, // ctn -> Deva - 785, 505, // ctt -> Taml - 789, 100, // cu -> Cyrl - 792, 255, // cuu -> Lana - 796, 100, // cv -> Cyrl - 799, 175, // czh -> Hans - 803, 185, // czk -> Hebr - 807, 105, // daq -> Deva - 811, 100, // dar -> Cyrl - 815, 10, // dcc -> Arab - 819, 100, // ddo -> Cyrl - 823, 10, // def -> Arab - 827, 10, // deh -> Arab - 831, 45, // der -> Beng - 835, 10, // dgl -> Arab - 839, 105, // dhi -> Deva - 843, 155, // dhn -> Gujr - 847, 105, // dho -> Deva - 851, 105, // dhw -> Deva - 855, 540, // dka -> Tibt - 859, 100, // dlg -> Cyrl - 863, 310, // dmf -> Medf - 867, 10, // dmk -> Arab - 871, 10, // dml -> Arab - 875, 100, // dng -> Cyrl - 879, 345, // dnu -> Mymr - 883, 345, // dnv -> Mymr - 887, 105, // doi -> Deva - 891, 120, // dox -> Ethi - 895, 540, // dre -> Tibt - 899, 330, // drh -> Mong - 903, 105, // drq -> Deva - 907, 120, // drs -> Ethi - 911, 105, // dry -> Deva - 915, 385, // dso -> Orya - 919, 105, // dty -> Deva - 923, 155, // dub -> Gujr - 927, 105, // duh -> Deva - 931, 105, // dus -> Deva - 935, 530, // dv -> Thaa - 938, 385, // dwk -> Orya - 942, 105, // dwz -> Deva - 946, 540, // dz -> Tibt - 949, 540, // dzl -> Tibt - 953, 150, // ecr -> Grek - 957, 95, // ecy -> Cprt - 961, 110, // egy -> Egyp - 965, 215, // eky -> Kali - 969, 150, // el -> Grek - 972, 105, // emg -> Deva - 976, 105, // emu -> Deva - 980, 100, // enf -> Cyrl - 984, 100, // enh -> Cyrl - 988, 505, // era -> Taml - 992, 135, // esg -> Gonm - 996, 10, // esh -> Arab - 1000, 200, // ett -> Ital - 1004, 100, // eve -> Cyrl - 1008, 100, // evn -> Cyrl - 1012, 10, // fa -> Arab - 1015, 10, // fay -> Arab - 1019, 10, // faz -> Arab - 1023, 10, // fia -> Arab - 1027, 105, // fmu -> Deva - 1031, 10, // fub -> Arab - 1035, 175, // gan -> Hans - 1039, 385, // gaq -> Orya - 1043, 155, // gas -> Gujr - 1047, 520, // gau -> Telu - 1051, 385, // gbj -> Orya - 1055, 105, // gbk -> Deva - 1059, 155, // gbl -> Gujr - 1063, 105, // gbm -> Deva - 1067, 10, // gbz -> Arab - 1071, 385, // gdb -> Orya - 1075, 100, // gdo -> Cyrl - 1079, 105, // gdx -> Deva - 1083, 120, // gez -> Ethi - 1087, 10, // ggg -> Arab - 1091, 105, // ggn -> Deva - 1095, 10, // gha -> Arab - 1099, 105, // ghe -> Deva - 1103, 10, // ghr -> Arab - 1107, 540, // ght -> Tibt - 1111, 10, // gig -> Arab - 1115, 100, // gin -> Cyrl - 1119, 10, // gjk -> Arab - 1123, 10, // gju -> Arab - 1127, 100, // gld -> Cyrl - 1131, 10, // glh -> Arab - 1135, 10, // glk -> Arab - 1139, 120, // gmv -> Ethi - 1143, 275, // gmy -> Linb - 1147, 540, // goe -> Tibt - 1151, 120, // gof -> Ethi - 1155, 105, // gok -> Deva - 1159, 105, // gom -> Deva - 1163, 520, // gon -> Telu - 1167, 140, // got -> Goth - 1171, 105, // gra -> Deva - 1175, 95, // grc -> Cprt - 1179, 45, // grt -> Beng - 1183, 120, // gru -> Ethi - 1187, 155, // gu -> Gujr - 1190, 105, // gvr -> Deva - 1194, 10, // gwc -> Arab - 1198, 10, // gwf -> Arab - 1202, 10, // gwt -> Arab - 1206, 105, // gyo -> Deva - 1210, 10, // gzi -> Arab - 1214, 10, // ha_CM -> Arab - 1220, 10, // ha_SD -> Arab - 1226, 10, // hac -> Arab - 1230, 175, // hak -> Hans - 1234, 120, // har -> Ethi - 1238, 10, // haz -> Arab - 1242, 185, // hbo -> Hebr - 1246, 120, // hdy -> Ethi - 1250, 185, // he -> Hebr - 1253, 105, // hi -> Deva - 1256, 490, // hii -> Takr - 1260, 575, // hit -> Xsux - 1264, 10, // hkh -> Arab - 1268, 105, // hlb -> Deva - 1272, 190, // hlu -> Hluw - 1276, 415, // hmd -> Plrd - 1280, 50, // hmj -> Bopo - 1284, 50, // hmq -> Bopo - 1288, 10, // hnd -> Arab - 1292, 105, // hne -> Deva - 1296, 195, // hnj -> Hmnp - 1300, 260, // hnj_AU -> Laoo - 1307, 260, // hnj_CN -> Laoo - 1314, 260, // hnj_FR -> Laoo - 1321, 260, // hnj_GF -> Laoo - 1328, 260, // hnj_LA -> Laoo - 1335, 260, // hnj_MM -> Laoo - 1342, 260, // hnj_SR -> Laoo - 1349, 260, // hnj_TH -> Laoo - 1356, 195, // hnj_US -> Hmnp - 1363, 260, // hnj_VN -> Laoo - 1370, 10, // hno -> Arab - 1374, 105, // hoc -> Deva - 1378, 10, // hoh -> Arab - 1382, 105, // hoj -> Deva - 1386, 170, // how -> Hani - 1390, 105, // hoy -> Deva - 1394, 345, // hpo -> Mymr - 1398, 480, // hrt -> Syrc - 1402, 10, // hrz -> Arab - 1406, 175, // hsn -> Hans - 1410, 10, // hss -> Arab - 1414, 575, // htx -> Xsux - 1418, 105, // hut -> Deva - 1422, 185, // huy -> Hebr - 1426, 100, // huz -> Cyrl - 1430, 20, // hy -> Armn - 1433, 20, // hyw -> Armn - 1437, 580, // ii -> Yiii - 1440, 285, // imy -> Lyci - 1444, 100, // inh -> Cyrl - 1448, 345, // int -> Mymr - 1452, 120, // ior -> Ethi - 1456, 505, // iru -> Taml - 1460, 10, // isk -> Arab - 1464, 185, // itk -> Hebr - 1468, 100, // itl -> Cyrl - 1472, 65, // iu -> Cans - 1475, 185, // iw -> Hebr - 1478, 210, // ja -> Jpan - 1481, 10, // jad -> Arab - 1485, 10, // jat -> Arab - 1489, 185, // jbe -> Hebr - 1493, 10, // jbn -> Arab - 1497, 100, // jct -> Cyrl - 1501, 540, // jda -> Tibt - 1505, 10, // jdg -> Arab - 1509, 100, // jdt -> Cyrl - 1513, 105, // jee -> Deva - 1517, 125, // jge -> Geor - 1521, 185, // ji -> Hebr - 1524, 165, // jje -> Hang - 1528, 345, // jkm -> Mymr - 1532, 105, // jml -> Deva - 1536, 490, // jna -> Takr - 1540, 10, // jnd -> Arab - 1544, 105, // jnl -> Deva - 1548, 105, // jns -> Deva - 1552, 10, // jog -> Arab - 1556, 185, // jpa -> Hebr - 1560, 185, // jpr -> Hebr - 1564, 105, // jul -> Deva - 1568, 385, // jun -> Orya - 1572, 385, // juy -> Orya - 1576, 540, // jya -> Tibt - 1580, 185, // jye -> Hebr - 1584, 125, // ka -> Geor - 1587, 100, // kaa -> Cyrl - 1591, 100, // kap -> Cyrl - 1595, 225, // kaw -> Kawi - 1599, 100, // kbd -> Cyrl - 1603, 10, // kbu -> Arab - 1607, 10, // kby -> Arab - 1611, 100, // kca -> Cyrl - 1615, 45, // kdq -> Beng - 1619, 535, // kdt -> Thai - 1623, 100, // ket -> Cyrl - 1627, 105, // kex -> Deva - 1631, 520, // key -> Telu - 1635, 245, // kfa -> Knda - 1639, 105, // kfb -> Deva - 1643, 520, // kfc -> Telu - 1647, 245, // kfd -> Knda - 1651, 505, // kfe -> Taml - 1655, 320, // kfh -> Mlym - 1659, 505, // kfi -> Taml - 1663, 105, // kfk -> Deva - 1667, 10, // kfm -> Arab - 1671, 105, // kfp -> Deva - 1675, 105, // kfq -> Deva - 1679, 105, // kfr -> Deva - 1683, 105, // kfs -> Deva - 1687, 105, // kfx -> Deva - 1691, 105, // kfy -> Deva - 1695, 105, // kgj -> Deva - 1699, 105, // kgy -> Deva - 1703, 500, // khb -> Talu - 1707, 535, // khf -> Thai - 1711, 540, // khg -> Tibt - 1715, 105, // khn -> Deva - 1719, 345, // kht -> Mymr - 1723, 100, // khv -> Cyrl - 1727, 10, // khw -> Arab - 1731, 105, // kif -> Deva - 1735, 100, // kim -> Cyrl - 1739, 105, // kip -> Deva - 1743, 260, // kjg -> Laoo - 1747, 100, // kjh -> Cyrl - 1751, 105, // kjl -> Deva - 1755, 105, // kjo -> Deva - 1759, 345, // kjp -> Mymr - 1763, 535, // kjt -> Thai - 1767, 100, // kk -> Cyrl - 1770, 10, // kk_AF -> Arab - 1776, 10, // kk_CN -> Arab - 1782, 10, // kk_IR -> Arab - 1788, 10, // kk_MN -> Arab - 1794, 540, // kkf -> Tibt - 1798, 255, // kkh -> Lana - 1802, 105, // kkt -> Deva - 1806, 105, // kle -> Deva - 1810, 10, // klj -> Arab - 1814, 105, // klr -> Deva - 1818, 235, // km -> Khmr - 1821, 105, // kmj -> Deva - 1825, 10, // kmz -> Arab - 1829, 245, // kn -> Knda - 1832, 250, // ko -> Kore - 1835, 100, // koi -> Cyrl - 1839, 105, // kok -> Deva - 1843, 100, // kpt -> Cyrl - 1847, 100, // kpy -> Cyrl - 1851, 480, // kqd -> Syrc - 1855, 120, // kqy -> Ethi - 1859, 105, // kra -> Deva - 1863, 100, // krc -> Cyrl - 1867, 100, // krk -> Cyrl - 1871, 235, // krr -> Khmr - 1875, 105, // kru -> Deva - 1879, 235, // krv -> Khmr - 1883, 10, // ks -> Arab - 1886, 345, // ksu -> Mymr - 1890, 345, // ksw -> Mymr - 1894, 105, // ksz -> Deva - 1898, 120, // ktb -> Ethi - 1902, 10, // ktl -> Arab - 1906, 415, // ktp -> Plrd - 1910, 10, // ku_LB -> Arab - 1916, 260, // kuf -> Laoo - 1920, 100, // kum -> Cyrl - 1924, 100, // kv -> Cyrl - 1927, 100, // kva -> Cyrl - 1931, 345, // kvq -> Mymr - 1935, 345, // kvt -> Mymr - 1939, 10, // kvx -> Arab - 1943, 215, // kvy -> Kali - 1947, 120, // kxc -> Ethi - 1951, 345, // kxf -> Mymr - 1955, 345, // kxk -> Mymr - 1959, 105, // kxl -> Deva - 1963, 535, // kxm -> Thai - 1967, 10, // kxp -> Arab - 1971, 385, // kxv -> Orya - 1975, 100, // ky -> Cyrl - 1978, 10, // ky_CN -> Arab - 1984, 215, // kyu -> Kali - 1988, 105, // kyv -> Deva - 1992, 105, // kyw -> Deva - 1996, 10, // kzh -> Arab - 2000, 270, // lab -> Lina - 2004, 185, // lad -> Hebr - 2008, 105, // lae -> Deva - 2012, 10, // lah -> Arab - 2016, 280, // lbc -> Lisu - 2020, 100, // lbe -> Cyrl - 2024, 105, // lbf -> Deva - 2028, 540, // lbj -> Tibt - 2032, 105, // lbm -> Deva - 2036, 260, // lbo -> Laoo - 2040, 105, // lbr -> Deva - 2044, 535, // lcp -> Thai - 2048, 265, // lep -> Lepc - 2052, 100, // lez -> Cyrl - 2056, 105, // lhm -> Deva - 2060, 480, // lhs -> Syrc - 2064, 105, // lif -> Deva - 2068, 280, // lis -> Lisu - 2072, 540, // lkh -> Tibt - 2076, 10, // lki -> Arab - 2080, 105, // lmh -> Deva - 2084, 520, // lmn -> Telu - 2088, 260, // lo -> Laoo - 2091, 105, // loy -> Deva - 2095, 415, // lpo -> Plrd - 2099, 10, // lrc -> Arab - 2103, 10, // lrk -> Arab - 2107, 10, // lrl -> Arab - 2111, 10, // lsa -> Arab - 2115, 185, // lsd -> Hebr - 2119, 10, // lss -> Arab - 2123, 540, // luk -> Tibt - 2127, 105, // luu -> Deva - 2131, 10, // luv -> Arab - 2135, 10, // luz -> Arab - 2139, 535, // lwl -> Thai - 2143, 535, // lwm -> Thai - 2147, 540, // lya -> Tibt - 2151, 175, // lzh -> Hans - 2155, 105, // mag -> Deva - 2159, 105, // mai -> Deva - 2163, 360, // man_GN -> Nkoo - 2170, 10, // mby -> Arab - 2174, 10, // mde -> Arab - 2178, 100, // mdf -> Cyrl - 2182, 120, // mdx -> Ethi - 2186, 120, // mdy -> Ethi - 2190, 10, // mfa -> Arab - 2194, 10, // mfi -> Arab - 2198, 105, // mgp -> Deva - 2202, 10, // mhj -> Arab - 2206, 295, // mid -> Mand - 2210, 105, // mjl -> Deva - 2214, 320, // mjq -> Mlym - 2218, 320, // mjr -> Mlym - 2222, 105, // mjt -> Deva - 2226, 520, // mju -> Telu - 2230, 320, // mjv -> Mlym - 2234, 105, // mjz -> Deva - 2238, 100, // mk -> Cyrl - 2241, 105, // mkb -> Deva - 2245, 105, // mke -> Deva - 2249, 10, // mki -> Arab - 2253, 535, // mkm -> Thai - 2257, 320, // ml -> Mlym - 2260, 535, // mlf -> Thai - 2264, 100, // mn -> Cyrl - 2267, 330, // mn_CN -> Mong - 2273, 45, // mni -> Beng - 2277, 10, // mnj -> Arab - 2281, 100, // mns -> Cyrl - 2285, 345, // mnw -> Mymr - 2289, 535, // mpz -> Thai - 2293, 105, // mr -> Deva - 2296, 535, // mra -> Thai - 2300, 105, // mrd -> Deva - 2304, 100, // mrj -> Cyrl - 2308, 335, // mro -> Mroo - 2312, 105, // mrr -> Deva - 2316, 10, // ms_CC -> Arab - 2322, 100, // mtm -> Cyrl - 2326, 105, // mtr -> Deva - 2330, 100, // mud -> Cyrl - 2334, 540, // muk -> Tibt - 2338, 105, // mut -> Deva - 2342, 505, // muv -> Taml - 2346, 120, // muz -> Ethi - 2350, 330, // mvf -> Mong - 2354, 10, // mvy -> Arab - 2358, 120, // mvz -> Ethi - 2362, 105, // mwr -> Deva - 2366, 345, // mwt -> Mymr - 2370, 195, // mww -> Hmnp - 2374, 345, // my -> Mymr - 2377, 120, // mym -> Ethi - 2381, 100, // myv -> Cyrl - 2385, 295, // myz -> Mand - 2389, 10, // mzn -> Arab - 2393, 175, // nan -> Hans - 2397, 105, // nao -> Deva - 2401, 105, // ncd -> Deva - 2405, 260, // ncq -> Laoo - 2409, 100, // ndf -> Cyrl - 2413, 105, // ne -> Deva - 2416, 100, // neg -> Cyrl - 2420, 540, // neh -> Tibt - 2424, 575, // nei -> Xsux - 2428, 105, // new -> Deva - 2432, 260, // ngt -> Laoo - 2436, 100, // nio -> Cyrl - 2440, 520, // nit -> Telu - 2444, 100, // niv -> Cyrl - 2448, 10, // nli -> Arab - 2452, 10, // nlm -> Arab - 2456, 105, // nlx -> Deva - 2460, 105, // nmm -> Deva - 2464, 565, // nnp -> Wcho - 2468, 255, // nod -> Lana - 2472, 105, // noe -> Deva - 2476, 100, // nog -> Cyrl - 2480, 105, // noi -> Deva - 2484, 435, // non -> Runr - 2488, 580, // nos -> Yiii - 2492, 540, // npb -> Tibt - 2496, 360, // nqo -> Nkoo - 2500, 580, // nsd -> Yiii - 2504, 580, // nsf -> Yiii - 2508, 65, // nsk -> Cans - 2512, 545, // nst -> Tnsa - 2516, 580, // nsv -> Yiii - 2520, 580, // nty -> Yiii - 2524, 10, // ntz -> Arab - 2528, 355, // nwc -> Newa - 2532, 105, // nwx -> Deva - 2536, 535, // nyl -> Thai - 2540, 10, // nyq -> Arab - 2544, 100, // oaa -> Cyrl - 2548, 100, // oac -> Cyrl - 2552, 480, // oar -> Syrc - 2556, 125, // oav -> Geor - 2560, 410, // obm -> Phnx - 2564, 345, // obr -> Mymr - 2568, 10, // odk -> Arab - 2572, 575, // oht -> Xsux - 2576, 65, // oj -> Cans - 2579, 65, // ojs -> Cans - 2583, 165, // okm -> Hang - 2587, 170, // oko -> Hani - 2591, 235, // okz -> Khmr - 2595, 105, // ola -> Deva - 2599, 540, // ole -> Tibt - 2603, 100, // omk -> Cyrl - 2607, 340, // omp -> Mtei - 2611, 325, // omr -> Modi - 2615, 105, // oon -> Deva - 2619, 385, // or -> Orya - 2622, 520, // ort -> Telu - 2626, 10, // oru -> Arab - 2630, 100, // orv -> Cyrl - 2634, 100, // os -> Cyrl - 2637, 390, // osa -> Osge - 2641, 200, // osc -> Ital - 2645, 205, // osi -> Java - 2649, 10, // ota -> Arab - 2653, 540, // otb -> Tibt - 2657, 380, // otk -> Orkh - 2661, 145, // oty -> Gran - 2665, 395, // oui -> Ougr - 2669, 160, // pa -> Guru - 2672, 10, // pa_PK -> Arab - 2678, 405, // pal -> Phli - 2682, 100, // paq -> Cyrl - 2686, 10, // pbt -> Arab - 2690, 235, // pcb -> Khmr - 2694, 345, // pce -> Mymr - 2698, 320, // pcf -> Mlym - 2702, 320, // pcg -> Mlym - 2706, 105, // pch -> Deva - 2710, 105, // pci -> Deva - 2714, 520, // pcj -> Telu - 2718, 385, // peg -> Orya - 2722, 570, // peo -> Xpeo - 2726, 230, // pgd -> Khar - 2730, 105, // pgg -> Deva - 2734, 370, // pgl -> Ogam - 2738, 200, // pgn -> Ital - 2742, 105, // phd -> Deva - 2746, 345, // phk -> Mymr - 2750, 10, // phl -> Arab - 2754, 410, // phn -> Phnx - 2758, 260, // pho -> Laoo - 2762, 10, // phr -> Arab - 2766, 535, // pht -> Thai - 2770, 10, // phv -> Arab - 2774, 105, // phw -> Deva - 2778, 460, // pi -> Sinh - 2781, 55, // pka -> Brah - 2785, 320, // pkr -> Mlym - 2789, 10, // plk -> Arab - 2793, 345, // pll -> Mymr - 2797, 55, // pmh -> Brah - 2801, 150, // pnt -> Grek - 2805, 105, // ppa -> Deva - 2809, 230, // pra -> Khar - 2813, 10, // prc -> Arab - 2817, 10, // prd -> Arab - 2821, 155, // prp -> Gujr - 2825, 535, // prt -> Thai - 2829, 10, // prx -> Arab - 2833, 10, // ps -> Arab - 2836, 10, // psh -> Arab - 2840, 10, // psi -> Arab - 2844, 10, // pst -> Arab - 2848, 105, // pum -> Deva - 2852, 345, // pwo -> Mymr - 2856, 105, // pwr -> Deva - 2860, 535, // pww -> Thai - 2864, 345, // pyx -> Mymr - 2868, 10, // qxq -> Arab - 2872, 105, // raa -> Deva - 2876, 105, // rab -> Deva - 2880, 105, // raf -> Deva - 2884, 45, // rah -> Beng - 2888, 105, // raj -> Deva - 2892, 105, // rav -> Deva - 2896, 345, // rbb -> Mymr - 2900, 10, // rdb -> Arab - 2904, 385, // rei -> Orya - 2908, 430, // rhg -> Rohg - 2912, 105, // rji -> Deva - 2916, 105, // rjs -> Deva - 2920, 235, // rka -> Khmr - 2924, 345, // rki -> Mymr - 2928, 45, // rkt -> Beng - 2932, 20, // rmi -> Armn - 2936, 10, // rmt -> Arab - 2940, 345, // rmz -> Mymr - 2944, 100, // rsk -> Cyrl - 2948, 105, // rtw -> Deva - 2952, 100, // ru -> Cyrl - 2955, 100, // rue -> Cyrl - 2959, 100, // rut -> Cyrl - 2963, 105, // rwr -> Deva - 2967, 220, // ryu -> Kana - 2971, 105, // sa -> Deva - 2974, 100, // sah -> Cyrl - 2978, 440, // sam -> Samr - 2982, 375, // sat -> Olck - 2986, 450, // saz -> Saur - 2990, 10, // sbn -> Arab - 2994, 540, // sbu -> Tibt - 2998, 105, // sck -> Deva - 3002, 10, // scl -> Arab - 3006, 105, // scp -> Deva - 3010, 260, // sct -> Laoo - 3014, 490, // scu -> Takr - 3018, 150, // scx -> Grek - 3022, 10, // sd -> Arab - 3025, 105, // sd_IN -> Deva - 3031, 10, // sdb -> Arab - 3035, 10, // sdf -> Arab - 3039, 10, // sdg -> Arab - 3043, 10, // sdh -> Arab - 3047, 10, // sds -> Arab - 3051, 100, // sel -> Cyrl - 3055, 415, // sfm -> Plrd - 3059, 370, // sga -> Ogam - 3063, 100, // sgh -> Cyrl - 3067, 105, // sgj -> Deva - 3071, 10, // sgr -> Arab - 3075, 540, // sgt -> Tibt - 3079, 120, // sgw -> Ethi - 3083, 10, // sgy -> Arab - 3087, 10, // shd -> Arab - 3091, 525, // shi -> Tfng - 3095, 10, // shm -> Arab - 3099, 345, // shn -> Mymr - 3103, 10, // shu -> Arab - 3107, 10, // shv -> Arab - 3111, 460, // si -> Sinh - 3114, 100, // sia -> Cyrl - 3118, 540, // sip -> Tibt - 3122, 10, // siy -> Arab - 3126, 10, // siz -> Arab - 3130, 100, // sjd -> Cyrl - 3134, 105, // sjp -> Deva - 3138, 100, // sjt -> Cyrl - 3142, 535, // skb -> Thai - 3146, 105, // skj -> Deva - 3150, 10, // skr -> Arab - 3154, 10, // slq -> Arab - 3158, 580, // smh -> Yiii - 3162, 440, // smp -> Samr - 3166, 235, // smu -> Khmr - 3170, 10, // smy -> Arab - 3174, 515, // soa -> Tavt - 3178, 465, // sog -> Sogd - 3182, 105, // soi -> Deva - 3186, 535, // sou -> Thai - 3190, 540, // spt -> Tibt - 3194, 385, // spv -> Orya - 3198, 10, // sqo -> Arab - 3202, 260, // sqq -> Laoo - 3206, 10, // sqt -> Arab - 3210, 100, // sr -> Cyrl - 3213, 470, // srb -> Sora - 3217, 10, // srh -> Arab - 3221, 105, // srx -> Deva - 3225, 10, // srz -> Arab - 3229, 10, // ssh -> Arab - 3233, 260, // sss -> Laoo - 3237, 10, // sts -> Arab - 3241, 120, // stv -> Ethi - 3245, 100, // sty -> Cyrl - 3249, 105, // suz -> Deva - 3253, 125, // sva -> Geor - 3257, 10, // swb -> Arab - 3261, 170, // swi -> Hani - 3265, 105, // swv -> Deva - 3269, 480, // syc -> Syrc - 3273, 45, // syl -> Beng - 3277, 480, // syn -> Syrc - 3281, 480, // syr -> Syrc - 3285, 105, // syw -> Deva - 3289, 505, // ta -> Taml - 3292, 100, // tab -> Cyrl - 3296, 105, // taj -> Deva - 3300, 485, // tbk -> Tagb - 3304, 540, // tcn -> Tibt - 3308, 345, // tco -> Mymr - 3312, 505, // tcx -> Taml - 3316, 245, // tcy -> Knda - 3320, 525, // tda -> Tfng - 3324, 105, // tdb -> Deva - 3328, 495, // tdd -> Tale - 3332, 105, // tdg -> Deva - 3336, 105, // tdh -> Deva - 3340, 520, // te -> Telu - 3343, 205, // tes -> Java - 3347, 100, // tg -> Cyrl - 3350, 10, // tg_PK -> Arab - 3356, 105, // tge -> Deva - 3360, 540, // tgf -> Tibt - 3364, 535, // th -> Thai - 3367, 105, // the -> Deva - 3371, 105, // thf -> Deva - 3375, 495, // thi -> Tale - 3379, 105, // thl -> Deva - 3383, 535, // thm -> Thai - 3387, 105, // thq -> Deva - 3391, 105, // thr -> Deva - 3395, 105, // ths -> Deva - 3399, 120, // ti -> Ethi - 3402, 120, // tig -> Ethi - 3406, 105, // tij -> Deva - 3410, 100, // tin -> Cyrl - 3414, 345, // tjl -> Mymr - 3418, 10, // tjo -> Arab - 3422, 105, // tkb -> Deva - 3426, 10, // tks -> Arab - 3430, 105, // tkt -> Deva - 3434, 105, // tmk -> Deva - 3438, 480, // tmr -> Syrc - 3442, 60, // tnv -> Cakm - 3446, 10, // tov -> Arab - 3450, 235, // tpu -> Khmr - 3454, 10, // tra -> Arab - 3458, 185, // trg -> Hebr - 3462, 10, // trm -> Arab - 3466, 10, // trw -> Arab - 3470, 150, // tsd -> Grek - 3474, 105, // tsf -> Deva - 3478, 540, // tsj -> Tibt - 3482, 100, // tt -> Cyrl - 3485, 260, // tth -> Laoo - 3489, 260, // tto -> Laoo - 3493, 535, // tts -> Thai - 3497, 345, // tvn -> Mymr - 3501, 105, // twm -> Deva - 3505, 510, // txg -> Tang - 3509, 550, // txo -> Toto - 3513, 515, // tyr -> Tavt - 3517, 100, // tyv -> Cyrl - 3521, 100, // ude -> Cyrl - 3525, 320, // udg -> Mlym - 3529, 0, // udi -> Aghb - 3533, 100, // udm -> Cyrl - 3537, 10, // ug -> Arab - 3540, 100, // ug_KZ -> Cyrl - 3546, 100, // ug_MN -> Cyrl - 3552, 555, // uga -> Ugar - 3556, 100, // ugh -> Cyrl - 3560, 535, // ugo -> Thai - 3564, 100, // uk -> Cyrl - 3567, 385, // uki -> Orya - 3571, 100, // ulc -> Cyrl - 3575, 45, // unr -> Beng - 3579, 105, // unr_NP -> Deva - 3586, 45, // unx -> Beng - 3590, 10, // ur -> Arab - 3593, 535, // urk -> Thai - 3597, 10, // ush -> Arab - 3601, 150, // uum -> Grek - 3605, 10, // uz_AF -> Arab - 3611, 100, // uz_CN -> Cyrl - 3617, 10, // uzs -> Arab - 3621, 505, // vaa -> Taml - 3625, 10, // vaf -> Arab - 3629, 105, // vah -> Deva - 3633, 560, // vai -> Vaii - 3637, 105, // vas -> Deva - 3641, 105, // vav -> Deva - 3645, 105, // vay -> Deva - 3649, 10, // vgr -> Arab - 3653, 245, // vmd -> Knda - 3657, 10, // vmh -> Arab - 3661, 120, // wal -> Ethi - 3665, 10, // wbk -> Arab - 3669, 520, // wbq -> Telu - 3673, 105, // wbr -> Deva - 3677, 10, // wlo -> Arab - 3681, 105, // wme -> Deva - 3685, 10, // wne -> Arab - 3689, 10, // wni -> Arab - 3693, 130, // wsg -> Gong - 3697, 10, // wsv -> Arab - 3701, 105, // wtm -> Deva - 3705, 175, // wuu -> Hans - 3709, 100, // xal -> Cyrl - 3713, 120, // xan -> Ethi - 3717, 100, // xas -> Cyrl - 3721, 85, // xco -> Chrs - 3725, 70, // xcr -> Cari - 3729, 100, // xdq -> Cyrl - 3733, 10, // xhe -> Arab - 3737, 235, // xhm -> Khmr - 3741, 385, // xis -> Orya - 3745, 10, // xka -> Arab - 3749, 10, // xkc -> Arab - 3753, 10, // xkj -> Arab - 3757, 10, // xkp -> Arab - 3761, 285, // xlc -> Lyci - 3765, 290, // xld -> Lydi - 3769, 115, // xly -> Elym - 3773, 125, // xmf -> Geor - 3777, 300, // xmn -> Mani - 3781, 315, // xmr -> Merc - 3785, 350, // xna -> Narb - 3789, 105, // xnr -> Deva - 3793, 150, // xpg -> Grek - 3797, 370, // xpi -> Ogam - 3801, 100, // xpm -> Cyrl - 3805, 420, // xpr -> Prti - 3809, 100, // xrm -> Cyrl - 3813, 100, // xrn -> Cyrl - 3817, 445, // xsa -> Sarb - 3821, 105, // xsr -> Deva - 3825, 100, // xss -> Cyrl - 3829, 505, // xub -> Taml - 3833, 505, // xuj -> Taml - 3837, 200, // xve -> Ital - 3841, 10, // xvi -> Arab - 3845, 100, // xwo -> Cyrl - 3849, 305, // xzh -> Marc - 3853, 100, // yai -> Cyrl - 3857, 105, // ybh -> Deva - 3861, 105, // ybi -> Deva - 3865, 10, // ydg -> Arab - 3869, 320, // yea -> Mlym - 3873, 150, // yej -> Grek - 3877, 520, // yeu -> Telu - 3881, 415, // ygp -> Plrd - 3885, 185, // yhd -> Hebr - 3889, 185, // yi -> Hebr - 3892, 580, // yig -> Yiii - 3896, 185, // yih -> Hebr - 3900, 580, // yiv -> Yiii - 3904, 100, // ykg -> Cyrl - 3908, 415, // yna -> Plrd - 3912, 100, // ynk -> Cyrl - 3916, 210, // yoi -> Jpan - 3920, 535, // yoy -> Thai - 3924, 100, // yrk -> Cyrl - 3928, 580, // ysd -> Yiii - 3932, 580, // ysn -> Yiii - 3936, 580, // ysp -> Yiii - 3940, 100, // ysr -> Cyrl - 3944, 415, // ysy -> Plrd - 3948, 185, // yud -> Hebr - 3952, 180, // yue -> Hant - 3956, 175, // yue_CN -> Hans - 3963, 100, // yug -> Cyrl - 3967, 100, // yux -> Cyrl - 3971, 415, // ywq -> Plrd - 3975, 415, // ywu -> Plrd - 3979, 540, // zau -> Tibt - 3983, 10, // zba -> Arab - 3987, 170, // zch -> Hani - 3991, 10, // zdj -> Arab - 3995, 170, // zeh -> Hani - 3999, 525, // zen -> Tfng - 4003, 170, // zgb -> Hani - 4007, 525, // zgh -> Tfng - 4011, 170, // zgm -> Hani - 4015, 170, // zgn -> Hani - 4019, 175, // zh -> Hans - 4022, 180, // zh_AU -> Hant - 4028, 180, // zh_BN -> Hant - 4034, 180, // zh_GB -> Hant - 4040, 180, // zh_GF -> Hant - 4046, 180, // zh_HK -> Hant - 4052, 180, // zh_ID -> Hant - 4058, 180, // zh_MO -> Hant - 4064, 180, // zh_PA -> Hant - 4070, 180, // zh_PF -> Hant - 4076, 180, // zh_PH -> Hant - 4082, 180, // zh_SR -> Hant - 4088, 180, // zh_TH -> Hant - 4094, 180, // zh_TW -> Hant - 4100, 180, // zh_US -> Hant - 4106, 180, // zh_VN -> Hant - 4112, 170, // zhd -> Hani - 4116, 365, // zhx -> Nshu - 4120, 100, // zkb -> Cyrl - 4124, 100, // zko -> Cyrl - 4128, 240, // zkt -> Kits - 4132, 100, // zkz -> Cyrl - 4136, 170, // zlj -> Hani - 4140, 170, // zln -> Hani - 4144, 170, // zlq -> Hani - 4148, 170, // zqe -> Hani - 4152, 185, // zrp -> Hebr - 4156, 10, // zum -> Arab - 4160, 170, // zyg -> Hani - 4164, 170, // zyn -> Hani - 4168, 170, // zzj -> Hani + 19, 420, // abl -> Rjng + 23, 10, // abv -> Arab + 27, 10, // acm -> Arab + 31, 10, // acq -> Arab + 35, 10, // acw -> Arab + 39, 10, // acx -> Arab + 43, 10, // adf -> Arab + 47, 535, // adx -> Tibt + 51, 100, // ady -> Cyrl + 55, 25, // ae -> Avst + 58, 10, // aeb -> Arab + 62, 10, // aec -> Arab + 66, 10, // aee -> Arab + 70, 10, // aeq -> Arab + 74, 10, // afb -> Arab + 78, 105, // agi -> Deva + 82, 120, // agj -> Ethi + 86, 100, // agx -> Cyrl + 90, 120, // ahg -> Ethi + 94, 5, // aho -> Ahom + 98, 105, // ahr -> Deva + 102, 10, // aib -> Arab + 106, 185, // aij -> Hebr + 110, 220, // ain -> Kana + 114, 345, // aio -> Mymr + 118, 10, // aiq -> Arab + 122, 10, // ajp -> Arab + 126, 570, // akk -> Xsux + 130, 100, // akv -> Cyrl + 134, 260, // alk -> Laoo + 138, 320, // all -> Mlym + 142, 100, // alr -> Cyrl + 146, 100, // alt -> Cyrl + 150, 120, // alw -> Ethi + 154, 120, // am -> Ethi + 157, 210, // ams -> Jpan + 161, 475, // amw -> Syrc + 165, 100, // ani -> Cyrl + 169, 105, // anp -> Deva + 173, 105, // anr -> Deva + 177, 120, // anu -> Ethi + 181, 45, // aot -> Beng + 185, 10, // apc -> Arab + 189, 10, // apd -> Arab + 193, 105, // aph -> Deva + 197, 100, // aqc -> Cyrl + 201, 10, // ar -> Arab + 204, 15, // arc -> Armi + 208, 10, // arq -> Arab + 212, 10, // ars -> Arab + 216, 10, // ary -> Arab + 220, 10, // arz -> Arab + 224, 45, // as -> Beng + 227, 450, // ase -> Sgnw + 231, 10, // ask -> Arab + 235, 10, // atn -> Arab + 239, 100, // atv -> Cyrl + 243, 10, // auj -> Arab + 247, 10, // auz -> Arab + 251, 100, // av -> Cyrl + 254, 10, // avd -> Arab + 258, 10, // avl -> Arab + 262, 105, // awa -> Deva + 266, 120, // awn -> Ethi + 270, 20, // axm -> Armn + 274, 10, // ayh -> Arab + 278, 10, // ayl -> Arab + 282, 10, // ayn -> Arab + 286, 10, // ayp -> Arab + 290, 10, // az_IQ -> Arab + 296, 10, // az_IR -> Arab + 302, 100, // az_RU -> Cyrl + 308, 10, // azb -> Arab + 312, 100, // ba -> Cyrl + 315, 10, // bal -> Arab + 319, 105, // bap -> Deva + 323, 30, // bax -> Bamu + 327, 125, // bbl -> Geor + 331, 120, // bcq -> Ethi + 335, 385, // bdv -> Orya + 339, 10, // bdz -> Arab + 343, 100, // be -> Cyrl + 346, 105, // bee -> Deva + 350, 10, // bej -> Arab + 354, 105, // bfb -> Deva + 358, 500, // bfq -> Taml + 362, 10, // bft -> Arab + 366, 535, // bfu -> Tibt + 370, 385, // bfw -> Orya + 374, 105, // bfy -> Deva + 378, 105, // bfz -> Deva + 382, 100, // bg -> Cyrl + 385, 105, // bgc -> Deva + 389, 105, // bgd -> Deva + 393, 10, // bgn -> Arab + 397, 10, // bgp -> Arab + 401, 105, // bgq -> Deva + 405, 105, // bgw -> Deva + 409, 150, // bgx -> Grek + 413, 105, // bha -> Deva + 417, 105, // bhb -> Deva + 421, 105, // bhd -> Deva + 425, 10, // bhe -> Arab + 429, 100, // bhh -> Cyrl + 433, 105, // bhi -> Deva + 437, 105, // bhj -> Deva + 441, 10, // bhm -> Arab + 445, 475, // bhn -> Syrc + 449, 105, // bho -> Deva + 453, 485, // bht -> Takr + 457, 105, // bhu -> Deva + 461, 105, // biy -> Deva + 465, 475, // bjf -> Syrc + 469, 105, // bjj -> Deva + 473, 10, // bjm -> Arab + 477, 345, // blk -> Mymr + 481, 510, // blt -> Tavt + 485, 105, // bmj -> Deva + 489, 45, // bn -> Beng + 492, 105, // bns -> Deva + 496, 535, // bo -> Tibt + 499, 100, // bph -> Cyrl + 503, 105, // bpx -> Deva + 507, 45, // bpy -> Beng + 511, 10, // bqi -> Arab + 515, 105, // bra -> Deva + 519, 235, // brb -> Khmr + 523, 105, // brd -> Deva + 527, 10, // brh -> Arab + 531, 10, // brk -> Arab + 535, 260, // brv -> Laoo + 539, 105, // brx -> Deva + 543, 10, // bsh -> Arab + 547, 10, // bsk -> Arab + 551, 35, // bsq -> Bass + 555, 120, // bst -> Ethi + 559, 40, // btd -> Batk + 563, 40, // btm -> Batk + 567, 105, // btv -> Deva + 571, 100, // bua -> Cyrl + 575, 345, // bwe -> Mymr + 579, 100, // bxm -> Cyrl + 583, 330, // bxu -> Mong + 587, 105, // byh -> Deva + 591, 120, // byn -> Ethi + 595, 105, // byw -> Deva + 599, 530, // bzi -> Thai + 603, 530, // cbn -> Thai + 607, 60, // ccp -> Cakm + 611, 515, // cde -> Telu + 615, 105, // cdh -> Deva + 619, 155, // cdi -> Gujr + 623, 105, // cdj -> Deva + 627, 105, // cdm -> Deva + 631, 175, // cdo -> Hans + 635, 45, // cdz -> Beng + 639, 100, // ce -> Cyrl + 642, 535, // cgk -> Tibt + 646, 10, // chg -> Arab + 650, 100, // chm -> Cyrl + 654, 80, // chr -> Cher + 658, 105, // chx -> Deva + 662, 105, // cih -> Deva + 666, 10, // cja -> Arab + 670, 100, // cji -> Cyrl + 674, 75, // cjm -> Cham + 678, 175, // cjy -> Hans + 682, 10, // ckb -> Arab + 686, 100, // ckt -> Cyrl + 690, 10, // clh -> Arab + 694, 100, // clw -> Cyrl + 698, 470, // cmg -> Soyo + 702, 535, // cna -> Tibt + 706, 175, // cnp -> Hans + 710, 530, // cog -> Thai + 714, 90, // cop -> Copt + 718, 150, // cpg -> Grek + 722, 65, // cr -> Cans + 725, 100, // crh -> Cyrl + 729, 65, // crj -> Cans + 733, 65, // crk -> Cans + 737, 65, // crl -> Cans + 741, 65, // crm -> Cans + 745, 345, // csh -> Mymr + 749, 175, // csp -> Hans + 753, 65, // csw -> Cans + 757, 395, // ctd -> Pauc + 761, 45, // ctg -> Beng + 765, 105, // ctn -> Deva + 769, 500, // ctt -> Taml + 773, 100, // cu -> Cyrl + 776, 255, // cuu -> Lana + 780, 100, // cv -> Cyrl + 783, 175, // czh -> Hans + 787, 185, // czk -> Hebr + 791, 105, // daq -> Deva + 795, 100, // dar -> Cyrl + 799, 10, // dcc -> Arab + 803, 100, // ddo -> Cyrl + 807, 10, // def -> Arab + 811, 10, // deh -> Arab + 815, 45, // der -> Beng + 819, 105, // dhi -> Deva + 823, 155, // dhn -> Gujr + 827, 105, // dho -> Deva + 831, 105, // dhw -> Deva + 835, 535, // dka -> Tibt + 839, 100, // dlg -> Cyrl + 843, 310, // dmf -> Medf + 847, 10, // dmk -> Arab + 851, 10, // dml -> Arab + 855, 100, // dng -> Cyrl + 859, 345, // dnu -> Mymr + 863, 345, // dnv -> Mymr + 867, 105, // doi -> Deva + 871, 120, // dox -> Ethi + 875, 535, // dre -> Tibt + 879, 105, // drq -> Deva + 883, 120, // drs -> Ethi + 887, 105, // dry -> Deva + 891, 385, // dso -> Orya + 895, 105, // dty -> Deva + 899, 155, // dub -> Gujr + 903, 105, // duh -> Deva + 907, 105, // dus -> Deva + 911, 525, // dv -> Thaa + 914, 385, // dwk -> Orya + 918, 105, // dwz -> Deva + 922, 535, // dz -> Tibt + 925, 535, // dzl -> Tibt + 929, 150, // ecr -> Grek + 933, 95, // ecy -> Cprt + 937, 110, // egy -> Egyp + 941, 215, // eky -> Kali + 945, 150, // el -> Grek + 948, 105, // emg -> Deva + 952, 105, // emu -> Deva + 956, 100, // enf -> Cyrl + 960, 100, // enh -> Cyrl + 964, 500, // era -> Taml + 968, 135, // esg -> Gonm + 972, 10, // esh -> Arab + 976, 200, // ett -> Ital + 980, 100, // eve -> Cyrl + 984, 100, // evn -> Cyrl + 988, 10, // fa -> Arab + 991, 10, // fay -> Arab + 995, 10, // faz -> Arab + 999, 10, // fia -> Arab + 1003, 105, // fmu -> Deva + 1007, 10, // fub -> Arab + 1011, 175, // gan -> Hans + 1015, 385, // gaq -> Orya + 1019, 155, // gas -> Gujr + 1023, 515, // gau -> Telu + 1027, 385, // gbj -> Orya + 1031, 105, // gbk -> Deva + 1035, 155, // gbl -> Gujr + 1039, 105, // gbm -> Deva + 1043, 10, // gbz -> Arab + 1047, 385, // gdb -> Orya + 1051, 100, // gdo -> Cyrl + 1055, 105, // gdx -> Deva + 1059, 120, // gez -> Ethi + 1063, 10, // ggg -> Arab + 1067, 10, // gha -> Arab + 1071, 105, // ghe -> Deva + 1075, 10, // ghr -> Arab + 1079, 535, // ght -> Tibt + 1083, 10, // gig -> Arab + 1087, 100, // gin -> Cyrl + 1091, 10, // gjk -> Arab + 1095, 10, // gju -> Arab + 1099, 100, // gld -> Cyrl + 1103, 10, // glh -> Arab + 1107, 10, // glk -> Arab + 1111, 120, // gmv -> Ethi + 1115, 275, // gmy -> Linb + 1119, 535, // goe -> Tibt + 1123, 120, // gof -> Ethi + 1127, 105, // gok -> Deva + 1131, 105, // gom -> Deva + 1135, 515, // gon -> Telu + 1139, 140, // got -> Goth + 1143, 105, // gra -> Deva + 1147, 95, // grc -> Cprt + 1151, 45, // grt -> Beng + 1155, 120, // gru -> Ethi + 1159, 155, // gu -> Gujr + 1162, 105, // gvr -> Deva + 1166, 10, // gwc -> Arab + 1170, 10, // gwf -> Arab + 1174, 10, // gwt -> Arab + 1178, 105, // gyo -> Deva + 1182, 10, // gzi -> Arab + 1186, 10, // ha_CM -> Arab + 1192, 10, // ha_SD -> Arab + 1198, 10, // hac -> Arab + 1202, 175, // hak -> Hans + 1206, 120, // har -> Ethi + 1210, 10, // haz -> Arab + 1214, 185, // hbo -> Hebr + 1218, 120, // hdy -> Ethi + 1222, 185, // he -> Hebr + 1225, 105, // hi -> Deva + 1228, 485, // hii -> Takr + 1232, 570, // hit -> Xsux + 1236, 10, // hkh -> Arab + 1240, 105, // hlb -> Deva + 1244, 190, // hlu -> Hluw + 1248, 410, // hmd -> Plrd + 1252, 50, // hmj -> Bopo + 1256, 50, // hmq -> Bopo + 1260, 10, // hnd -> Arab + 1264, 105, // hne -> Deva + 1268, 195, // hnj -> Hmnp + 1272, 260, // hnj_AU -> Laoo + 1279, 260, // hnj_CN -> Laoo + 1286, 260, // hnj_FR -> Laoo + 1293, 260, // hnj_GF -> Laoo + 1300, 260, // hnj_LA -> Laoo + 1307, 260, // hnj_MM -> Laoo + 1314, 260, // hnj_SR -> Laoo + 1321, 260, // hnj_TH -> Laoo + 1328, 260, // hnj_VN -> Laoo + 1335, 10, // hno -> Arab + 1339, 105, // hoc -> Deva + 1343, 10, // hoh -> Arab + 1347, 105, // hoj -> Deva + 1351, 170, // how -> Hani + 1355, 105, // hoy -> Deva + 1359, 345, // hpo -> Mymr + 1363, 475, // hrt -> Syrc + 1367, 10, // hrz -> Arab + 1371, 175, // hsn -> Hans + 1375, 10, // hss -> Arab + 1379, 570, // htx -> Xsux + 1383, 105, // hut -> Deva + 1387, 185, // huy -> Hebr + 1391, 100, // huz -> Cyrl + 1395, 20, // hy -> Armn + 1398, 20, // hyw -> Armn + 1402, 575, // ii -> Yiii + 1405, 285, // imy -> Lyci + 1409, 100, // inh -> Cyrl + 1413, 345, // int -> Mymr + 1417, 120, // ior -> Ethi + 1421, 500, // iru -> Taml + 1425, 10, // isk -> Arab + 1429, 185, // itk -> Hebr + 1433, 100, // itl -> Cyrl + 1437, 65, // iu -> Cans + 1440, 185, // iw -> Hebr + 1443, 210, // ja -> Jpan + 1446, 10, // jad -> Arab + 1450, 10, // jat -> Arab + 1454, 185, // jbe -> Hebr + 1458, 10, // jbn -> Arab + 1462, 100, // jct -> Cyrl + 1466, 535, // jda -> Tibt + 1470, 10, // jdg -> Arab + 1474, 100, // jdt -> Cyrl + 1478, 105, // jee -> Deva + 1482, 125, // jge -> Geor + 1486, 185, // ji -> Hebr + 1489, 165, // jje -> Hang + 1493, 345, // jkm -> Mymr + 1497, 105, // jml -> Deva + 1501, 485, // jna -> Takr + 1505, 10, // jnd -> Arab + 1509, 105, // jnl -> Deva + 1513, 105, // jns -> Deva + 1517, 10, // jog -> Arab + 1521, 185, // jpa -> Hebr + 1525, 185, // jpr -> Hebr + 1529, 185, // jrb -> Hebr + 1533, 10, // jrb_MA -> Arab + 1540, 105, // jul -> Deva + 1544, 385, // jun -> Orya + 1548, 385, // juy -> Orya + 1552, 535, // jya -> Tibt + 1556, 185, // jye -> Hebr + 1560, 125, // ka -> Geor + 1563, 100, // kaa -> Cyrl + 1567, 100, // kap -> Cyrl + 1571, 225, // kaw -> Kawi + 1575, 100, // kbd -> Cyrl + 1579, 10, // kbu -> Arab + 1583, 10, // kby -> Arab + 1587, 100, // kca -> Cyrl + 1591, 45, // kdq -> Beng + 1595, 530, // kdt -> Thai + 1599, 100, // ket -> Cyrl + 1603, 105, // kex -> Deva + 1607, 515, // key -> Telu + 1611, 245, // kfa -> Knda + 1615, 105, // kfb -> Deva + 1619, 515, // kfc -> Telu + 1623, 245, // kfd -> Knda + 1627, 500, // kfe -> Taml + 1631, 320, // kfh -> Mlym + 1635, 500, // kfi -> Taml + 1639, 105, // kfk -> Deva + 1643, 10, // kfm -> Arab + 1647, 105, // kfp -> Deva + 1651, 105, // kfq -> Deva + 1655, 105, // kfr -> Deva + 1659, 105, // kfs -> Deva + 1663, 105, // kfx -> Deva + 1667, 105, // kfy -> Deva + 1671, 105, // kgj -> Deva + 1675, 105, // kgy -> Deva + 1679, 495, // khb -> Talu + 1683, 530, // khf -> Thai + 1687, 535, // khg -> Tibt + 1691, 105, // khn -> Deva + 1695, 345, // kht -> Mymr + 1699, 100, // khv -> Cyrl + 1703, 10, // khw -> Arab + 1707, 105, // kif -> Deva + 1711, 100, // kim -> Cyrl + 1715, 105, // kip -> Deva + 1719, 260, // kjg -> Laoo + 1723, 100, // kjh -> Cyrl + 1727, 105, // kjl -> Deva + 1731, 105, // kjo -> Deva + 1735, 345, // kjp -> Mymr + 1739, 530, // kjt -> Thai + 1743, 100, // kk -> Cyrl + 1746, 10, // kk_AF -> Arab + 1752, 10, // kk_CN -> Arab + 1758, 10, // kk_IR -> Arab + 1764, 10, // kk_MN -> Arab + 1770, 535, // kkf -> Tibt + 1774, 255, // kkh -> Lana + 1778, 105, // kkt -> Deva + 1782, 105, // kle -> Deva + 1786, 10, // klj -> Arab + 1790, 105, // klr -> Deva + 1794, 235, // km -> Khmr + 1797, 105, // kmj -> Deva + 1801, 10, // kmz -> Arab + 1805, 245, // kn -> Knda + 1808, 250, // ko -> Kore + 1811, 100, // koi -> Cyrl + 1815, 105, // kok -> Deva + 1819, 100, // kpt -> Cyrl + 1823, 100, // kpy -> Cyrl + 1827, 475, // kqd -> Syrc + 1831, 120, // kqy -> Ethi + 1835, 105, // kra -> Deva + 1839, 100, // krc -> Cyrl + 1843, 100, // krk -> Cyrl + 1847, 235, // krr -> Khmr + 1851, 105, // kru -> Deva + 1855, 235, // krv -> Khmr + 1859, 10, // ks -> Arab + 1862, 345, // ksu -> Mymr + 1866, 345, // ksw -> Mymr + 1870, 105, // ksz -> Deva + 1874, 120, // ktb -> Ethi + 1878, 10, // ktl -> Arab + 1882, 410, // ktp -> Plrd + 1886, 10, // ku_LB -> Arab + 1892, 260, // kuf -> Laoo + 1896, 100, // kum -> Cyrl + 1900, 100, // kv -> Cyrl + 1903, 100, // kva -> Cyrl + 1907, 345, // kvq -> Mymr + 1911, 345, // kvt -> Mymr + 1915, 10, // kvx -> Arab + 1919, 215, // kvy -> Kali + 1923, 345, // kxf -> Mymr + 1927, 345, // kxk -> Mymr + 1931, 530, // kxm -> Thai + 1935, 10, // kxp -> Arab + 1939, 100, // ky -> Cyrl + 1942, 10, // ky_CN -> Arab + 1948, 215, // kyu -> Kali + 1952, 105, // kyv -> Deva + 1956, 105, // kyw -> Deva + 1960, 270, // lab -> Lina + 1964, 185, // lad -> Hebr + 1968, 105, // lae -> Deva + 1972, 10, // lah -> Arab + 1976, 280, // lbc -> Lisu + 1980, 100, // lbe -> Cyrl + 1984, 105, // lbf -> Deva + 1988, 535, // lbj -> Tibt + 1992, 105, // lbm -> Deva + 1996, 260, // lbo -> Laoo + 2000, 105, // lbr -> Deva + 2004, 530, // lcp -> Thai + 2008, 265, // lep -> Lepc + 2012, 100, // lez -> Cyrl + 2016, 105, // lhm -> Deva + 2020, 475, // lhs -> Syrc + 2024, 105, // lif -> Deva + 2028, 280, // lis -> Lisu + 2032, 535, // lkh -> Tibt + 2036, 10, // lki -> Arab + 2040, 105, // lmh -> Deva + 2044, 515, // lmn -> Telu + 2048, 260, // lo -> Laoo + 2051, 105, // loy -> Deva + 2055, 410, // lpo -> Plrd + 2059, 10, // lrc -> Arab + 2063, 10, // lrk -> Arab + 2067, 10, // lrl -> Arab + 2071, 10, // lsa -> Arab + 2075, 185, // lsd -> Hebr + 2079, 10, // lss -> Arab + 2083, 535, // luk -> Tibt + 2087, 105, // luu -> Deva + 2091, 10, // luv -> Arab + 2095, 10, // luz -> Arab + 2099, 530, // lwl -> Thai + 2103, 530, // lwm -> Thai + 2107, 535, // lya -> Tibt + 2111, 175, // lzh -> Hans + 2115, 105, // mag -> Deva + 2119, 105, // mai -> Deva + 2123, 360, // man_GN -> Nkoo + 2130, 10, // mby -> Arab + 2134, 10, // mde -> Arab + 2138, 100, // mdf -> Cyrl + 2142, 120, // mdx -> Ethi + 2146, 120, // mdy -> Ethi + 2150, 10, // mfa -> Arab + 2154, 10, // mfi -> Arab + 2158, 105, // mgp -> Deva + 2162, 10, // mhj -> Arab + 2166, 295, // mid -> Mand + 2170, 105, // mjl -> Deva + 2174, 320, // mjq -> Mlym + 2178, 320, // mjr -> Mlym + 2182, 105, // mjt -> Deva + 2186, 515, // mju -> Telu + 2190, 320, // mjv -> Mlym + 2194, 105, // mjz -> Deva + 2198, 100, // mk -> Cyrl + 2201, 105, // mkb -> Deva + 2205, 105, // mke -> Deva + 2209, 10, // mki -> Arab + 2213, 530, // mkm -> Thai + 2217, 320, // ml -> Mlym + 2220, 530, // mlf -> Thai + 2224, 100, // mn -> Cyrl + 2227, 330, // mn_CN -> Mong + 2233, 45, // mni -> Beng + 2237, 10, // mnj -> Arab + 2241, 100, // mns -> Cyrl + 2245, 345, // mnw -> Mymr + 2249, 530, // mpz -> Thai + 2253, 105, // mr -> Deva + 2256, 530, // mra -> Thai + 2260, 105, // mrd -> Deva + 2264, 100, // mrj -> Cyrl + 2268, 335, // mro -> Mroo + 2272, 105, // mrr -> Deva + 2276, 10, // ms_CC -> Arab + 2282, 100, // mtm -> Cyrl + 2286, 105, // mtr -> Deva + 2290, 100, // mud -> Cyrl + 2294, 535, // muk -> Tibt + 2298, 105, // mut -> Deva + 2302, 500, // muv -> Taml + 2306, 120, // muz -> Ethi + 2310, 330, // mvf -> Mong + 2314, 10, // mvy -> Arab + 2318, 120, // mvz -> Ethi + 2322, 105, // mwr -> Deva + 2326, 345, // mwt -> Mymr + 2330, 195, // mww -> Hmnp + 2334, 345, // my -> Mymr + 2337, 120, // mym -> Ethi + 2341, 100, // myv -> Cyrl + 2345, 295, // myz -> Mand + 2349, 10, // mzn -> Arab + 2353, 175, // nan -> Hans + 2357, 105, // nao -> Deva + 2361, 105, // ncd -> Deva + 2365, 260, // ncq -> Laoo + 2369, 100, // ndf -> Cyrl + 2373, 105, // ne -> Deva + 2376, 100, // neg -> Cyrl + 2380, 535, // neh -> Tibt + 2384, 570, // nei -> Xsux + 2388, 105, // new -> Deva + 2392, 260, // ngt -> Laoo + 2396, 100, // nio -> Cyrl + 2400, 515, // nit -> Telu + 2404, 100, // niv -> Cyrl + 2408, 10, // nli -> Arab + 2412, 10, // nlm -> Arab + 2416, 105, // nlx -> Deva + 2420, 105, // nmm -> Deva + 2424, 560, // nnp -> Wcho + 2428, 255, // nod -> Lana + 2432, 105, // noe -> Deva + 2436, 100, // nog -> Cyrl + 2440, 105, // noi -> Deva + 2444, 430, // non -> Runr + 2448, 575, // nos -> Yiii + 2452, 535, // npb -> Tibt + 2456, 360, // nqo -> Nkoo + 2460, 575, // nsd -> Yiii + 2464, 575, // nsf -> Yiii + 2468, 65, // nsk -> Cans + 2472, 540, // nst -> Tnsa + 2476, 575, // nsv -> Yiii + 2480, 575, // nty -> Yiii + 2484, 10, // ntz -> Arab + 2488, 355, // nwc -> Newa + 2492, 105, // nwx -> Deva + 2496, 530, // nyl -> Thai + 2500, 10, // nyq -> Arab + 2504, 100, // oaa -> Cyrl + 2508, 100, // oac -> Cyrl + 2512, 475, // oar -> Syrc + 2516, 125, // oav -> Geor + 2520, 405, // obm -> Phnx + 2524, 345, // obr -> Mymr + 2528, 10, // odk -> Arab + 2532, 570, // oht -> Xsux + 2536, 65, // oj -> Cans + 2539, 65, // ojs -> Cans + 2543, 165, // okm -> Hang + 2547, 170, // oko -> Hani + 2551, 235, // okz -> Khmr + 2555, 105, // ola -> Deva + 2559, 535, // ole -> Tibt + 2563, 100, // omk -> Cyrl + 2567, 340, // omp -> Mtei + 2571, 325, // omr -> Modi + 2575, 105, // oon -> Deva + 2579, 385, // or -> Orya + 2582, 515, // ort -> Telu + 2586, 10, // oru -> Arab + 2590, 100, // orv -> Cyrl + 2594, 100, // os -> Cyrl + 2597, 390, // osa -> Osge + 2601, 200, // osc -> Ital + 2605, 205, // osi -> Java + 2609, 10, // ota -> Arab + 2613, 535, // otb -> Tibt + 2617, 380, // otk -> Orkh + 2621, 145, // oty -> Gran + 2625, 160, // pa -> Guru + 2628, 10, // pa_PK -> Arab + 2634, 400, // pal -> Phli + 2638, 100, // paq -> Cyrl + 2642, 10, // pbt -> Arab + 2646, 235, // pcb -> Khmr + 2650, 345, // pce -> Mymr + 2654, 320, // pcf -> Mlym + 2658, 320, // pcg -> Mlym + 2662, 105, // pch -> Deva + 2666, 105, // pci -> Deva + 2670, 515, // pcj -> Telu + 2674, 385, // peg -> Orya + 2678, 565, // peo -> Xpeo + 2682, 230, // pgd -> Khar + 2686, 105, // pgg -> Deva + 2690, 370, // pgl -> Ogam + 2694, 200, // pgn -> Ital + 2698, 105, // phd -> Deva + 2702, 345, // phk -> Mymr + 2706, 10, // phl -> Arab + 2710, 405, // phn -> Phnx + 2714, 260, // pho -> Laoo + 2718, 10, // phr -> Arab + 2722, 530, // pht -> Thai + 2726, 10, // phv -> Arab + 2730, 105, // phw -> Deva + 2734, 455, // pi -> Sinh + 2737, 55, // pka -> Brah + 2741, 320, // pkr -> Mlym + 2745, 10, // plk -> Arab + 2749, 345, // pll -> Mymr + 2753, 55, // pmh -> Brah + 2757, 150, // pnt -> Grek + 2761, 230, // pra -> Khar + 2765, 10, // prc -> Arab + 2769, 10, // prd -> Arab + 2773, 155, // prp -> Gujr + 2777, 530, // prt -> Thai + 2781, 10, // prx -> Arab + 2785, 10, // ps -> Arab + 2788, 10, // psh -> Arab + 2792, 10, // psi -> Arab + 2796, 10, // pst -> Arab + 2800, 105, // pum -> Deva + 2804, 345, // pwo -> Mymr + 2808, 105, // pwr -> Deva + 2812, 530, // pww -> Thai + 2816, 345, // pyx -> Mymr + 2820, 10, // qxq -> Arab + 2824, 105, // raa -> Deva + 2828, 105, // rab -> Deva + 2832, 105, // raf -> Deva + 2836, 45, // rah -> Beng + 2840, 105, // raj -> Deva + 2844, 105, // rav -> Deva + 2848, 345, // rbb -> Mymr + 2852, 10, // rdb -> Arab + 2856, 385, // rei -> Orya + 2860, 425, // rhg -> Rohg + 2864, 105, // rji -> Deva + 2868, 105, // rjs -> Deva + 2872, 235, // rka -> Khmr + 2876, 345, // rki -> Mymr + 2880, 45, // rkt -> Beng + 2884, 20, // rmi -> Armn + 2888, 10, // rmt -> Arab + 2892, 345, // rmz -> Mymr + 2896, 100, // rom_BG -> Cyrl + 2903, 100, // rsk -> Cyrl + 2907, 105, // rtw -> Deva + 2911, 100, // ru -> Cyrl + 2914, 100, // rue -> Cyrl + 2918, 100, // rut -> Cyrl + 2922, 105, // rwr -> Deva + 2926, 220, // ryu -> Kana + 2930, 105, // sa -> Deva + 2933, 100, // sah -> Cyrl + 2937, 435, // sam -> Samr + 2941, 375, // sat -> Olck + 2945, 445, // saz -> Saur + 2949, 10, // sbn -> Arab + 2953, 535, // sbu -> Tibt + 2957, 105, // sck -> Deva + 2961, 10, // scl -> Arab + 2965, 10, // scl_IN -> Arab + 2972, 105, // scp -> Deva + 2976, 260, // sct -> Laoo + 2980, 485, // scu -> Takr + 2984, 150, // scx -> Grek + 2988, 10, // sd -> Arab + 2991, 105, // sd_IN -> Deva + 2997, 10, // sdb -> Arab + 3001, 10, // sdf -> Arab + 3005, 10, // sdg -> Arab + 3009, 10, // sdh -> Arab + 3013, 10, // sds -> Arab + 3017, 100, // sel -> Cyrl + 3021, 410, // sfm -> Plrd + 3025, 370, // sga -> Ogam + 3029, 100, // sgh -> Cyrl + 3033, 105, // sgj -> Deva + 3037, 10, // sgr -> Arab + 3041, 535, // sgt -> Tibt + 3045, 120, // sgw -> Ethi + 3049, 10, // sgy -> Arab + 3053, 10, // shd -> Arab + 3057, 520, // shi -> Tfng + 3061, 10, // shm -> Arab + 3065, 345, // shn -> Mymr + 3069, 10, // shu -> Arab + 3073, 10, // shv -> Arab + 3077, 455, // si -> Sinh + 3080, 100, // sia -> Cyrl + 3084, 535, // sip -> Tibt + 3088, 10, // siy -> Arab + 3092, 10, // siz -> Arab + 3096, 100, // sjd -> Cyrl + 3100, 105, // sjp -> Deva + 3104, 100, // sjt -> Cyrl + 3108, 530, // skb -> Thai + 3112, 105, // skj -> Deva + 3116, 10, // skr -> Arab + 3120, 10, // slq -> Arab + 3124, 575, // smh -> Yiii + 3128, 435, // smp -> Samr + 3132, 235, // smu -> Khmr + 3136, 10, // smy -> Arab + 3140, 510, // soa -> Tavt + 3144, 460, // sog -> Sogd + 3148, 105, // soi -> Deva + 3152, 530, // sou -> Thai + 3156, 535, // spt -> Tibt + 3160, 385, // spv -> Orya + 3164, 10, // sqo -> Arab + 3168, 260, // sqq -> Laoo + 3172, 10, // sqt -> Arab + 3176, 100, // sr -> Cyrl + 3179, 465, // srb -> Sora + 3183, 10, // srh -> Arab + 3187, 105, // srx -> Deva + 3191, 10, // srz -> Arab + 3195, 10, // ssh -> Arab + 3199, 260, // sss -> Laoo + 3203, 10, // sts -> Arab + 3207, 120, // stv -> Ethi + 3211, 100, // sty -> Cyrl + 3215, 105, // suz -> Deva + 3219, 125, // sva -> Geor + 3223, 10, // swb -> Arab + 3227, 170, // swi -> Hani + 3231, 105, // swv -> Deva + 3235, 475, // syc -> Syrc + 3239, 45, // syl -> Beng + 3243, 475, // syn -> Syrc + 3247, 475, // syr -> Syrc + 3251, 105, // syw -> Deva + 3255, 500, // ta -> Taml + 3258, 100, // tab -> Cyrl + 3262, 105, // taj -> Deva + 3266, 480, // tbk -> Tagb + 3270, 535, // tcn -> Tibt + 3274, 345, // tco -> Mymr + 3278, 500, // tcx -> Taml + 3282, 245, // tcy -> Knda + 3286, 520, // tda -> Tfng + 3290, 105, // tdb -> Deva + 3294, 490, // tdd -> Tale + 3298, 105, // tdg -> Deva + 3302, 105, // tdh -> Deva + 3306, 515, // te -> Telu + 3309, 205, // tes -> Java + 3313, 100, // tg -> Cyrl + 3316, 10, // tg_PK -> Arab + 3322, 105, // tge -> Deva + 3326, 535, // tgf -> Tibt + 3330, 530, // th -> Thai + 3333, 105, // the -> Deva + 3337, 105, // thf -> Deva + 3341, 490, // thi -> Tale + 3345, 105, // thl -> Deva + 3349, 530, // thm -> Thai + 3353, 105, // thq -> Deva + 3357, 105, // thr -> Deva + 3361, 105, // ths -> Deva + 3365, 120, // ti -> Ethi + 3368, 120, // tig -> Ethi + 3372, 105, // tij -> Deva + 3376, 100, // tin -> Cyrl + 3380, 345, // tjl -> Mymr + 3384, 10, // tjo -> Arab + 3388, 105, // tkb -> Deva + 3392, 10, // tks -> Arab + 3396, 105, // tkt -> Deva + 3400, 105, // tmk -> Deva + 3404, 475, // tmr -> Syrc + 3408, 60, // tnv -> Cakm + 3412, 10, // tov -> Arab + 3416, 235, // tpu -> Khmr + 3420, 10, // tra -> Arab + 3424, 185, // trg -> Hebr + 3428, 10, // trm -> Arab + 3432, 10, // trw -> Arab + 3436, 150, // tsd -> Grek + 3440, 535, // tsj -> Tibt + 3444, 100, // tt -> Cyrl + 3447, 260, // tth -> Laoo + 3451, 260, // tto -> Laoo + 3455, 530, // tts -> Thai + 3459, 345, // tvn -> Mymr + 3463, 105, // twm -> Deva + 3467, 505, // txg -> Tang + 3471, 545, // txo -> Toto + 3475, 510, // tyr -> Tavt + 3479, 100, // tyv -> Cyrl + 3483, 100, // ude -> Cyrl + 3487, 320, // udg -> Mlym + 3491, 0, // udi -> Aghb + 3495, 100, // udm -> Cyrl + 3499, 10, // ug -> Arab + 3502, 100, // ug_KZ -> Cyrl + 3508, 100, // ug_MN -> Cyrl + 3514, 550, // uga -> Ugar + 3518, 100, // ugh -> Cyrl + 3522, 530, // ugo -> Thai + 3526, 100, // uk -> Cyrl + 3529, 385, // uki -> Orya + 3533, 100, // ulc -> Cyrl + 3537, 45, // unr -> Beng + 3541, 105, // unr_NP -> Deva + 3548, 45, // unx -> Beng + 3552, 10, // ur -> Arab + 3555, 530, // urk -> Thai + 3559, 10, // ush -> Arab + 3563, 150, // uum -> Grek + 3567, 10, // uz_AF -> Arab + 3573, 100, // uz_CN -> Cyrl + 3579, 10, // uzs -> Arab + 3583, 500, // vaa -> Taml + 3587, 10, // vaf -> Arab + 3591, 105, // vah -> Deva + 3595, 555, // vai -> Vaii + 3599, 105, // vas -> Deva + 3603, 105, // vav -> Deva + 3607, 105, // vay -> Deva + 3611, 10, // vgr -> Arab + 3615, 245, // vmd -> Knda + 3619, 10, // vmh -> Arab + 3623, 120, // wal -> Ethi + 3627, 10, // wbk -> Arab + 3631, 515, // wbq -> Telu + 3635, 105, // wbr -> Deva + 3639, 10, // wlo -> Arab + 3643, 105, // wme -> Deva + 3647, 10, // wne -> Arab + 3651, 10, // wni -> Arab + 3655, 130, // wsg -> Gong + 3659, 10, // wsv -> Arab + 3663, 105, // wtm -> Deva + 3667, 175, // wuu -> Hans + 3671, 100, // xal -> Cyrl + 3675, 120, // xan -> Ethi + 3679, 100, // xas -> Cyrl + 3683, 85, // xco -> Chrs + 3687, 70, // xcr -> Cari + 3691, 100, // xdq -> Cyrl + 3695, 10, // xhe -> Arab + 3699, 235, // xhm -> Khmr + 3703, 385, // xis -> Orya + 3707, 10, // xka -> Arab + 3711, 10, // xkc -> Arab + 3715, 10, // xkj -> Arab + 3719, 10, // xkp -> Arab + 3723, 285, // xlc -> Lyci + 3727, 290, // xld -> Lydi + 3731, 115, // xly -> Elym + 3735, 125, // xmf -> Geor + 3739, 300, // xmn -> Mani + 3743, 315, // xmr -> Merc + 3747, 350, // xna -> Narb + 3751, 105, // xnr -> Deva + 3755, 150, // xpg -> Grek + 3759, 370, // xpi -> Ogam + 3763, 100, // xpm -> Cyrl + 3767, 415, // xpr -> Prti + 3771, 100, // xrm -> Cyrl + 3775, 100, // xrn -> Cyrl + 3779, 440, // xsa -> Sarb + 3783, 105, // xsr -> Deva + 3787, 100, // xss -> Cyrl + 3791, 500, // xub -> Taml + 3795, 500, // xuj -> Taml + 3799, 200, // xve -> Ital + 3803, 10, // xvi -> Arab + 3807, 100, // xwo -> Cyrl + 3811, 305, // xzh -> Marc + 3815, 100, // yai -> Cyrl + 3819, 105, // ybh -> Deva + 3823, 105, // ybi -> Deva + 3827, 10, // ydg -> Arab + 3831, 320, // yea -> Mlym + 3835, 150, // yej -> Grek + 3839, 515, // yeu -> Telu + 3843, 410, // ygp -> Plrd + 3847, 185, // yhd -> Hebr + 3851, 185, // yi -> Hebr + 3854, 575, // yig -> Yiii + 3858, 185, // yih -> Hebr + 3862, 575, // yiv -> Yiii + 3866, 100, // ykg -> Cyrl + 3870, 410, // yna -> Plrd + 3874, 100, // ynk -> Cyrl + 3878, 210, // yoi -> Jpan + 3882, 530, // yoy -> Thai + 3886, 100, // yrk -> Cyrl + 3890, 575, // ysd -> Yiii + 3894, 575, // ysn -> Yiii + 3898, 575, // ysp -> Yiii + 3902, 100, // ysr -> Cyrl + 3906, 410, // ysy -> Plrd + 3910, 185, // yud -> Hebr + 3914, 180, // yue -> Hant + 3918, 175, // yue_CN -> Hans + 3925, 100, // yug -> Cyrl + 3929, 100, // yux -> Cyrl + 3933, 410, // ywq -> Plrd + 3937, 410, // ywu -> Plrd + 3941, 535, // zau -> Tibt + 3945, 10, // zba -> Arab + 3949, 170, // zch -> Hani + 3953, 10, // zdj -> Arab + 3957, 170, // zeh -> Hani + 3961, 520, // zen -> Tfng + 3965, 170, // zgb -> Hani + 3969, 520, // zgh -> Tfng + 3973, 170, // zgm -> Hani + 3977, 170, // zgn -> Hani + 3981, 175, // zh -> Hans + 3984, 180, // zh_AU -> Hant + 3990, 180, // zh_BN -> Hant + 3996, 180, // zh_GB -> Hant + 4002, 180, // zh_GF -> Hant + 4008, 180, // zh_HK -> Hant + 4014, 180, // zh_ID -> Hant + 4020, 180, // zh_MO -> Hant + 4026, 180, // zh_PA -> Hant + 4032, 180, // zh_PF -> Hant + 4038, 180, // zh_PH -> Hant + 4044, 180, // zh_SR -> Hant + 4050, 180, // zh_TH -> Hant + 4056, 180, // zh_TW -> Hant + 4062, 180, // zh_US -> Hant + 4068, 180, // zh_VN -> Hant + 4074, 170, // zhd -> Hani + 4078, 365, // zhx -> Nshu + 4082, 100, // zkb -> Cyrl + 4086, 100, // zko -> Cyrl + 4090, 240, // zkt -> Kits + 4094, 100, // zkz -> Cyrl + 4098, 170, // zlj -> Hani + 4102, 170, // zln -> Hani + 4106, 170, // zlq -> Hani + 4110, 170, // zqe -> Hani + 4114, 185, // zrp -> Hebr + 4118, 10, // zum -> Arab + 4122, 170, // zyg -> Hani + 4126, 170, // zyn -> Hani + 4130, 170, // zzj -> Hani }; //====================================================================== @@ -1150,35 +1138,36 @@ const char parentLocaleChars[] = "en_AU\0en_BB\0en_BE\0en_BM\0en_BS\0en_BW\0en_BZ\0en_CC\0en_CH\0" "en_CK\0en_CM\0en_CX\0en_CY\0en_DE\0en_DG\0en_DK\0en_DM\0en_Dsrt\0" "en_ER\0en_FI\0en_FJ\0en_FK\0en_FM\0en_GB\0en_GD\0en_GG\0en_GH\0" - "en_GI\0en_GM\0en_GY\0en_HK\0en_IE\0en_IL\0en_IM\0en_IN\0en_IO\0" - "en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0en_LS\0" - "en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0en_NA\0" - "en_NF\0en_NG\0en_NL\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0en_PN\0" - "en_PW\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0en_SI\0" - "en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0en_TT\0" - "en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0en_ZM\0" - "en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0es_CR\0" - "es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_MX\0es_NI\0es_PA\0es_PE\0" - "es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0ff_Arab\0fr_HT\0" - "ha_Arab\0hi_Latn\0ht\0iu_Latn\0kk_Arab\0ks_Deva\0ku_Arab\0ky_Arab\0" - "ky_Latn\0ml_Arab\0mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0no_NO\0" - "pa_Arab\0pt_AO\0pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0" - "pt_MZ\0pt_PT\0pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0" - "sd_Sind\0shi_Latn\0so_Arab\0sr_Latn\0sw_Arab\0tg_Arab\0ug_Cyrl\0" - "uz_Arab\0uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0" - "zh_Hant_HK\0zh_Hant_MO\0"; + "en_GI\0en_GM\0en_GY\0en_HK\0en_ID\0en_IE\0en_IL\0en_IM\0en_IN\0" + "en_IO\0en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0" + "en_LS\0en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0" + "en_NA\0en_NF\0en_NG\0en_NL\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0" + "en_PN\0en_PW\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0" + "en_SI\0en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0" + "en_TT\0en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0" + "en_ZM\0en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0" + "es_CR\0es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_JP\0es_MX\0es_NI\0" + "es_PA\0es_PE\0es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0" + "ff_Arab\0fr_HT\0ha_Arab\0hi_Latn\0ht\0iu_Latn\0kk_Arab\0ks_Deva\0" + "ku_Arab\0kxv_Deva\0kxv_Orya\0kxv_Telu\0ky_Arab\0ky_Latn\0ml_Arab\0" + "mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0no_NO\0pa_Arab\0pt_AO\0" + "pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0pt_MZ\0pt_PT\0" + "pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0sd_Sind\0shi_Latn\0" + "so_Arab\0sr_Latn\0sw_Arab\0tg_Arab\0ug_Cyrl\0uz_Arab\0uz_Cyrl\0" + "vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0zh_Hant_HK\0zh_Hant_MO\0" + ""; const int32_t parentLocaleTable[] = { - 0, 1023, // az_Arab -> root - 8, 1023, // az_Cyrl -> root - 16, 1023, // bal_Latn -> root - 25, 1023, // blt_Latn -> root - 34, 1023, // bm_Nkoo -> root - 42, 1023, // bs_Cyrl -> root - 50, 1023, // byn_Latn -> root - 59, 1023, // cu_Glag -> root - 67, 1023, // dje_Arab -> root - 76, 1023, // dyo_Arab -> root + 0, 1062, // az_Arab -> root + 8, 1062, // az_Cyrl -> root + 16, 1062, // bal_Latn -> root + 25, 1062, // blt_Latn -> root + 34, 1062, // bm_Nkoo -> root + 42, 1062, // bs_Cyrl -> root + 50, 1062, // byn_Latn -> root + 59, 1062, // cu_Glag -> root + 67, 1062, // dje_Arab -> root + 76, 1062, // dyo_Arab -> root 92, 85, // en_150 -> en_001 99, 85, // en_AG -> en_001 105, 85, // en_AI -> en_001 @@ -1200,7 +1189,7 @@ const int32_t parentLocaleTable[] = { 201, 85, // en_DG -> en_001 207, 92, // en_DK -> en_150 213, 85, // en_DM -> en_001 - 219, 1023, // en_Dsrt -> root + 219, 1062, // en_Dsrt -> root 227, 85, // en_ER -> en_001 233, 92, // en_FI -> en_150 239, 85, // en_FJ -> en_001 @@ -1214,136 +1203,141 @@ const int32_t parentLocaleTable[] = { 287, 85, // en_GM -> en_001 293, 85, // en_GY -> en_001 299, 85, // en_HK -> en_001 - 305, 85, // en_IE -> en_001 - 311, 85, // en_IL -> en_001 - 317, 85, // en_IM -> en_001 - 323, 85, // en_IN -> en_001 - 329, 85, // en_IO -> en_001 - 335, 85, // en_JE -> en_001 - 341, 85, // en_JM -> en_001 - 347, 85, // en_KE -> en_001 - 353, 85, // en_KI -> en_001 - 359, 85, // en_KN -> en_001 - 365, 85, // en_KY -> en_001 - 371, 85, // en_LC -> en_001 - 377, 85, // en_LR -> en_001 - 383, 85, // en_LS -> en_001 - 389, 85, // en_MG -> en_001 - 395, 85, // en_MO -> en_001 - 401, 85, // en_MS -> en_001 - 407, 85, // en_MT -> en_001 - 413, 85, // en_MU -> en_001 - 419, 85, // en_MV -> en_001 - 425, 85, // en_MW -> en_001 - 431, 85, // en_MY -> en_001 - 437, 85, // en_NA -> en_001 - 443, 85, // en_NF -> en_001 - 449, 85, // en_NG -> en_001 - 455, 92, // en_NL -> en_150 - 461, 85, // en_NR -> en_001 - 467, 85, // en_NU -> en_001 - 473, 85, // en_NZ -> en_001 - 479, 85, // en_PG -> en_001 - 485, 85, // en_PK -> en_001 - 491, 85, // en_PN -> en_001 - 497, 85, // en_PW -> en_001 - 503, 85, // en_RW -> en_001 - 509, 85, // en_SB -> en_001 - 515, 85, // en_SC -> en_001 - 521, 85, // en_SD -> en_001 - 527, 92, // en_SE -> en_150 - 533, 85, // en_SG -> en_001 - 539, 85, // en_SH -> en_001 - 545, 92, // en_SI -> en_150 - 551, 85, // en_SL -> en_001 - 557, 85, // en_SS -> en_001 - 563, 85, // en_SX -> en_001 - 569, 85, // en_SZ -> en_001 - 575, 1023, // en_Shaw -> root - 583, 85, // en_TC -> en_001 - 589, 85, // en_TK -> en_001 - 595, 85, // en_TO -> en_001 - 601, 85, // en_TT -> en_001 - 607, 85, // en_TV -> en_001 - 613, 85, // en_TZ -> en_001 - 619, 85, // en_UG -> en_001 - 625, 85, // en_VC -> en_001 - 631, 85, // en_VG -> en_001 - 637, 85, // en_VU -> en_001 - 643, 85, // en_WS -> en_001 - 649, 85, // en_ZA -> en_001 - 655, 85, // en_ZM -> en_001 - 661, 85, // en_ZW -> en_001 - 674, 667, // es_AR -> es_419 - 680, 667, // es_BO -> es_419 - 686, 667, // es_BR -> es_419 - 692, 667, // es_BZ -> es_419 - 698, 667, // es_CL -> es_419 - 704, 667, // es_CO -> es_419 - 710, 667, // es_CR -> es_419 - 716, 667, // es_CU -> es_419 - 722, 667, // es_DO -> es_419 - 728, 667, // es_EC -> es_419 - 734, 667, // es_GT -> es_419 - 740, 667, // es_HN -> es_419 - 746, 667, // es_MX -> es_419 - 752, 667, // es_NI -> es_419 - 758, 667, // es_PA -> es_419 - 764, 667, // es_PE -> es_419 - 770, 667, // es_PR -> es_419 - 776, 667, // es_PY -> es_419 - 782, 667, // es_SV -> es_419 - 788, 667, // es_US -> es_419 - 794, 667, // es_UY -> es_419 - 800, 667, // es_VE -> es_419 - 806, 1023, // ff_Adlm -> root - 814, 1023, // ff_Arab -> root - 828, 1023, // ha_Arab -> root - 836, 323, // hi_Latn -> en_IN - 844, 822, // ht -> fr_HT - 847, 1023, // iu_Latn -> root - 855, 1023, // kk_Arab -> root - 863, 1023, // ks_Deva -> root - 871, 1023, // ku_Arab -> root - 879, 1023, // ky_Arab -> root - 887, 1023, // ky_Latn -> root - 895, 1023, // ml_Arab -> root - 903, 1023, // mn_Mong -> root - 911, 1023, // mni_Mtei -> root - 920, 1023, // ms_Arab -> root - 928, 934, // nb -> no - 931, 934, // nn -> no - 937, 934, // no_NO -> no - 943, 1023, // pa_Arab -> root - 951, 1005, // pt_AO -> pt_PT - 957, 1005, // pt_CH -> pt_PT - 963, 1005, // pt_CV -> pt_PT - 969, 1005, // pt_FR -> pt_PT - 975, 1005, // pt_GQ -> pt_PT - 981, 1005, // pt_GW -> pt_PT - 987, 1005, // pt_LU -> pt_PT - 993, 1005, // pt_MO -> pt_PT - 999, 1005, // pt_MZ -> pt_PT - 1011, 1005, // pt_ST -> pt_PT - 1017, 1005, // pt_TL -> pt_PT - 1028, 1023, // sat_Deva -> root - 1037, 1023, // sd_Deva -> root - 1045, 1023, // sd_Khoj -> root - 1053, 1023, // sd_Sind -> root - 1061, 1023, // shi_Latn -> root - 1070, 1023, // so_Arab -> root - 1078, 1023, // sr_Latn -> root - 1086, 1023, // sw_Arab -> root - 1094, 1023, // tg_Arab -> root - 1102, 1023, // ug_Cyrl -> root - 1110, 1023, // uz_Arab -> root - 1118, 1023, // uz_Cyrl -> root - 1126, 1023, // vai_Latn -> root - 1135, 1023, // wo_Arab -> root - 1143, 1023, // yo_Arab -> root - 1151, 1023, // yue_Hans -> root - 1160, 1023, // zh_Hant -> root - 1179, 1168, // zh_Hant_MO -> zh_Hant_HK + 305, 85, // en_ID -> en_001 + 311, 85, // en_IE -> en_001 + 317, 85, // en_IL -> en_001 + 323, 85, // en_IM -> en_001 + 329, 85, // en_IN -> en_001 + 335, 85, // en_IO -> en_001 + 341, 85, // en_JE -> en_001 + 347, 85, // en_JM -> en_001 + 353, 85, // en_KE -> en_001 + 359, 85, // en_KI -> en_001 + 365, 85, // en_KN -> en_001 + 371, 85, // en_KY -> en_001 + 377, 85, // en_LC -> en_001 + 383, 85, // en_LR -> en_001 + 389, 85, // en_LS -> en_001 + 395, 85, // en_MG -> en_001 + 401, 85, // en_MO -> en_001 + 407, 85, // en_MS -> en_001 + 413, 85, // en_MT -> en_001 + 419, 85, // en_MU -> en_001 + 425, 85, // en_MV -> en_001 + 431, 85, // en_MW -> en_001 + 437, 85, // en_MY -> en_001 + 443, 85, // en_NA -> en_001 + 449, 85, // en_NF -> en_001 + 455, 85, // en_NG -> en_001 + 461, 92, // en_NL -> en_150 + 467, 85, // en_NR -> en_001 + 473, 85, // en_NU -> en_001 + 479, 85, // en_NZ -> en_001 + 485, 85, // en_PG -> en_001 + 491, 85, // en_PK -> en_001 + 497, 85, // en_PN -> en_001 + 503, 85, // en_PW -> en_001 + 509, 85, // en_RW -> en_001 + 515, 85, // en_SB -> en_001 + 521, 85, // en_SC -> en_001 + 527, 85, // en_SD -> en_001 + 533, 92, // en_SE -> en_150 + 539, 85, // en_SG -> en_001 + 545, 85, // en_SH -> en_001 + 551, 92, // en_SI -> en_150 + 557, 85, // en_SL -> en_001 + 563, 85, // en_SS -> en_001 + 569, 85, // en_SX -> en_001 + 575, 85, // en_SZ -> en_001 + 581, 1062, // en_Shaw -> root + 589, 85, // en_TC -> en_001 + 595, 85, // en_TK -> en_001 + 601, 85, // en_TO -> en_001 + 607, 85, // en_TT -> en_001 + 613, 85, // en_TV -> en_001 + 619, 85, // en_TZ -> en_001 + 625, 85, // en_UG -> en_001 + 631, 85, // en_VC -> en_001 + 637, 85, // en_VG -> en_001 + 643, 85, // en_VU -> en_001 + 649, 85, // en_WS -> en_001 + 655, 85, // en_ZA -> en_001 + 661, 85, // en_ZM -> en_001 + 667, 85, // en_ZW -> en_001 + 680, 673, // es_AR -> es_419 + 686, 673, // es_BO -> es_419 + 692, 673, // es_BR -> es_419 + 698, 673, // es_BZ -> es_419 + 704, 673, // es_CL -> es_419 + 710, 673, // es_CO -> es_419 + 716, 673, // es_CR -> es_419 + 722, 673, // es_CU -> es_419 + 728, 673, // es_DO -> es_419 + 734, 673, // es_EC -> es_419 + 740, 673, // es_GT -> es_419 + 746, 673, // es_HN -> es_419 + 752, 673, // es_JP -> es_419 + 758, 673, // es_MX -> es_419 + 764, 673, // es_NI -> es_419 + 770, 673, // es_PA -> es_419 + 776, 673, // es_PE -> es_419 + 782, 673, // es_PR -> es_419 + 788, 673, // es_PY -> es_419 + 794, 673, // es_SV -> es_419 + 800, 673, // es_US -> es_419 + 806, 673, // es_UY -> es_419 + 812, 673, // es_VE -> es_419 + 818, 1062, // ff_Adlm -> root + 826, 1062, // ff_Arab -> root + 840, 1062, // ha_Arab -> root + 848, 329, // hi_Latn -> en_IN + 856, 834, // ht -> fr_HT + 859, 1062, // iu_Latn -> root + 867, 1062, // kk_Arab -> root + 875, 1062, // ks_Deva -> root + 883, 1062, // ku_Arab -> root + 891, 1062, // kxv_Deva -> root + 900, 1062, // kxv_Orya -> root + 909, 1062, // kxv_Telu -> root + 918, 1062, // ky_Arab -> root + 926, 1062, // ky_Latn -> root + 934, 1062, // ml_Arab -> root + 942, 1062, // mn_Mong -> root + 950, 1062, // mni_Mtei -> root + 959, 1062, // ms_Arab -> root + 967, 973, // nb -> no + 970, 973, // nn -> no + 976, 973, // no_NO -> no + 982, 1062, // pa_Arab -> root + 990, 1044, // pt_AO -> pt_PT + 996, 1044, // pt_CH -> pt_PT + 1002, 1044, // pt_CV -> pt_PT + 1008, 1044, // pt_FR -> pt_PT + 1014, 1044, // pt_GQ -> pt_PT + 1020, 1044, // pt_GW -> pt_PT + 1026, 1044, // pt_LU -> pt_PT + 1032, 1044, // pt_MO -> pt_PT + 1038, 1044, // pt_MZ -> pt_PT + 1050, 1044, // pt_ST -> pt_PT + 1056, 1044, // pt_TL -> pt_PT + 1067, 1062, // sat_Deva -> root + 1076, 1062, // sd_Deva -> root + 1084, 1062, // sd_Khoj -> root + 1092, 1062, // sd_Sind -> root + 1100, 1062, // shi_Latn -> root + 1109, 1062, // so_Arab -> root + 1117, 1062, // sr_Latn -> root + 1125, 1062, // sw_Arab -> root + 1133, 1062, // tg_Arab -> root + 1141, 1062, // ug_Cyrl -> root + 1149, 1062, // uz_Arab -> root + 1157, 1062, // uz_Cyrl -> root + 1165, 1062, // vai_Latn -> root + 1174, 1062, // wo_Arab -> root + 1182, 1062, // yo_Arab -> root + 1190, 1062, // yue_Hans -> root + 1199, 1062, // zh_Hant -> root + 1218, 1207, // zh_Hant_MO -> zh_Hant_HK }; diff --git a/thirdparty/icu4c/common/localematcher.cpp b/thirdparty/icu4c/common/localematcher.cpp index 5f8c703df71..6fc578af118 100644 --- a/thirdparty/icu4c/common/localematcher.cpp +++ b/thirdparty/icu4c/common/localematcher.cpp @@ -307,7 +307,7 @@ LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) { return UND_LSR; } else { - return likelySubtags.makeMaximizedLsrFrom(locale, errorCode); + return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode); } } diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp index 70a794ae076..64ff63f3611 100644 --- a/thirdparty/icu4c/common/locid.cpp +++ b/thirdparty/icu4c/common/locid.cpp @@ -563,7 +563,7 @@ private: LocalMemoryIndicate whether this engine handles a particular character when + * the RuleBasedBreakIterator is used for a particular locale. This method is used + * by the RuleBasedBreakIterator to find a break engine.
+ * @param c A character which begins a run that the engine might handle. + * @param locale The locale. + * @return true if this engine handles the particular character for that locale. + * @internal ICU 74 technology preview + */ + virtual bool isFor(UChar32 c, const char* locale) const = 0; + + /** + *Indicate whether this engine handles a particular character.This method is + * used by the RuleBasedBreakIterator after it already find a break engine to see which + * characters after the first one can be handled by this break engine.
+ * @param c A character that the engine might handle. + * @return true if this engine handles the particular character. + * @internal ICU 74 technology preview + */ + virtual bool handles(UChar32 c) const = 0; + + /** + *Divide up a range of text handled by this break engine.
+ * + * @param text A UText representing the text + * @param start The start of the range of known characters + * @param end The end of the range of known characters + * @param foundBreaks Output of C array of int32_t break positions, or + * nullptr + * @param foundBreaksCapacity The capacity of foundBreaks + * @param status Information on any errors encountered. + * @return The number of breaks found + * @internal ICU 74 technology preview + */ + virtual int32_t fillBreaks(UText* text, int32_t start, int32_t end, + int32_t* foundBreaks, int32_t foundBreaksCapacity, + UErrorCode& status) const = 0; +}; +#endif /* U_HIDE_DRAFT_API */ + + /** * * A subclass of BreakIterator whose behavior is specified using a list of rules. @@ -716,9 +779,10 @@ private: * This function returns the appropriate LanguageBreakEngine for a * given character c. * @param c A character in the dictionary set + * @param locale The locale. * @internal (private) */ - const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); + const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale); public: #ifndef U_HIDE_INTERNAL_API @@ -734,8 +798,24 @@ private: */ void dumpTables(); #endif /* U_HIDE_INTERNAL_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Register a new external break engine. The external break engine will be adopted. + * Because ICU may choose to cache break engine internally, this must + * be called at application startup, prior to any calls to + * object methods of RuleBasedBreakIterator to avoid undefined behavior. + * @param toAdopt the ExternalBreakEngine instance to be adopted + * @param status the in/out status code, no special meanings are assigned + * @internal ICU 74 technology preview + */ + static void U_EXPORT2 registerExternalBreakEngine( + ExternalBreakEngine* toAdopt, UErrorCode& status); +#endif /* U_HIDE_DRAFT_API */ + }; + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/unicode/uchar.h b/thirdparty/icu4c/common/unicode/uchar.h index 4f82a9fb583..7470891338a 100644 --- a/thirdparty/icu4c/common/unicode/uchar.h +++ b/thirdparty/icu4c/common/unicode/uchar.h @@ -60,7 +60,7 @@ U_CDECL_BEGIN * @see u_getUnicodeVersion * @stable ICU 2.0 */ -#define U_UNICODE_VERSION "15.0" +#define U_UNICODE_VERSION "15.1" /** * \file @@ -532,12 +532,33 @@ typedef enum UProperty { * @stable ICU 70 */ UCHAR_RGI_EMOJI=71, +#ifndef U_HIDE_DRAFT_API + /** + * Binary property IDS_Unary_Operator. + * For programmatic determination of Ideographic Description Sequences. + * + * @draft ICU 74 + */ + UCHAR_IDS_UNARY_OPERATOR=72, + /** + * Binary property ID_Compat_Math_Start. + * Used in mathematical identifier profile in UAX #31. + * @draft ICU 74 + */ + UCHAR_ID_COMPAT_MATH_START=73, + /** + * Binary property ID_Compat_Math_Continue. + * Used in mathematical identifier profile in UAX #31. + * @draft ICU 74 + */ + UCHAR_ID_COMPAT_MATH_CONTINUE=74, +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for binary Unicode properties. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UCHAR_BINARY_LIMIT=72, + UCHAR_BINARY_LIMIT=75, #endif // U_HIDE_DEPRECATED_API /** Enumerated property Bidi_Class. @@ -1900,6 +1921,11 @@ enum UBlockCode { /** @stable ICU 72 */ UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/ + // New block in Unicode 15.1 + + /** @stable ICU 74 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328, /*[2EBF0]*/ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UBlockCode value. @@ -1907,7 +1933,7 @@ enum UBlockCode { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UBLOCK_COUNT = 328, + UBLOCK_COUNT = 329, #endif // U_HIDE_DEPRECATED_API /** @stable ICU 2.0 */ @@ -2439,6 +2465,16 @@ typedef enum ULineBreak { U_LB_E_MODIFIER = 41, /*[EM]*/ /** @stable ICU 58 */ U_LB_ZWJ = 42, /*[ZWJ]*/ + /** @stable ICU 74 */ + U_LB_AKSARA = 43, /*[AK]*/ + /** @stable ICU 74 */ + U_LB_AKSARA_PREBASE = 44, /*[AP]*/ + /** @stable ICU 74 */ + U_LB_AKSARA_START = 45, /*[AS]*/ + /** @stable ICU 74 */ + U_LB_VIRAMA_FINAL = 46, /*[VF]*/ + /** @stable ICU 74 */ + U_LB_VIRAMA = 47, /*[VI]*/ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal ULineBreak value. @@ -2446,7 +2482,7 @@ typedef enum ULineBreak { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_LB_COUNT = 43 + U_LB_COUNT = 48 #endif // U_HIDE_DEPRECATED_API } ULineBreak; diff --git a/thirdparty/icu4c/common/unicode/ulocale.h b/thirdparty/icu4c/common/unicode/ulocale.h new file mode 100644 index 00000000000..33e92844bc1 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ulocale.h @@ -0,0 +1,229 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef ULOCALE_H +#define ULOCALE_H + +#include "unicode/localpointer.h" +#include "unicode/uenum.h" +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Locale ID functionality similar to C++ class Locale + */ + +#ifndef U_HIDE_DRAFT_API +/** + * Opaque C service object type for the locale API + * @draft ICU 74 + */ +struct ULocale; + +/** + * C typedef for struct ULocale. + * @draft ICU 74 + */ +typedef struct ULocale ULocale; + +/** + * Constructs an ULocale from the locale ID. + * The created ULocale should be destroyed by calling + * ulocale_close(); + * @param localeID the locale, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the locale; if negative, then the locale need to be + * null terminated. + * @param err the error code + * @return the locale. + * + * @draft ICU 74 + */ +U_CAPI ULocale* U_EXPORT2 +ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err); + +/** + * Constructs an ULocale from the provided IETF BCP 47 language tag. + * The created ULocale should be destroyed by calling + * ulocale_close(); + * @param tag the language tag, defined as IETF BCP 47 language tag, const + * char* pointer (need not be terminated when the length is non-negative) + * @param length the length of the tag; if negative, then the tag need to be + * null terminated. + * @param err the error code + * @return the locale. + * + * @draft ICU 74 + */ +U_CAPI ULocale* U_EXPORT2 +ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err); + +/** + * Close the locale and destroy it's internal states. + * + * @param locale the locale + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocale_close(ULocale* locale); + +/** + * Returns the locale's ISO-639 language code. + * + * @param locale the locale + * @return the language code of the locale. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getLanguage(const ULocale* locale); + +/** + * Returns the locale's ISO-15924 abbreviation script code. + * + * @param locale the locale + * @return A pointer to the script. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getScript(const ULocale* locale); + +/** + * Returns the locale's ISO-3166 region code. + * + * @param locale the locale + * @return A pointer to the region. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getRegion(const ULocale* locale); + +/** + * Returns the locale's variant code. + * + * @param locale the locale + * @return A pointer to the variant. + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getVariant(const ULocale* locale); + +/** + * Returns the programmatic name of the entire locale, with the language, + * country and variant separated by underbars. If a field is missing, up + * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN", + * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" + * + * @param locale the locale + * @return A pointer to "name". + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getLocaleID(const ULocale* locale); + +/** + * Returns the programmatic name of the entire locale as ulocale_getLocaleID() + * would return, but without keywords. + * + * @param locale the locale + * @return A pointer to "base name". + * @draft ICU 74 + */ +U_CAPI const char* U_EXPORT2 +ulocale_getBaseName(const ULocale* locale); + +/** + * Gets the bogus state. Locale object can be bogus if it doesn't exist + * + * @param locale the locale + * @return false if it is a real locale, true if it is a bogus locale + * @draft ICU 74 + */ +U_CAPI bool U_EXPORT2 +ulocale_isBogus(const ULocale* locale); + +/** + * Gets the list of keywords for the specified locale. + * + * @param locale the locale + * @param err the error code + * @return pointer to UEnumeration, or nullptr if there are no keywords. + * Client must call uenum_close() to dispose the returned value. + * @draft ICU 74 + */ +U_CAPI UEnumeration* U_EXPORT2 +ulocale_getKeywords(const ULocale* locale, UErrorCode *err); + +/** + * Gets the list of unicode keywords for the specified locale. + * + * @param locale the locale + * @param err the error code + * @return pointer to UEnumeration, or nullptr if there are no keywords. + * Client must call uenum_close() to dispose the returned value. + * @draft ICU 74 + */ +U_CAPI UEnumeration* U_EXPORT2 +ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err); + +/** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * @param locale the locale + * @param keyword the keyword, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param keywordLength the length of the keyword; if negative, then the + * keyword need to be null terminated. + * @param valueBuffer The buffer to receive the value. + * @param valueBufferCapacity The capacity of receiving valueBuffer. + * @param err the error code + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ulocale_getKeywordValue( + const ULocale* locale, const char* keyword, int32_t keywordLength, + char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err); + +/** + * Gets the Unicode value for a Unicode keyword. + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * @param locale the locale + * @param keyword the Unicode keyword, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param keywordLength the length of the Unicode keyword; if negative, + * then the keyword need to be null terminated. + * @param valueBuffer The buffer to receive the Unicode value. + * @param valueBufferCapacity The capacity of receiving valueBuffer. + * @param err the error code + * @draft ICU 74 + */ +U_CAPI int32_t U_EXPORT2 +ulocale_getUnicodeKeywordValue( + const ULocale* locale, const char* keyword, int32_t keywordLength, + char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalULocalePointer + * "Smart pointer" class, closes a ULocale via ulocale_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 74 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close); + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif /* U_HIDE_DRAFT_API */ + +#endif /*_ULOCALE */ diff --git a/thirdparty/icu4c/common/unicode/ulocbuilder.h b/thirdparty/icu4c/common/unicode/ulocbuilder.h new file mode 100644 index 00000000000..f2e98612f0a --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ulocbuilder.h @@ -0,0 +1,441 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +#ifndef __ULOCBUILDER_H__ +#define __ULOCBUILDER_H__ + +#include "unicode/localpointer.h" +#include "unicode/ulocale.h" +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Builder API for Locale + */ + +#ifndef U_HIDE_DRAFT_API + +/** + * Opaque C service object type for the locale builder API + * @draft ICU 74 + */ +struct ULocaleBuilder; + +/** + * C typedef for struct ULocaleBuilder. + * @draft ICU 74 + */ +typedef struct ULocaleBuilder ULocaleBuilder; + +/** + *ULocaleBuilder
is used to build valid locale
id
+ * string or IETF BCP 47 language tag from values configured by the setters.
+ * The ULocaleBuilder
checks if a value configured by a
+ * setter satisfies the syntax requirements defined by the Locale
+ * class. A string of Locale created by a ULocaleBuilder
is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
+ *
+ * The following example shows how to create a locale
string
+ * with the ULocaleBuilder
.
+ *
+ *+ * + *+ * UErrorCode err = U_ZERO_ERROR; + * char buffer[ULOC_FULLNAME_CAPACITY]; + * ULocaleBuilder* builder = ulocbld_open(); + * ulocbld_setLanguage(builder, "sr", -1); + * ulocbld_setScript(builder, "Latn", -1); + * ulocbld_setRegion(builder, "RS", -1); + * int32_t length = ulocbld_buildLocaleID( + * builder, buffer, ULOC_FULLNAME_CAPACITY, &error); + * ulocbld_close(builder); + *+ *
ULocaleBuilders can be reused; ulocbld_clear()
resets all
+ * fields to their default values.
+ *
+ *
ULocaleBuilder tracks errors in an internal UErrorCode. For all setters,
+ * except ulocbld_setLanguageTag and ulocbld_setLocale, ULocaleBuilder will return immediately
+ * if the internal UErrorCode is in error state.
+ * To reset internal state and error code, call clear method.
+ * The ulocbld_setLanguageTag and setLocale method will first clear the internal
+ * UErrorCode, then track the error of the validation of the input parameter
+ * into the internal UErrorCode.
+ *
+ * @draft ICU 74
+ */
+
+/**
+ * Constructs an empty ULocaleBuilder. The default value of all
+ * fields, extensions, and private use information is the
+ * empty string. The created builder should be destroyed by calling
+ * ulocbld_close();
+ *
+ * @draft ICU 74
+ */
+U_CAPI ULocaleBuilder* U_EXPORT2
+ulocbld_open();
+
+/**
+ * Close the builder and destroy it's internal states.
+ * @param builder the builder
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_close(ULocaleBuilder* builder);
+
+/**
+ * Resets the ULocaleBuilder
to match the provided
+ * locale
. Existing state is discarded.
+ *
+ *
All fields of the locale must be well-formed. + *
This method clears the internal UErrorCode.
+ *
+ * @param builder the builder
+ * @param locale the locale, a const char * pointer (need not be terminated when
+ * the length is non-negative)
+ * @param length the length of the locale; if negative, then the locale need to be
+ * null terminated,
+ *
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length);
+
+/**
+ * Resets the ULocaleBuilder
to match the provided
+ * ULocale
. Existing state is discarded.
+ *
+ *
The locale must be not bogus. + *
This method clears the internal UErrorCode.
+ *
+ * @param builder the builder.
+ * @param locale the locale, a ULocale* pointer. The builder adopts the locale
+ * after the call and the client must not delete it.
+ *
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale);
+
+/**
+ * Resets the ULocaleBuilder to match the provided IETF BCP 47 language tag.
+ * Discards the existing state.
+ * The empty string causes the builder to be reset, like {@link #ulocbld_clear}.
+ * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to their canonical form before being processed.
+ * Otherwise, the language tag
must be well-formed,
+ * or else the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods
+ * will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ *
This method clears the internal UErrorCode.
+ *
+ * @param builder the builder
+ * @param tag the language tag, defined as IETF BCP 47 language tag, a
+ * const char * pointer (need not be terminated when
+ * the length is non-negative)
+ * @param length the length of the tag; if negative, then the tag need to be
+ * null terminated,
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length);
+
+/**
+ * Sets the language. If language
is the empty string, the
+ * language in this ULocaleBuilder
is removed. Otherwise, the
+ * language
must be well-formed, or else the ulocbld_buildLocaleID()
+ * and ulocbld_buildLanguageTag() methods will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ *
The syntax of language value is defined as
+ * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
+ *
+ * @param builder the builder
+ * @param language the language, a const char * pointer (need not be terminated when
+ * the length is non-negative)
+ * @param length the length of the language; if negative, then the language need to be
+ * null terminated,
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t length);
+
+/**
+ * Sets the script. If script
is the empty string, the script in
+ * this ULocaleBuilder
is removed.
+ * Otherwise, the script
must be well-formed, or else the
+ * ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later
+ * report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ *
The script value is a four-letter script code as
+ * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
+ * defined by ISO 15924
+ *
+ * @param builder the builder
+ * @param script the script, a const char * pointer (need not be terminated when
+ * the length is non-negative)
+ * @param length the length of the script; if negative, then the script need to be
+ * null terminated,
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length);
+
+/**
+ * Sets the region. If region is the empty string, the region in this
+ * ULocaleBuilder
is removed. Otherwise, the region
+ * must be well-formed, or else the ulocbld_buildLocaleID() and
+ * ulocbld_buildLanguageTag() methods will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ *
The region value is defined by + * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag) + * as a two-letter ISO 3166 code or a three-digit UN M.49 area code. + * + *
The region value in the Locale
created by the
+ * ULocaleBuilder
is always normalized to upper case.
+ *
+ * @param builder the builder
+ * @param region the region, a const char * pointer (need not be terminated when
+ * the length is non-negative)
+ * @param length the length of the region; if negative, then the region need to be
+ * null terminated,
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length);
+
+/**
+ * Sets the variant. If variant is the empty string, the variant in this
+ * ULocaleBuilder
is removed. Otherwise, the variant
+ * must be well-formed, or else the ulocbld_buildLocaleID() and
+ * ulocbld_buildLanguageTag() methods will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ *
Note: This method checks if variant
+ * satisfies the
+ * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
+ * syntax requirements, and normalizes the value to lowercase letters. However,
+ * the Locale
class does not impose any syntactic
+ * restriction on variant. To set an ill-formed variant, use a Locale constructor.
+ * If there are multiple unicode_variant_subtag, the caller must concatenate
+ * them with '-' as separator (ex: "foobar-fibar").
+ *
+ * @param builder the builder
+ * @param variant the variant, a const char * pointer (need not be terminated when
+ * the length is non-negative)
+ * @param length the length of the variant; if negative, then the variant need to be
+ * null terminated,
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length);
+
+/**
+ * Sets the extension for the given key. If the value is the empty string,
+ * the extension is removed. Otherwise, the key
and
+ * value
must be well-formed, or else the ulocbld_buildLocaleID()
+ * and ulocbld_buildLanguageTag() methods will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ *
Note: The key ('u') is used for the Unicode locale extension. + * Setting a value for this key replaces any existing Unicode locale key/type + * pairs with those defined in the extension. + * + *
Note: The key ('x') is used for the private use code. To be + * well-formed, the value for this key needs only to have subtags of one to + * eight alphanumeric characters, not two to eight as in the general case. + * + * @param builder the builder + * @param key the extension key + * @param value the value, a const char * pointer (need not be terminated when + * the length is non-negative) + * @param length the length of the value; if negative, then the value need to be + * null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length); + +/** + * Sets the Unicode locale keyword type for the given key. If the type + * StringPiece is constructed with a nullptr, the keyword is removed. + * If the type is the empty string, the keyword is set without type subtags. + * Otherwise, the key and type must be well-formed, or else the + * ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later + * report an U_ILLEGAL_ARGUMENT_ERROR. + * + *
Keys and types are converted to lower case. + * + *
Note:Setting the 'u' extension via {@link #ulocbld_setExtension} + * replaces all Unicode locale keywords with those defined in the + * extension. + * + * @param builder the builder + * @param key the Unicode locale key, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param keyLength the length of the key; if negative, then the key need to be + * null terminated, + * @param type the Unicode locale type, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param typeLength the length of the type; if negative, then the type need to + * be null terminated, + * @return This builder. + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder, + const char* key, int32_t keyLength, const char* type, int32_t typeLength); + +/** + * Adds a unicode locale attribute, if not already present, otherwise + * has no effect. The attribute must not be empty string and must be + * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status + * during the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() calls. + * + * @param builder the builder + * @param attribute the attribute, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param length the length of the attribute; if negative, then the attribute + * need to be null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_addUnicodeLocaleAttribute( + ULocaleBuilder* builder, const char* attribute, int32_t length); + +/** + * Removes a unicode locale attribute, if present, otherwise has no + * effect. The attribute must not be empty string and must be well-formed + * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the ulocbld_buildLocaleID() + * and ulocbld_buildLanguageTag() calls. + * + *
Attribute comparison for removal is case-insensitive. + * + * @param builder the builder + * @param attribute the attribute, a const char * pointer (need not be + * terminated when the length is non-negative) + * @param length the length of the attribute; if negative, then the attribute + * need to be null terminated, + * @draft ICU 74 + */ +U_CAPI void U_EXPORT2 +ulocbld_removeUnicodeLocaleAttribute( + ULocaleBuilder* builder, const char* attribute, int32_t length); + +/** + * Resets the builder to its initial, empty state. + *
This method clears the internal UErrorCode.
+ *
+ * @param builder the builder
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_clear(ULocaleBuilder* builder);
+
+/**
+ * Resets the extensions to their initial, empty state.
+ * Language, script, region and variant are unchanged.
+ *
+ * @param builder the builder
+ * @draft ICU 74
+ */
+U_CAPI void U_EXPORT2
+ulocbld_clearExtensions(ULocaleBuilder* builder);
+
+/**
+ * Build the LocaleID string from the fields set on this builder.
+ * If any set methods or during the ulocbld_buildLocaleID() call require memory
+ * allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
+ * If any of the fields set by the setters are not well-formed, the status
+ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+ * not change after the ulocbld_buildLocaleID() call and the caller is
+ * free to keep using the same builder to build more locales.
+ *
+ * @param builder the builder
+ * @param locale the locale id
+ * @param localeCapacity the size of the locale buffer to store the locale id
+ * @param err the error code
+ * @return the length of the locale id in buffer
+ * @draft ICU 74
+ */
+U_CAPI int32_t U_EXPORT2
+ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale,
+ int32_t localeCapacity, UErrorCode* err);
+
+/**
+ * Build the ULocale object from the fields set on this builder.
+ * If any set methods or during the ulocbld_buildULocale() call require memory
+ * allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
+ * If any of the fields set by the setters are not well-formed, the status
+ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+ * not change after the ulocbld_buildULocale() call and the caller is
+ * free to keep using the same builder to build more locales.
+ *
+ * @param builder the builder.
+ * @param err the error code.
+ * @return the locale, a ULocale* pointer. The created ULocale must be
+ * destroyed by calling {@link ulocale_close}.
+ * @draft ICU 74
+ */
+U_CAPI ULocale* U_EXPORT2
+ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err);
+
+/**
+ * Build the IETF BCP 47 language tag string from the fields set on this builder.
+ * If any set methods or during the ulocbld_buildLanguageTag() call require memory
+ * allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
+ * If any of the fields set by the setters are not well-formed, the status
+ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+ * not change after the ulocbld_buildLanguageTag() call and the caller is free
+ * to keep using the same builder to build more locales.
+ *
+ * @param builder the builder
+ * @param language the language tag
+ * @param languageCapacity the size of the language buffer to store the language
+ * tag
+ * @param err the error code
+ * @return the length of the language tag in buffer
+ * @draft ICU 74
+ */
+U_CAPI int32_t U_EXPORT2
+ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language,
+ int32_t languageCapacity, UErrorCode* err);
+
+/**
+ * Sets the UErrorCode if an error occurred while recording sets.
+ * Preserves older error codes in the outErrorCode.
+ *
+ * @param builder the builder
+ * @param outErrorCode Set to an error code that occurred while setting subtags.
+ * Unchanged if there is no such error or if outErrorCode
+ * already contained an error.
+ * @return true if U_FAILURE(*outErrorCode)
+ * @draft ICU 74
+ */
+U_CAPI UBool U_EXPORT2
+ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalULocaleBuilderPointer
+ * "Smart pointer" class, closes a ULocaleBuilder via ulocbld_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @draft ICU 74
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleBuilderPointer, ULocaleBuilder, ulocbld_close);
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif /* U_HIDE_DRAFT_API */
+
+#endif // __ULOCBUILDER_H__
diff --git a/thirdparty/icu4c/common/unicode/unorm2.h b/thirdparty/icu4c/common/unicode/unorm2.h
index 24417b7103c..3844041f170 100644
--- a/thirdparty/icu4c/common/unicode/unorm2.h
+++ b/thirdparty/icu4c/common/unicode/unorm2.h
@@ -181,7 +181,10 @@ U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode);
/**
- * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization
+ * which is equivalent to applying the NFKC_Casefold mappings and then NFC.
+ * See https://www.unicode.org/reports/tr44/#NFKC_Casefold
+ *
* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
* Returns an unmodifiable singleton instance. Do not delete it.
* @param pErrorCode Standard ICU error code. Its input value must
@@ -194,6 +197,25 @@ unorm2_getNFKDInstance(UErrorCode *pErrorCode);
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
+ * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
+ * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
+ *
+ * Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @draft ICU 74
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
+#endif // U_HIDE_DRAFT_API
+
/**
* Returns a UNormalizer2 instance which uses the specified data file
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
@@ -206,7 +228,7 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
*
* @param packageName NULL for ICU built-in data, otherwise application data package name
- * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
* @param mode normalization mode (compose or decompose etc.)
* @param pErrorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h
index b35df453800..74f2cae5106 100644
--- a/thirdparty/icu4c/common/unicode/urename.h
+++ b/thirdparty/icu4c/common/unicode/urename.h
@@ -138,8 +138,8 @@
#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart)
#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default)
#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default)
+#define mixedMeasuresToMicros U_ICU_ENTRY_POINT_RENAME(mixedMeasuresToMicros)
#define numSysCleanup U_ICU_ENTRY_POINT_RENAME(numSysCleanup)
-#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup)
#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun)
#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun)
#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun)
@@ -193,6 +193,7 @@
#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns)
#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns)
#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns)
+#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup)
#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems)
#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource)
#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias)
@@ -512,9 +513,6 @@
#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap)
-#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys)
-#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey)
-#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys)
#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add)
#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear)
#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField)
@@ -532,6 +530,7 @@
#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
#define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone)
+#define ucal_getIanaTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getIanaTimeZoneID)
#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType)
@@ -587,6 +586,7 @@
#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
+#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8)
#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
@@ -955,9 +955,16 @@
#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close)
#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next)
#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open)
+#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit)
+#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer)
+#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io)
+#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit)
#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
+#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou)
#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
+#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode)
+#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue)
#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
@@ -969,7 +976,11 @@
#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
+#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit)
#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
+#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou)
+#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64)
+#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop)
#define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString)
#define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition)
#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
@@ -1133,6 +1144,39 @@
#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType)
#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey)
#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType)
+#define ulocale_close U_ICU_ENTRY_POINT_RENAME(ulocale_close)
+#define ulocale_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocale_getBaseName)
+#define ulocale_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocale_getKeywordValue)
+#define ulocale_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocale_getKeywords)
+#define ulocale_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocale_getLanguage)
+#define ulocale_getLocaleID U_ICU_ENTRY_POINT_RENAME(ulocale_getLocaleID)
+#define ulocale_getRegion U_ICU_ENTRY_POINT_RENAME(ulocale_getRegion)
+#define ulocale_getScript U_ICU_ENTRY_POINT_RENAME(ulocale_getScript)
+#define ulocale_getUnicodeKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocale_getUnicodeKeywordValue)
+#define ulocale_getUnicodeKeywords U_ICU_ENTRY_POINT_RENAME(ulocale_getUnicodeKeywords)
+#define ulocale_getVariant U_ICU_ENTRY_POINT_RENAME(ulocale_getVariant)
+#define ulocale_isBogus U_ICU_ENTRY_POINT_RENAME(ulocale_isBogus)
+#define ulocale_openForLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocale_openForLanguageTag)
+#define ulocale_openForLocaleID U_ICU_ENTRY_POINT_RENAME(ulocale_openForLocaleID)
+#define ulocbld_addUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ulocbld_addUnicodeLocaleAttribute)
+#define ulocbld_adoptULocale U_ICU_ENTRY_POINT_RENAME(ulocbld_adoptULocale)
+#define ulocbld_buildLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocbld_buildLanguageTag)
+#define ulocbld_buildLocaleID U_ICU_ENTRY_POINT_RENAME(ulocbld_buildLocaleID)
+#define ulocbld_buildULocale U_ICU_ENTRY_POINT_RENAME(ulocbld_buildULocale)
+#define ulocbld_clear U_ICU_ENTRY_POINT_RENAME(ulocbld_clear)
+#define ulocbld_clearExtensions U_ICU_ENTRY_POINT_RENAME(ulocbld_clearExtensions)
+#define ulocbld_close U_ICU_ENTRY_POINT_RENAME(ulocbld_close)
+#define ulocbld_copyErrorTo U_ICU_ENTRY_POINT_RENAME(ulocbld_copyErrorTo)
+#define ulocbld_open U_ICU_ENTRY_POINT_RENAME(ulocbld_open)
+#define ulocbld_removeUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ulocbld_removeUnicodeLocaleAttribute)
+#define ulocbld_setExtension U_ICU_ENTRY_POINT_RENAME(ulocbld_setExtension)
+#define ulocbld_setLanguage U_ICU_ENTRY_POINT_RENAME(ulocbld_setLanguage)
+#define ulocbld_setLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocbld_setLanguageTag)
+#define ulocbld_setLocale U_ICU_ENTRY_POINT_RENAME(ulocbld_setLocale)
+#define ulocbld_setRegion U_ICU_ENTRY_POINT_RENAME(ulocbld_setRegion)
+#define ulocbld_setScript U_ICU_ENTRY_POINT_RENAME(ulocbld_setScript)
+#define ulocbld_setUnicodeLocaleKeyword U_ICU_ENTRY_POINT_RENAME(ulocbld_setUnicodeLocaleKeyword)
+#define ulocbld_setVariant U_ICU_ENTRY_POINT_RENAME(ulocbld_setVariant)
#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close)
#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion)
#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter)
@@ -1213,6 +1257,7 @@
#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
+#define unorm2_getNFKCSimpleCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCSimpleCasefoldInstance)
#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
@@ -1349,6 +1394,7 @@
#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix)
#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii)
#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic)
+#define uprv_currencyLeads U_ICU_ENTRY_POINT_RENAME(uprv_currencyLeads)
#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus)
#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault)
#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding)
@@ -1367,6 +1413,7 @@
#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs)
#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd)
#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd)
+#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass)
#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString)
#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare)
#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal)
@@ -1763,6 +1810,9 @@
#define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64)
#define usnumf_openForLocale U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocale)
#define usnumf_openForLocaleAndGroupingStrategy U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocaleAndGroupingStrategy)
+#define uspoof_areBidiConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusable)
+#define uspoof_areBidiConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusableUTF8)
+#define uspoof_areBidiConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusableUnicodeString)
#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable)
#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
@@ -1778,6 +1828,9 @@
#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
+#define uspoof_getBidiSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeleton)
+#define uspoof_getBidiSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeletonUTF8)
+#define uspoof_getBidiSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeletonUnicodeString)
#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h
index fc784b2492f..9fc98e73807 100644
--- a/thirdparty/icu4c/common/unicode/uvernum.h
+++ b/thirdparty/icu4c/common/unicode/uvernum.h
@@ -53,13 +53,13 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION_MAJOR_NUM 73
+#define U_ICU_VERSION_MAJOR_NUM 74
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_MINOR_NUM 2
+#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@@ -79,7 +79,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SUFFIX _73
+#define U_ICU_VERSION_SUFFIX _74
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -132,7 +132,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION "73.2"
+#define U_ICU_VERSION "74.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@@ -145,13 +145,13 @@
*
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SHORT "73"
+#define U_ICU_VERSION_SHORT "74"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
-#define U_ICU_DATA_VERSION "73.2"
+#define U_ICU_DATA_VERSION "74.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================
diff --git a/thirdparty/icu4c/common/uniquecharstr.h b/thirdparty/icu4c/common/uniquecharstr.h
index 10cc924f7f9..e5d3b734180 100644
--- a/thirdparty/icu4c/common/uniquecharstr.h
+++ b/thirdparty/icu4c/common/uniquecharstr.h
@@ -10,6 +10,7 @@
#include "charstr.h"
#include "uassert.h"
#include "uhash.h"
+#include "cmemory.h"
U_NAMESPACE_BEGIN
@@ -47,22 +48,20 @@ public:
}
/**
- * Adds a string and returns a unique number for it.
- * The string's buffer contents must not change, nor move around in memory,
+ * Adds a NUL-terminated string and returns a unique number for it.
+ * The string must not change, nor move around in memory,
* while this UniqueCharStrings is in use.
- * The string contents must be NUL-terminated exactly at s.length().
*
- * Best used with read-only-alias UnicodeString objects that point to
- * stable storage, such as strings returned by resource bundle functions.
+ * Best used with string data in a stable storage, such as strings returned
+ * by resource bundle functions.
*/
- int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) { return 0; }
+ int32_t add(const char16_t*p, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) { return -1; }
if (isFrozen) {
errorCode = U_NO_WRITE_PERMISSION;
- return 0;
+ return -1;
}
// The string points into the resource bundle.
- const char16_t *p = s.getBuffer();
int32_t oldIndex = uhash_geti(&map, p);
if (oldIndex != 0) { // found duplicate
return oldIndex;
@@ -71,11 +70,33 @@ public:
// The strings object is also terminated with one implicit NUL.
strings->append(0, errorCode);
int32_t newIndex = strings->length();
- strings->appendInvariantChars(s, errorCode);
+ strings->appendInvariantChars(p, u_strlen(p), errorCode);
uhash_puti(&map, const_cast
* The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call
* {@link uspoof_close} when the object goes out of scope:
@@ -339,6 +359,51 @@
* COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
* scripts.
*
+ *
+ * In some circumstances, the only concern is confusion between identifiers displayed with the same
+ * paragraph direction.
+ *
+ *
+ * An example is the case where identifiers are usernames prefixed with the @ symbol.
+ * That symbol will appear to the left in a left-to-right context, and to the right in a
+ * right-to-left context, so that an identifier displayed in a left-to-right context can never be
+ * confused with an identifier displayed in a right-to-left context:
+ *
+ * In that case, the caller should check for both LTR-confusability and RTL-confusability:
+ *
+ * \code{.cpp}
+ * bool confusableInEitherDirection =
+ * uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, id1, id2, &status) ||
+ * uspoof_areBidiConfusableUnicodeString(sc, UBIDI_RTL, id1, id2, &status);
+ * \endcode
+ *
+ * If the bidiSkeleton is used, the LTR and RTL skeleta should be kept separately and compared, LTR
+ * with LTR and RTL with RTL.
+ *
+ *
+ * In cases where confusability between the visual appearances of an identifier displayed in a
+ * left-to-right context with another identifier displayed in a right-to-left context is a concern,
+ * the LTR skeleton of one can be compared with the RTL skeleton of the other. However, this
+ * very broad definition of confusability may have unexpected results; for instance, it treats the
+ * ASCII identifiers "Mark_" and "_Mark" as confusable.
+ *
* Advanced bidirectional usage
+ * If the paragraph direction with which the identifiers will be displayed is not known, there are
+ * multiple options for confusable detection depending on the circumstances.
+ *
+ *
+ *
+ * Additional Information
*
* A USpoofChecker
instance may be used repeatedly to perform checks on any number of identifiers.
@@ -519,7 +584,7 @@ typedef enum USpoofChecks {
/**
- * Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and
+ * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
* for returned identifier restriction levels in check results.
*
* @stable ICU 51
@@ -633,8 +698,8 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
/**
* Open a Spoof Checker from the source form of the spoof data.
* The input corresponds to the Unicode data file confusables.txt
- * as described in Unicode UAX #39. The syntax of the source data
- * is as described in UAX #39 for this file, and the content of
+ * as described in Unicode Technical Standard #39. The syntax of the source data
+ * is as described in UTS #39 for this file, and the content of
* this file is acceptable input.
*
* The character encoding of the (char *) input text is UTF-8.
@@ -1111,7 +1176,7 @@ uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *
/**
- * Check the whether two specified strings are visually confusable.
+ * Check whether two specified strings are visually confusable.
*
* If the strings are confusable, the return value will be nonzero, as long as
* {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
@@ -1159,7 +1224,58 @@ uspoof_areConfusable(const USpoofChecker *sc,
const UChar *id2, int32_t length2,
UErrorCode *status);
-
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Check whether two specified strings are visually confusable when
+ * displayed in a context with the given paragraph direction.
+ *
+ * If the strings are confusable, the return value will be nonzero, as long as
+ * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
+ *
+ * The bits in the return value correspond to flags for each of the classes of
+ * confusables applicable to the two input strings. According to UTS 39
+ * section 4, the possible flags are:
+ *
+ *
+ *
+ *
+ * If one or more of the above flags were not listed in uspoof_setChecks(), this
+ * function will never report that class of confusable. The check
+ * {@link USPOOF_CONFUSABLE} enables all three flags.
+ *
+ *
+ * @param sc The USpoofChecker
+ * @param direction The paragraph direction with which the identifiers are
+ * displayed. Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id1 The first of the two identifiers to be compared for
+ * confusability. The strings are in UTF-16 format.
+ * @param length1 the length of the first identifier, expressed in
+ * 16 bit UTF-16 code units, or -1 if the string is
+ * nul terminated.
+ * @param id2 The second of the two identifiers to be compared for
+ * confusability. The identifiers are in UTF-16 format.
+ * @param length2 The length of the second identifiers, expressed in
+ * 16 bit UTF-16 code units, or -1 if the string is
+ * nul terminated.
+ * @param status The error code, set if an error occurred while attempting to
+ * perform the check.
+ * Confusability of the identifiers is not reported here,
+ * but through this function's return value.
+ * @return An integer value with bit(s) set corresponding to
+ * the type of confusability found, as defined by
+ * enum USpoofChecks. Zero is returned if the identifiers
+ * are not confusable.
+ *
+ * @draft ICU 74
+ */
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
+ const UChar *id1, int32_t length1,
+ const UChar *id2, int32_t length2,
+ UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
/**
* A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
@@ -1192,14 +1308,45 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
const char *id2, int32_t length2,
UErrorCode *status);
-
-
+#ifndef U_HIDE_DRAFT_API
+/**
+ * A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format.
+ *
+ * @param sc The USpoofChecker
+ * @param direction The paragraph direction with which the identifiers are
+ * displayed. Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id1 The first of the two identifiers to be compared for
+ * confusability. The strings are in UTF-8 format.
+ * @param length1 the length of the first identifiers, in bytes, or -1
+ * if the string is nul terminated.
+ * @param id2 The second of the two identifiers to be compared for
+ * confusability. The strings are in UTF-8 format.
+ * @param length2 The length of the second string in bytes, or -1
+ * if the string is nul terminated.
+ * @param status The error code, set if an error occurred while attempting to
+ * perform the check.
+ * Confusability of the strings is not reported here,
+ * but through this function's return value.
+ * @return An integer value with bit(s) set corresponding to
+ * the type of confusability found, as defined by
+ * enum USpoofChecks. Zero is returned if the strings
+ * are not confusable.
+ *
+ * @draft ICU 74
+ *
+ * @see uspoof_areBidiConfusable
+ */
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+ const char *id1, int32_t length1,
+ const char *id2, int32_t length2,
+ UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
/**
* Get the "skeleton" for an identifier.
* Skeletons are a transformation of the input identifier;
* Two identifiers are confusable if their skeletons are identical.
- * See Unicode UAX #39 for additional information.
+ * See Unicode Technical Standard #39 for additional information.
*
* Using skeletons directly makes it possible to quickly check
* whether an identifier is confusable with any of some large
@@ -1233,11 +1380,50 @@ uspoof_getSkeleton(const USpoofChecker *sc,
UChar *dest, int32_t destCapacity,
UErrorCode *status);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Get the "bidiSkeleton" for an identifier and a direction.
+ * Skeletons are a transformation of the input identifier;
+ * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+ * they are RTL-confusable if their RTL bidiSkeletons are identical.
+ * See Unicode Technical Standard #39 for additional information:
+ * https://www.unicode.org/reports/tr39/#Confusable_Detection.
+ *
+ * Using skeletons directly makes it possible to quickly check
+ * whether an identifier is confusable with any of some large
+ * set of existing identifiers, by creating an efficiently
+ * searchable collection of the skeletons.
+ *
+ * @param sc The USpoofChecker.
+ * @param direction The context direction with which the identifier will be
+ * displayed. Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id The input identifier whose skeleton will be computed.
+ * @param length The length of the input identifier, expressed in 16 bit
+ * UTF-16 code units, or -1 if the string is zero terminated.
+ * @param dest The output buffer, to receive the skeleton string.
+ * @param destCapacity The length of the output buffer, in 16 bit units.
+ * The destCapacity may be zero, in which case the function will
+ * return the actual length of the skeleton.
+ * @param status The error code, set if an error occurred while attempting to
+ * perform the check.
+ * @return The length of the skeleton string. The returned length
+ * is always that of the complete skeleton, even when the
+ * supplied buffer is too small (or of zero length)
+ *
+ * @draft ICU 74
+ * @see uspoof_areBidiConfusable
+ */
+U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,
+ UBiDiDirection direction,
+ const UChar *id, int32_t length,
+ UChar *dest, int32_t destCapacity, UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
/**
* Get the "skeleton" for an identifier.
* Skeletons are a transformation of the input identifier;
* Two identifiers are confusable if their skeletons are identical.
- * See Unicode UAX #39 for additional information.
+ * See Unicode Technical Standard #39 for additional information.
*
* Using skeletons directly makes it possible to quickly check
* whether an identifier is confusable with any of some large
@@ -1273,6 +1459,46 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
char *dest, int32_t destCapacity,
UErrorCode *status);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Get the "bidiSkeleton" for an identifier and a direction.
+ * Skeletons are a transformation of the input identifier;
+ * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+ * they are RTL-confusable if their RTL bidiSkeletons are identical.
+ * See Unicode Technical Standard #39 for additional information:
+ * https://www.unicode.org/reports/tr39/#Confusable_Detection.
+ *
+ * Using skeletons directly makes it possible to quickly check
+ * whether an identifier is confusable with any of some large
+ * set of existing identifiers, by creating an efficiently
+ * searchable collection of the skeletons.
+ *
+ * @param sc The USpoofChecker
+ * @param direction The context direction with which the identifier will be
+ * displayed. Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id The UTF-8 format identifier whose skeleton will be computed.
+ * @param length The length of the input string, in bytes,
+ * or -1 if the string is zero terminated.
+ * @param dest The output buffer, to receive the skeleton string.
+ * @param destCapacity The length of the output buffer, in bytes.
+ * The destCapacity may be zero, in which case the function will
+ * return the actual length of the skeleton.
+ * @param status The error code, set if an error occurred while attempting to
+ * perform the check. Possible Errors include U_INVALID_CHAR_FOUND
+ * for invalid UTF-8 sequences, and
+ * U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
+ * to hold the complete skeleton.
+ * @return The length of the skeleton string, in bytes. The returned length
+ * is always that of the complete skeleton, even when the
+ * supplied buffer is too small (or of zero length)
+ *
+ * @draft ICU 74
+ */
+U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+ const char *id, int32_t length, char *dest,
+ int32_t destCapacity, UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
/**
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
* in http://unicode.org/Public/security/latest/xidmodifications.txt
@@ -1510,11 +1736,42 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
const icu::UnicodeString &s2,
UErrorCode *status);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings.
+ *
+ * @param sc The USpoofChecker
+ * @param direction The paragraph direction with which the identifiers are
+ * displayed. Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param s1 The first of the two identifiers to be compared for
+ * confusability. The strings are in UTF-8 format.
+ * @param s2 The second of the two identifiers to be compared for
+ * confusability. The strings are in UTF-8 format.
+ * @param status The error code, set if an error occurred while attempting to
+ * perform the check.
+ * Confusability of the identifiers is not reported here,
+ * but through this function's return value.
+ * @return An integer value with bit(s) set corresponding to
+ * the type of confusability found, as defined by
+ * enum USpoofChecks. Zero is returned if the identifiers
+ * are not confusable.
+ *
+ * @draft ICU 74
+ *
+ * @see uspoof_areBidiConfusable
+ */
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
+ UBiDiDirection direction,
+ const icu::UnicodeString &s1,
+ const icu::UnicodeString &s2,
+ UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
/**
* Get the "skeleton" for an identifier.
* Skeletons are a transformation of the input identifier;
* Two identifiers are confusable if their skeletons are identical.
- * See Unicode UAX #39 for additional information.
+ * See Unicode Technical Standard #39 for additional information.
*
* Using skeletons directly makes it possible to quickly check
* whether an identifier is confusable with any of some large
@@ -1540,6 +1797,36 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
icu::UnicodeString &dest,
UErrorCode *status);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Get the "bidiSkeleton" for an identifier and a direction.
+ * Skeletons are a transformation of the input identifier;
+ * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+ * they are RTL-confusable if their RTL bidiSkeletons are identical.
+ * See Unicode Technical Standard #39 for additional information.
+ * https://www.unicode.org/reports/tr39/#Confusable_Detection.
+ *
+ * Using skeletons directly makes it possible to quickly check
+ * whether an identifier is confusable with any of some large
+ * set of existing identifiers, by creating an efficiently
+ * searchable collection of the skeletons.
+ *
+ * @param sc The USpoofChecker.
+ * @param direction The context direction with which the identifier will be
+ * displayed. Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id The input identifier whose bidiSkeleton will be computed.
+ * @param dest The output identifier, to receive the skeleton string.
+ * @param status The error code, set if an error occurred while attempting to
+ * perform the check.
+ * @return A reference to the destination (skeleton) string.
+ *
+ * @draft ICU 74
+ */
+U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(
+ const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,
+ icu::UnicodeString &dest, UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
/**
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
* in http://unicode.org/Public/security/latest/xidmodifications.txt
diff --git a/thirdparty/icu4c/i18n/uspoof.cpp b/thirdparty/icu4c/i18n/uspoof.cpp
index 271caeafff9..47dac8418e4 100644
--- a/thirdparty/icu4c/i18n/uspoof.cpp
+++ b/thirdparty/icu4c/i18n/uspoof.cpp
@@ -15,6 +15,7 @@
*
* Unicode Spoof Detection
*/
+#include "unicode/ubidi.h"
#include "unicode/utypes.h"
#include "unicode/normalizer2.h"
#include "unicode/uspoof.h"
@@ -141,8 +142,8 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
u"\\U0001DF00-\\U0001DF1E\\U0001DF25-\\U0001DF2A\\U0001E08F\\U0001E7E0-"
u"\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED\\U0001E7EE\\U0001E7F0-"
u"\\U0001E7FE\\U00020000-\\U0002A6DF\\U0002A700-\\U0002B739\\U0002B740-"
- u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U00030000-"
- u"\\U0003134A\\U00031350-\\U000323AF]";
+ u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U0002EBF0-"
+ u"\\U0002EE5D\\U00030000-\\U0003134A\\U00031350-\\U000323AF]";
gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status);
if (gRecommendedSet == nullptr) {
@@ -538,6 +539,90 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
return result;
}
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
+ const char16_t *id1, int32_t length1,
+ const char16_t *id2, int32_t length2,
+ UErrorCode *status) {
+ UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
+ UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
+ if (id1Str.isBogus() || id2Str.isBogus()) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
+}
+
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+ const char *id1, int32_t length1, const char *id2,
+ int32_t length2, UErrorCode *status) {
+ if (length1 < -1 || length2 < -1) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return 0;
+ }
+ UnicodeString id1Str = UnicodeString::fromUTF8(
+ StringPiece(id1, length1 >= 0 ? length1 : static_cast