Merge pull request #84289 from bruvzg/icu741
ICU4C: Update to version 74.1
This commit is contained in:
commit
9b2686c333
65 changed files with 8399 additions and 6799 deletions
|
@ -401,6 +401,8 @@ if env["builtin_icu4c"]:
|
|||
"common/uloc.cpp",
|
||||
"common/uloc_keytype.cpp",
|
||||
"common/uloc_tag.cpp",
|
||||
"common/ulocale.cpp",
|
||||
"common/ulocbuilder.cpp",
|
||||
"common/umapfile.cpp",
|
||||
"common/umath.cpp",
|
||||
"common/umutablecptrie.cpp",
|
||||
|
@ -466,7 +468,7 @@ if env["builtin_icu4c"]:
|
|||
]
|
||||
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
|
||||
|
||||
icu_data_name = "icudt73l.dat"
|
||||
icu_data_name = "icudt74l.dat"
|
||||
|
||||
if env.editor_build:
|
||||
env_icu.Depends("#thirdparty/icu4c/icudata.gen.h", "#thirdparty/icu4c/" + icu_data_name)
|
||||
|
|
|
@ -623,6 +623,8 @@ thirdparty_icu_sources = [
|
|||
"common/uloc.cpp",
|
||||
"common/uloc_keytype.cpp",
|
||||
"common/uloc_tag.cpp",
|
||||
"common/ulocale.cpp",
|
||||
"common/ulocbuilder.cpp",
|
||||
"common/umapfile.cpp",
|
||||
"common/umath.cpp",
|
||||
"common/umutablecptrie.cpp",
|
||||
|
@ -688,7 +690,7 @@ thirdparty_icu_sources = [
|
|||
]
|
||||
thirdparty_icu_sources = [thirdparty_icu_dir + file for file in thirdparty_icu_sources]
|
||||
|
||||
icu_data_name = "icudt73l.dat"
|
||||
icu_data_name = "icudt74l.dat"
|
||||
|
||||
if env["static_icu_data"]:
|
||||
env_icu.Depends("../../../thirdparty/icu4c/icudata.gen.h", "../../../thirdparty/icu4c/" + icu_data_name)
|
||||
|
|
6
thirdparty/README.md
vendored
6
thirdparty/README.md
vendored
|
@ -389,7 +389,7 @@ Files extracted from upstream source:
|
|||
## icu4c
|
||||
|
||||
- Upstream: https://github.com/unicode-org/icu
|
||||
- Version: 73.2 (680f521746a3bd6a86f25f25ee50a62d88b489cf, 2023)
|
||||
- Version: 74.1 (9edac7b78327a1cb58db29e2714b15f9fa14e4d7, 2023)
|
||||
- License: Unicode
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
@ -401,7 +401,7 @@ Files extracted from upstream source:
|
|||
|
||||
Files generated from upstream source:
|
||||
|
||||
- The `icudt73l.dat` built with the provided `godot_data.json` config file (see
|
||||
- The `icudt74l.dat` built with the provided `godot_data.json` config file (see
|
||||
https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md
|
||||
for instructions).
|
||||
|
||||
|
@ -411,7 +411,7 @@ Files generated from upstream source:
|
|||
3. Reconfigure ICU with custom data config:
|
||||
`ICU_DATA_FILTER_FILE={GODOT_SOURCE}/thirdparty/icu4c/godot_data.json ./runConfigureICU {PLATFORM} --with-data-packaging=common`
|
||||
4. Delete `data/out` folder and rebuild data: `cd data && rm -rf ./out && make`
|
||||
5. Copy `source/data/out/icudt73l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt73l.dat`
|
||||
5. Copy `source/data/out/icudt74l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt74l.dat`
|
||||
|
||||
|
||||
## jpeg-compressor
|
||||
|
|
73
thirdparty/icu4c/LICENSE
vendored
73
thirdparty/icu4c/LICENSE
vendored
|
@ -1,49 +1,42 @@
|
|||
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
||||
|
||||
See Terms of Use <https://www.unicode.org/copyright.html>
|
||||
for definitions of Unicode Inc.’s Data Files and Software.
|
||||
|
||||
NOTICE TO USER: Carefully read the following legal agreement.
|
||||
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
||||
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
||||
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
||||
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
||||
THE DATA FILES OR SOFTWARE.
|
||||
UNICODE LICENSE V3
|
||||
|
||||
COPYRIGHT AND PERMISSION NOTICE
|
||||
|
||||
Copyright © 1991-2023 Unicode, Inc. All rights reserved.
|
||||
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
|
||||
Copyright © 2016-2023 Unicode, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of the Unicode data files and any associated documentation
|
||||
(the "Data Files") or Unicode software and any associated documentation
|
||||
(the "Software") to deal in the Data Files or Software
|
||||
without restriction, including without limitation the rights to use,
|
||||
copy, modify, merge, publish, distribute, and/or sell copies of
|
||||
the Data Files or Software, and to permit persons to whom the Data Files
|
||||
or Software are furnished to do so, provided that either
|
||||
(a) this copyright and permission notice appear with all copies
|
||||
of the Data Files or Software, or
|
||||
(b) this copyright and permission notice appear in associated
|
||||
Documentation.
|
||||
NOTICE TO USER: Carefully read the following legal agreement. BY
|
||||
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
|
||||
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
||||
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
|
||||
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
|
||||
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
||||
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
||||
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
||||
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
||||
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of data files and any associated documentation (the "Data Files") or
|
||||
software and any associated documentation (the "Software") to deal in the
|
||||
Data Files or Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
||||
copies of the Data Files or Software, and to permit persons to whom the
|
||||
Data Files or Software are furnished to do so, provided that either (a)
|
||||
this copyright and permission notice appear with all copies of the Data
|
||||
Files or Software, or (b) this copyright and permission notice appear in
|
||||
associated Documentation.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder
|
||||
shall not be used in advertising or otherwise to promote the sale,
|
||||
use or other dealings in these Data Files or Software without prior
|
||||
written authorization of the copyright holder.
|
||||
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
|
||||
THIRD PARTY RIGHTS.
|
||||
|
||||
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
|
||||
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
|
||||
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
|
||||
FILES OR SOFTWARE.
|
||||
|
||||
Except as contained in this notice, the name of a copyright holder shall
|
||||
not be used in advertising or otherwise to promote the sale, use or other
|
||||
dealings in these Data Files or Software without prior written
|
||||
authorization of the copyright holder.
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
|
|
121
thirdparty/icu4c/common/brkeng.cpp
vendored
121
thirdparty/icu4c/common/brkeng.cpp
vendored
|
@ -21,6 +21,7 @@
|
|||
#include "unicode/uscript.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/rbbi.h"
|
||||
|
||||
#include "brkeng.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
|
|||
}
|
||||
|
||||
UBool
|
||||
UnhandledEngine::handles(UChar32 c) const {
|
||||
UnhandledEngine::handles(UChar32 c, const char* locale) const {
|
||||
(void)locale; // Unused
|
||||
return fHandled && fHandled->contains(c);
|
||||
}
|
||||
|
||||
int32_t
|
||||
UnhandledEngine::findBreaks( UText *text,
|
||||
int32_t /* startPos */,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &/*foundBreaks*/,
|
||||
UBool /* isPhraseBreaking */,
|
||||
UErrorCode &status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
UChar32 c = utext_current32(text);
|
||||
utext_setNativeIndex(text, startPos);
|
||||
UChar32 c = utext_current32(text);
|
||||
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
||||
c = utext_current32(text);
|
||||
|
@ -120,41 +123,39 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
|
|||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
U_CDECL_BEGIN
|
||||
static void U_CALLCONV _deleteEngine(void *obj) {
|
||||
delete (const icu::LanguageBreakEngine *) obj;
|
||||
/**
 * Lazily creates the stack that holds this factory's break engines.
 *
 * Does nothing if fEngines already exists. Otherwise a UStack (constructed
 * with uprv_deleteUObject as its element deleter) is allocated and installed
 * into fEngines. fEngines is left untouched when allocation or construction
 * reports a failure through status.
 *
 * NOTE(review): the mutex below is local to this function, so it only
 * serializes concurrent ensureEngines() calls against each other.
 *
 * @param status the error code; set on allocation/construction failure.
 */
void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
    static UMutex gBreakEngineMutex;
    Mutex lock(&gBreakEngineMutex);

    if (fEngines != nullptr) {
        // Already initialized; nothing to do.
        return;
    }

    LocalPointer<UStack> newStack(new UStack(uprv_deleteUObject, nullptr, status), status);
    if (U_FAILURE(status)) {
        // newStack releases whatever was allocated when it goes out of scope.
        return;
    }
    fEngines = newStack.orphan();
}
|
||||
U_CDECL_END
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
const LanguageBreakEngine *
|
||||
ICULanguageBreakFactory::getEngineFor(UChar32 c) {
|
||||
ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
|
||||
const LanguageBreakEngine *lbe = nullptr;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureEngines(status);
|
||||
if (U_FAILURE(status) ) {
|
||||
// Note: no way to return error code to caller.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static UMutex gBreakEngineMutex;
|
||||
Mutex m(&gBreakEngineMutex);
|
||||
|
||||
if (fEngines == nullptr) {
|
||||
LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
|
||||
if (U_FAILURE(status) ) {
|
||||
// Note: no way to return error code to caller.
|
||||
return nullptr;
|
||||
}
|
||||
fEngines = engines.orphan();
|
||||
} else {
|
||||
int32_t i = fEngines->size();
|
||||
while (--i >= 0) {
|
||||
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
|
||||
if (lbe != nullptr && lbe->handles(c)) {
|
||||
return lbe;
|
||||
}
|
||||
int32_t i = fEngines->size();
|
||||
while (--i >= 0) {
|
||||
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
|
||||
if (lbe != nullptr && lbe->handles(c, locale)) {
|
||||
return lbe;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// We didn't find an engine. Create one.
|
||||
lbe = loadEngineFor(c);
|
||||
lbe = loadEngineFor(c, locale);
|
||||
if (lbe != nullptr) {
|
||||
fEngines->push((void *)lbe, status);
|
||||
}
|
||||
|
@ -162,7 +163,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
|
|||
}
|
||||
|
||||
const LanguageBreakEngine *
|
||||
ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
|
||||
ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UScriptCode code = uscript_getScript(c, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
|
@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Adopts an externally supplied break engine and registers it with this
 * factory, wrapped in a BreakEngineWrapper.
 *
 * Ownership of `external` always passes to this function: on any failure the
 * engine is deleted rather than leaked back to the caller.
 *
 * @param external the engine to adopt.
 * @param status   the error code. If it already indicates failure on entry,
 *                 or the engine stack cannot be created, nothing is
 *                 registered.
 */
void ICULanguageBreakFactory::addExternalEngine(
        ExternalBreakEngine* external, UErrorCode& status) {
    // Take ownership immediately so every early return releases the engine.
    LocalPointer<ExternalBreakEngine> engine(external, status);
    ensureEngines(status);
    if (U_FAILURE(status)) {
        // fEngines may still be nullptr here; pushing onto it would
        // dereference a null pointer. `engine` deletes `external` on return.
        return;
    }
    LocalPointer<BreakEngineWrapper> wrapper(
        new BreakEngineWrapper(engine.orphan(), status), status);
    // NOTE(review): this mutex is local to this function, so it does not
    // exclude threads that are iterating or pushing on fEngines under the
    // separate function-local mutex in getEngineFor(). Sharing one mutex
    // between these functions would need a coordinated change - confirm.
    static UMutex gBreakEngineMutex;
    Mutex m(&gBreakEngineMutex);
    // The stack was created with a deleter (see ensureEngines), so the
    // wrapper is handed over to it and released from the LocalPointer.
    fEngines->push(wrapper.getAlias(), status);
    wrapper.orphan();
}
|
||||
|
||||
/**
 * Wraps an external break engine.
 *
 * The engine pointer is stored in the `delegate` LocalPointer, whose
 * constructor also receives `status`.
 *
 * @param engine the external engine to wrap.
 * @param status the error code, forwarded to the LocalPointer constructor.
 */
BreakEngineWrapper::BreakEngineWrapper(ExternalBreakEngine* engine,
                                       UErrorCode &status)
        : delegate(engine, status) {
    // Nothing else to set up.
}
|
||||
|
||||
// No explicit cleanup: the `delegate` LocalPointer member is destroyed
// automatically along with the wrapper.
BreakEngineWrapper::~BreakEngineWrapper() = default;
|
||||
|
||||
/**
 * Reports whether the wrapped engine handles the given character for the
 * given locale, by forwarding the question to the delegate's isFor().
 *
 * @param c      a character which begins a run the engine might handle.
 * @param locale the locale.
 * @return the delegate's isFor(c, locale) answer.
 */
UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const {
    const UBool handledByDelegate = delegate->isFor(c, locale);
    return handledByDelegate;
}
|
||||
|
||||
/**
 * Finds breaks within a run of text by delegating to the wrapped external
 * engine.
 *
 * Starting at startPos, the run is extended forward while the position is
 * before endPos and the delegate reports that it handles the current
 * character. The delegate's fillBreaks() is then asked to write break
 * positions for that run directly into the tail of foundBreaks.
 *
 * On success the text iterator is left at the end of the handled run.
 *
 * @param text        the text to scan.
 * @param startPos    native index at which the run starts.
 * @param endPos      native index which the run must not reach or pass.
 * @param foundBreaks vector to which the breaks are appended. On failure it
 *                    is restored to the size it had on entry.
 * @param status      the error code.
 * @return the number of breaks appended, or 0 on failure.
 */
int32_t BreakEngineWrapper::findBreaks(
    UText *text,
    int32_t startPos,
    int32_t endPos,
    UVector32 &foundBreaks,
    UBool /* isPhraseBreaking */,
    UErrorCode &status) const {
    if (U_FAILURE(status)) return 0;

    // Find the span of characters the delegate handles. The span begins at
    // startPos and extends towards endPos.
    utext_setNativeIndex(text, startPos);
    const int32_t rangeStart = static_cast<int32_t>(utext_getNativeIndex(text));
    int32_t current;
    UChar32 c = utext_current32(text);
    while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos &&
           delegate->handles(c)) {
        utext_next32(text);  // TODO: recast loop for postincrement
        c = utext_current32(text);
    }
    const int32_t rangeEnd = current;

    const int32_t beforeSize = foundBreaks.size();
    // Room for (rangeEnd - rangeStart + 1) more items.
    const int32_t additionalCapacity = rangeEnd - rangeStart + 1;
    foundBreaks.ensureCapacity(beforeSize + additionalCapacity, status);
    if (U_FAILURE(status)) return 0;

    // Expose exactly the scratch area the delegate may write into. (The
    // previous code added beforeSize twice, growing the vector by more than
    // the capacity that was just reserved.)
    foundBreaks.setSize(beforeSize + additionalCapacity);
    const int32_t result = delegate->fillBreaks(text, rangeStart, rangeEnd,
                                                foundBreaks.getBuffer() + beforeSize,
                                                additionalCapacity, status);
    if (U_FAILURE(status)) {
        // Drop the scratch area so callers never see placeholder entries.
        foundBreaks.setSize(beforeSize);
        return 0;
    }
    // Keep only the entries the delegate reported writing.
    foundBreaks.setSize(beforeSize + result);
    utext_setNativeIndex(text, current);
    return result;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
|
59
thirdparty/icu4c/common/brkeng.h
vendored
59
thirdparty/icu4c/common/brkeng.h
vendored
|
@ -10,6 +10,7 @@
|
|||
#ifndef BRKENG_H
|
||||
#define BRKENG_H
|
||||
|
||||
#include "unicode/umisc.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utext.h"
|
||||
|
@ -21,6 +22,7 @@ class UnicodeSet;
|
|||
class UStack;
|
||||
class UVector32;
|
||||
class DictionaryMatcher;
|
||||
class ExternalBreakEngine;
|
||||
|
||||
/*******************************************************************
|
||||
* LanguageBreakEngine
|
||||
|
@ -35,7 +37,7 @@ class DictionaryMatcher;
|
|||
* <p>LanguageBreakEngines should normally be implemented so as to
|
||||
* be shared between threads without locking.</p>
|
||||
*/
|
||||
class LanguageBreakEngine : public UMemory {
|
||||
class LanguageBreakEngine : public UObject {
|
||||
public:
|
||||
|
||||
/**
|
||||
|
@ -54,10 +56,11 @@ class LanguageBreakEngine : public UMemory {
|
|||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param locale The locale.
|
||||
* @return true if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c) const = 0;
|
||||
virtual UBool handles(UChar32 c, const char* locale) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
|
@ -80,6 +83,35 @@ class LanguageBreakEngine : public UMemory {
|
|||
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* BreakEngineWrapper
|
||||
*/
|
||||
|
||||
/**
|
||||
* <p>BreakEngineWrapper implement LanguageBreakEngine by
|
||||
* a thin wrapper that delegate the task to ExternalBreakEngine
|
||||
* </p>
|
||||
*/
|
||||
class BreakEngineWrapper : public LanguageBreakEngine {
|
||||
public:
|
||||
|
||||
BreakEngineWrapper(ExternalBreakEngine* engine, UErrorCode &status);
|
||||
|
||||
virtual ~BreakEngineWrapper();
|
||||
|
||||
virtual UBool handles(UChar32 c, const char* locale) const override;
|
||||
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UVector32 &foundBreaks,
|
||||
UBool isPhraseBreaking,
|
||||
UErrorCode &status) const override;
|
||||
|
||||
private:
|
||||
LocalPointer<ExternalBreakEngine> delegate;
|
||||
};
|
||||
|
||||
/*******************************************************************
|
||||
* LanguageBreakFactory
|
||||
*/
|
||||
|
@ -125,9 +157,10 @@ class LanguageBreakFactory : public UMemory {
|
|||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param locale The locale.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0;
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) = 0;
|
||||
|
||||
};
|
||||
|
||||
|
@ -174,10 +207,11 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param locale The locale.
|
||||
* @return true if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c) const override;
|
||||
virtual UBool handles(UChar32 c, const char* locale) const override;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
|
@ -247,9 +281,18 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
|
|||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param locale The locale.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
|
||||
virtual const LanguageBreakEngine *getEngineFor(UChar32 c, const char* locale) override;
|
||||
|
||||
/**
|
||||
* Add and adopt the engine.
|
||||
* @param engine The ExternalBreakEngine to be added and adopted. The caller
|
||||
* passes the ownership and should not release the memory after this.
|
||||
* @param status the error code.
|
||||
*/
|
||||
virtual void addExternalEngine(ExternalBreakEngine* engine, UErrorCode& status);
|
||||
|
||||
protected:
|
||||
/**
|
||||
|
@ -258,9 +301,10 @@ protected:
|
|||
*
|
||||
* @param c A character that begins a run for which a LanguageBreakEngine is
|
||||
* sought.
|
||||
* @param locale The locale.
|
||||
* @return A LanguageBreakEngine with the desired characteristics, or 0.
|
||||
*/
|
||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c);
|
||||
virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, const char* locale);
|
||||
|
||||
/**
|
||||
* <p>Create a DictionaryMatcher for the specified script and break type.</p>
|
||||
|
@ -269,6 +313,9 @@ protected:
|
|||
* @return A DictionaryMatcher with the desired characteristics, or nullptr.
|
||||
*/
|
||||
virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script);
|
||||
|
||||
private:
|
||||
void ensureEngines(UErrorCode& status);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
14
thirdparty/icu4c/common/brkiter.cpp
vendored
14
thirdparty/icu4c/common/brkiter.cpp
vendored
|
@ -27,6 +27,7 @@
|
|||
#include "unicode/rbbi.h"
|
||||
#include "unicode/brkiter.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/filteredbrk.h"
|
||||
|
@ -121,8 +122,11 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
|
|||
// If there is a result, set the valid locale and actual locale, and the kind
|
||||
if (U_SUCCESS(status) && result != nullptr) {
|
||||
U_LOCALE_BASED(locBased, *(BreakIterator*)result);
|
||||
|
||||
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
|
||||
actualLocale.data());
|
||||
uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY);
|
||||
result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
|
||||
}
|
||||
|
||||
ures_close(b);
|
||||
|
@ -202,18 +206,20 @@ BreakIterator::getAvailableLocales(int32_t& count)
|
|||
|
||||
BreakIterator::BreakIterator()
|
||||
{
|
||||
*validLocale = *actualLocale = 0;
|
||||
*validLocale = *actualLocale = *requestLocale = 0;
|
||||
}
|
||||
|
||||
/**
 * Copy constructor: copies the three locale ID buffers (requested, valid and
 * actual) from `other`. Each copy is bounded by the size of the destination
 * buffer.
 */
BreakIterator::BreakIterator(const BreakIterator &other)
        : UObject(other) {
    // The three buffers are independent of each other.
    uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
}
|
||||
|
||||
/**
 * Assignment: copies the requested, valid and actual locale ID buffers from
 * `other`. Self-assignment is a no-op.
 *
 * @return *this
 */
BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
    if (this == &other) {
        return *this;
    }
    // Each copy is bounded by the size of its destination buffer.
    uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
    uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
    uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
    return *this;
}
|
||||
|
@ -493,12 +499,18 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
|
|||
|
||||
/**
 * Returns the locale of the requested type for this iterator.
 *
 * The requested locale is built from the requestLocale buffer held by the
 * iterator; every other type is resolved through the LocaleBased helper.
 *
 * @param type   which locale to return.
 * @param status the error code, passed through to the LocaleBased helper.
 */
Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
    if (type != ULOC_REQUESTED_LOCALE) {
        U_LOCALE_BASED(locBased, *this);
        return locBased.getLocale(type, status);
    }
    return Locale(requestLocale);
}
|
||||
|
||||
const char *
|
||||
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
|
||||
if (type == ULOC_REQUESTED_LOCALE) {
|
||||
return requestLocale;
|
||||
}
|
||||
U_LOCALE_BASED(locBased, *this);
|
||||
return locBased.getLocaleID(type, status);
|
||||
}
|
||||
|
|
10
thirdparty/icu4c/common/characterproperties.cpp
vendored
10
thirdparty/icu4c/common/characterproperties.cpp
vendored
|
@ -169,7 +169,7 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
|
|||
case UPROPS_SRC_INPC:
|
||||
case UPROPS_SRC_INSC:
|
||||
case UPROPS_SRC_VO:
|
||||
uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
|
||||
uprops_addPropertyStarts(src, &sa, &errorCode);
|
||||
break;
|
||||
case UPROPS_SRC_EMOJI: {
|
||||
const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
|
||||
|
@ -178,6 +178,14 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case UPROPS_SRC_IDSU:
|
||||
// New in Unicode 15.1 for just two characters.
|
||||
sa.add(sa.set, 0x2FFE);
|
||||
sa.add(sa.set, 0x2FFF + 1);
|
||||
break;
|
||||
case UPROPS_SRC_ID_COMPAT_MATH:
|
||||
uprops_addPropertyStarts(src, &sa, &errorCode);
|
||||
break;
|
||||
default:
|
||||
errorCode = U_INTERNAL_PROGRAM_ERROR;
|
||||
break;
|
||||
|
|
4
thirdparty/icu4c/common/dictbe.cpp
vendored
4
thirdparty/icu4c/common/dictbe.cpp
vendored
|
@ -42,7 +42,7 @@ DictionaryBreakEngine::~DictionaryBreakEngine() {
|
|||
}
|
||||
|
||||
UBool
|
||||
DictionaryBreakEngine::handles(UChar32 c) const {
|
||||
DictionaryBreakEngine::handles(UChar32 c, const char*) const {
|
||||
return fSet.contains(c);
|
||||
}
|
||||
|
||||
|
@ -54,13 +54,13 @@ DictionaryBreakEngine::findBreaks( UText *text,
|
|||
UBool isPhraseBreaking,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) return 0;
|
||||
(void)startPos; // TODO: remove this param?
|
||||
int32_t result = 0;
|
||||
|
||||
// Find the span of characters included in the set.
|
||||
// The span to break begins at the current position in the text, and
|
||||
// extends towards the start or end of the text, depending on 'reverse'.
|
||||
|
||||
utext_setNativeIndex(text, startPos);
|
||||
int32_t start = (int32_t)utext_getNativeIndex(text);
|
||||
int32_t current;
|
||||
int32_t rangeStart;
|
||||
|
|
3
thirdparty/icu4c/common/dictbe.h
vendored
3
thirdparty/icu4c/common/dictbe.h
vendored
|
@ -62,10 +62,11 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
* a particular kind of break.</p>
|
||||
*
|
||||
* @param c A character which begins a run that the engine might handle
|
||||
* @param locale The locale.
|
||||
* @return true if this engine handles the particular character and break
|
||||
* type.
|
||||
*/
|
||||
virtual UBool handles(UChar32 c) const override;
|
||||
virtual UBool handles(UChar32 c, const char* locale) const override;
|
||||
|
||||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
|
|
|
@ -143,6 +143,9 @@ static icu::UInitOnce nfkcInitOnce {};
|
|||
static Norm2AllModes *nfkc_cfSingleton;
|
||||
static icu::UInitOnce nfkc_cfInitOnce {};
|
||||
|
||||
static Norm2AllModes *nfkc_scfSingleton;
|
||||
static icu::UInitOnce nfkc_scfInitOnce {};
|
||||
|
||||
static UHashtable *cache=nullptr;
|
||||
|
||||
// UInitOnce singleton initialization function
|
||||
|
@ -156,6 +159,8 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
|||
nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
||||
nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
|
||||
} else if (uprv_strcmp(what, "nfkc_scf") == 0) {
|
||||
nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
|
||||
} else {
|
||||
UPRV_UNREACHABLE_EXIT; // Unknown singleton
|
||||
}
|
||||
|
@ -183,6 +188,10 @@ static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
|
|||
nfkc_cfSingleton = nullptr;
|
||||
nfkc_cfInitOnce.reset();
|
||||
|
||||
delete nfkc_scfSingleton;
|
||||
nfkc_scfSingleton = nullptr;
|
||||
nfkc_scfInitOnce.reset();
|
||||
|
||||
uhash_close(cache);
|
||||
cache=nullptr;
|
||||
return true;
|
||||
|
@ -213,6 +222,13 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
|
|||
return nfkc_cfSingleton;
|
||||
}
|
||||
|
||||
/**
 * Returns the shared "nfkc_scf" Norm2AllModes singleton, initializing it on
 * first use via umtx_initOnce.
 *
 * @param errorCode the error code; if it already indicates failure, nullptr
 *                  is returned without attempting initialization.
 * @return the singleton pointer as left by initialization.
 */
const Norm2AllModes *
Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);
        return nfkc_scfSingleton;
    }
    return nullptr;
}
|
||||
|
||||
#if !NORM2_HARDCODE_NFC_DATA
|
||||
const Normalizer2 *
|
||||
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
|
||||
|
@ -261,6 +277,12 @@ Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
|
|||
return allModes!=nullptr ? &allModes->comp : nullptr;
|
||||
}
|
||||
|
||||
/**
 * Returns the `comp` normalizer of the "nfkc_scf" Norm2AllModes singleton.
 *
 * @param errorCode the error code, passed to the singleton accessor.
 * @return the normalizer, or nullptr if the singleton is unavailable.
 */
const Normalizer2 *
Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *modes = Norm2AllModes::getNFKC_SCFInstance(errorCode);
    if (modes == nullptr) {
        return nullptr;
    }
    return &modes->comp;
}
|
||||
|
||||
const Normalizer2 *
|
||||
Normalizer2::getInstance(const char *packageName,
|
||||
const char *name,
|
||||
|
@ -281,6 +303,8 @@ Normalizer2::getInstance(const char *packageName,
|
|||
allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
||||
} else if(0==uprv_strcmp(name, "nfkc_cf")) {
|
||||
allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
||||
} else if(0==uprv_strcmp(name, "nfkc_scf")) {
|
||||
allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
|
||||
}
|
||||
}
|
||||
if(allModes==nullptr && U_SUCCESS(errorCode)) {
|
||||
|
@ -393,6 +417,11 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
|
|||
return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
|
||||
}
|
||||
|
||||
/**
 * C API wrapper around Normalizer2::getNFKCSimpleCasefoldInstance().
 *
 * @param pErrorCode pointer to the error code; it is dereferenced
 *                   unconditionally.
 * @return the C++ instance pointer, cast to the C API's UNormalizer2 type.
 */
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *instance =
        Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);
    return (const UNormalizer2 *)instance;
}
|
||||
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getInstance(const char *packageName,
|
||||
const char *name,
|
||||
|
|
2532
thirdparty/icu4c/common/localefallback_data.h
vendored
2532
thirdparty/icu4c/common/localefallback_data.h
vendored
File diff suppressed because it is too large
Load diff
2
thirdparty/icu4c/common/localematcher.cpp
vendored
2
thirdparty/icu4c/common/localematcher.cpp
vendored
|
@ -307,7 +307,7 @@ LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale
|
|||
if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
|
||||
return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
71
thirdparty/icu4c/common/locid.cpp
vendored
71
thirdparty/icu4c/common/locid.cpp
vendored
|
@ -563,7 +563,7 @@ private:
|
|||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length,
|
||||
void (*checkType)(const char* type),
|
||||
void (*checkReplacement)(const UnicodeString& replacement),
|
||||
void (*checkReplacement)(const UChar* replacement),
|
||||
UErrorCode &status);
|
||||
|
||||
// Read the languageAlias data from alias to
|
||||
|
@ -700,7 +700,7 @@ AliasDataBuilder::readAlias(
|
|||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length,
|
||||
void (*checkType)(const char* type),
|
||||
void (*checkReplacement)(const UnicodeString& replacement),
|
||||
void (*checkReplacement)(const UChar* replacement),
|
||||
UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
|
@ -720,8 +720,8 @@ AliasDataBuilder::readAlias(
|
|||
LocalUResourceBundlePointer res(
|
||||
ures_getNextResource(alias, nullptr, &status));
|
||||
const char* aliasFrom = ures_getKey(res.getAlias());
|
||||
UnicodeString aliasTo =
|
||||
ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
|
||||
const UChar* aliasTo =
|
||||
ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status);
|
||||
if (U_FAILURE(status)) return;
|
||||
|
||||
checkType(aliasFrom);
|
||||
|
@ -766,7 +766,7 @@ AliasDataBuilder::readLanguageAlias(
|
|||
#else
|
||||
[](const char*) {},
|
||||
#endif
|
||||
[](const UnicodeString&) {}, status);
|
||||
[](const UChar*) {}, status);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -790,12 +790,12 @@ AliasDataBuilder::readScriptAlias(
|
|||
[](const char* type) {
|
||||
U_ASSERT(uprv_strlen(type) == 4);
|
||||
},
|
||||
[](const UnicodeString& replacement) {
|
||||
U_ASSERT(replacement.length() == 4);
|
||||
[](const UChar* replacement) {
|
||||
U_ASSERT(u_strlen(replacement) == 4);
|
||||
},
|
||||
#else
|
||||
[](const char*) {},
|
||||
[](const UnicodeString&) { },
|
||||
[](const UChar*) { },
|
||||
#endif
|
||||
status);
|
||||
}
|
||||
|
@ -824,7 +824,7 @@ AliasDataBuilder::readTerritoryAlias(
|
|||
#else
|
||||
[](const char*) {},
|
||||
#endif
|
||||
[](const UnicodeString&) { },
|
||||
[](const UChar*) { },
|
||||
status);
|
||||
}
|
||||
|
||||
|
@ -851,15 +851,16 @@ AliasDataBuilder::readVariantAlias(
|
|||
U_ASSERT(uprv_strlen(type) != 4 ||
|
||||
(type[0] >= '0' && type[0] <= '9'));
|
||||
},
|
||||
[](const UnicodeString& replacement) {
|
||||
U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8);
|
||||
U_ASSERT(replacement.length() != 4 ||
|
||||
(replacement.charAt(0) >= u'0' &&
|
||||
replacement.charAt(0) <= u'9'));
|
||||
[](const UChar* replacement) {
|
||||
int32_t len = u_strlen(replacement);
|
||||
U_ASSERT(len >= 4 && len <= 8);
|
||||
U_ASSERT(len != 4 ||
|
||||
(*replacement >= u'0' &&
|
||||
*replacement <= u'9'));
|
||||
},
|
||||
#else
|
||||
[](const char*) {},
|
||||
[](const UnicodeString&) { },
|
||||
[](const UChar*) { },
|
||||
#endif
|
||||
status);
|
||||
}
|
||||
|
@ -888,7 +889,7 @@ AliasDataBuilder::readSubdivisionAlias(
|
|||
#else
|
||||
[](const char*) {},
|
||||
#endif
|
||||
[](const UnicodeString&) { },
|
||||
[](const UChar*) { },
|
||||
status);
|
||||
}
|
||||
|
||||
|
@ -1066,7 +1067,13 @@ class AliasReplacer {
|
|||
public:
|
||||
AliasReplacer(UErrorCode status) :
|
||||
language(nullptr), script(nullptr), region(nullptr),
|
||||
extensions(nullptr), variants(status),
|
||||
extensions(nullptr),
|
||||
// store value in variants only once
|
||||
variants(nullptr,
|
||||
([](UElement e1, UElement e2) -> UBool {
|
||||
return 0==uprv_strcmp((const char*)e1.pointer,
|
||||
(const char*)e2.pointer);}),
|
||||
status),
|
||||
data(nullptr) {
|
||||
}
|
||||
~AliasReplacer() {
|
||||
|
@ -1652,10 +1659,16 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
|||
while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
|
||||
U_SUCCESS(status)) {
|
||||
*end = NULL_CHAR; // null terminate inside variantsBuff
|
||||
variants.addElement(start, status);
|
||||
// do not add "" or duplicate data to variants
|
||||
if (*start && !variants.contains(start)) {
|
||||
variants.addElement(start, status);
|
||||
}
|
||||
start = end + 1;
|
||||
}
|
||||
variants.addElement(start, status);
|
||||
// do not add "" or duplicate data to variants
|
||||
if (*start && !variants.contains(start)) {
|
||||
variants.addElement(start, status);
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
|
||||
|
@ -2079,6 +2092,10 @@ Locale::addLikelySubtags(UErrorCode& status) {
|
|||
|
||||
void
|
||||
Locale::minimizeSubtags(UErrorCode& status) {
|
||||
Locale::minimizeSubtags(false, status);
|
||||
}
|
||||
void
|
||||
Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
@ -2086,7 +2103,7 @@ Locale::minimizeSubtags(UErrorCode& status) {
|
|||
CharString minimizedLocaleID;
|
||||
{
|
||||
CharStringByteSink sink(&minimizedLocaleID);
|
||||
ulocimp_minimizeSubtags(fullName, sink, &status);
|
||||
ulocimp_minimizeSubtags(fullName, sink, favorScript, &status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
|
@ -2402,8 +2419,9 @@ Locale::getLocaleCache()
|
|||
}
|
||||
|
||||
class KeywordEnumeration : public StringEnumeration {
|
||||
private:
|
||||
protected:
|
||||
char *keywords;
|
||||
private:
|
||||
char *current;
|
||||
int32_t length;
|
||||
UnicodeString currUSKey;
|
||||
|
@ -2510,6 +2528,17 @@ public:
|
|||
if (resultLength != nullptr) *resultLength = 0;
|
||||
return nullptr;
|
||||
}
|
||||
// Returns how many entries of `keywords` uloc_toUnicodeLocaleKey() yields a
// non-null result for. `keywords` is walked as back-to-back NUL-terminated
// strings; the walk stops at the first empty string. The status argument is
// not used.
virtual int32_t count(UErrorCode &/*status*/) const override {
    int32_t mappable = 0;
    for (char *entry = keywords; *entry; entry += uprv_strlen(entry) + 1) {
        if (uloc_toUnicodeLocaleKey(entry) == nullptr) {
            continue;  // entry is not counted
        }
        ++mappable;
    }
    return mappable;
}
|
||||
};
|
||||
|
||||
// Out-of-line virtual destructor to serve as the "key function".
|
||||
|
|
752
thirdparty/icu4c/common/loclikely.cpp
vendored
752
thirdparty/icu4c/common/loclikely.cpp
vendored
|
@ -31,82 +31,10 @@
|
|||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "loclikelysubtags.h"
|
||||
#include "ulocimp.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/**
|
||||
* These are the canonical strings for unknown languages, scripts and regions.
|
||||
**/
|
||||
static const char* const unknownLanguage = "und";
|
||||
static const char* const unknownScript = "Zzzz";
|
||||
static const char* const unknownRegion = "ZZ";
|
||||
|
||||
/**
|
||||
* This function looks for the localeID in the likelySubtags resource.
|
||||
*
|
||||
* @param localeID The tag to find.
|
||||
* @param buffer A buffer to hold the matching entry
|
||||
* @param bufferLength The length of the output buffer
|
||||
* @return A pointer to "buffer" if found, or a null pointer if not.
|
||||
*/
|
||||
static const char* U_CALLCONV
|
||||
findLikelySubtags(const char* localeID,
|
||||
char* buffer,
|
||||
int32_t bufferLength,
|
||||
UErrorCode* err) {
|
||||
const char* result = nullptr;
|
||||
|
||||
if (!U_FAILURE(*err)) {
|
||||
int32_t resLen = 0;
|
||||
const char16_t* s = nullptr;
|
||||
UErrorCode tmpErr = U_ZERO_ERROR;
|
||||
icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr));
|
||||
if (U_SUCCESS(tmpErr)) {
|
||||
icu::CharString und;
|
||||
if (localeID != nullptr) {
|
||||
if (*localeID == '\0') {
|
||||
localeID = unknownLanguage;
|
||||
} else if (*localeID == '_') {
|
||||
und.append(unknownLanguage, *err);
|
||||
und.append(localeID, *err);
|
||||
if (U_FAILURE(*err)) {
|
||||
return nullptr;
|
||||
}
|
||||
localeID = und.data();
|
||||
}
|
||||
}
|
||||
s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
|
||||
|
||||
if (U_FAILURE(tmpErr)) {
|
||||
/*
|
||||
* If a resource is missing, it's not really an error, it's
|
||||
* just that we don't have any data for that particular locale ID.
|
||||
*/
|
||||
if (tmpErr != U_MISSING_RESOURCE_ERROR) {
|
||||
*err = tmpErr;
|
||||
}
|
||||
}
|
||||
else if (resLen >= bufferLength) {
|
||||
/* The buffer should never overflow. */
|
||||
*err = U_INTERNAL_PROGRAM_ERROR;
|
||||
}
|
||||
else {
|
||||
u_UCharsToChars(s, buffer, resLen + 1);
|
||||
if (resLen >= 3 &&
|
||||
uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
|
||||
(resLen == 3 || buffer[3] == '_')) {
|
||||
uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
|
||||
}
|
||||
result = buffer;
|
||||
}
|
||||
} else {
|
||||
*err = tmpErr;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a tag to a buffer, adding the separator if necessary. The buffer
|
||||
* must be large enough to contain the resulting tag plus any separator
|
||||
|
@ -360,57 +288,6 @@ error:
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a tag string from the supplied parameters. The lang, script and region
|
||||
* parameters may be nullptr pointers. If they are, their corresponding length parameters
|
||||
* must be less than or equal to 0. If the lang parameter is an empty string, the
|
||||
* default value for an unknown language is written to the output buffer.
|
||||
*
|
||||
* If the length of the new string exceeds the capacity of the output buffer,
|
||||
* the function copies as many bytes to the output buffer as it can, and returns
|
||||
* the error U_BUFFER_OVERFLOW_ERROR.
|
||||
*
|
||||
* If an illegal argument is provided, the function returns the error
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* @param lang The language tag to use.
|
||||
* @param langLength The length of the language tag.
|
||||
* @param script The script tag to use.
|
||||
* @param scriptLength The length of the script tag.
|
||||
* @param region The region tag to use.
|
||||
* @param regionLength The length of the region tag.
|
||||
* @param trailing Any trailing data to append to the new tag.
|
||||
* @param trailingLength The length of the trailing data.
|
||||
* @param sink The output sink receiving the tag string.
|
||||
* @param err A pointer to a UErrorCode for error reporting.
|
||||
**/
|
||||
static void U_CALLCONV
|
||||
createTagString(
|
||||
const char* lang,
|
||||
int32_t langLength,
|
||||
const char* script,
|
||||
int32_t scriptLength,
|
||||
const char* region,
|
||||
int32_t regionLength,
|
||||
const char* trailing,
|
||||
int32_t trailingLength,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* err)
|
||||
{
|
||||
createTagStringWithAlternates(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
trailing,
|
||||
trailingLength,
|
||||
nullptr,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the language, script, and region subtags from a tag string, and copy the
|
||||
* results into the corresponding output parameters. The buffers are null-terminated,
|
||||
|
@ -494,13 +371,6 @@ parseTagString(
|
|||
*scriptLength = subtagLength;
|
||||
|
||||
if (*scriptLength > 0) {
|
||||
if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
|
||||
/**
|
||||
* If the script part is the "unknown" script, then don't return it.
|
||||
**/
|
||||
*scriptLength = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move past any separator.
|
||||
*/
|
||||
|
@ -517,14 +387,7 @@ parseTagString(
|
|||
|
||||
*regionLength = subtagLength;
|
||||
|
||||
if (*regionLength > 0) {
|
||||
if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
|
||||
/**
|
||||
* If the region part is the "unknown" region, then don't return it.
|
||||
**/
|
||||
*regionLength = 0;
|
||||
}
|
||||
} else if (*position != 0 && *position != '@') {
|
||||
if (*regionLength <= 0 && *position != 0 && *position != '@') {
|
||||
/* back up over consumed trailing separator */
|
||||
--position;
|
||||
}
|
||||
|
@ -546,264 +409,6 @@ error:
|
|||
goto exit;
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
createLikelySubtagsString(
|
||||
const char* lang,
|
||||
int32_t langLength,
|
||||
const char* script,
|
||||
int32_t scriptLength,
|
||||
const char* region,
|
||||
int32_t regionLength,
|
||||
const char* variants,
|
||||
int32_t variantsLength,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* err) {
|
||||
/**
|
||||
* ULOC_FULLNAME_CAPACITY will provide enough capacity
|
||||
* that we can build a string that contains the language,
|
||||
* script and region code without worrying about overrunning
|
||||
* the user-supplied buffer.
|
||||
**/
|
||||
char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with the script and region first.
|
||||
**/
|
||||
if (scriptLength > 0 && regionLength > 0) {
|
||||
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with just the script.
|
||||
**/
|
||||
if (scriptLength > 0) {
|
||||
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Try the language with just the region.
|
||||
**/
|
||||
if (regionLength > 0) {
|
||||
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finally, try just the language.
|
||||
**/
|
||||
{
|
||||
const char* likelySubtags = nullptr;
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&tagBuffer);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
sink,
|
||||
err);
|
||||
}
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
likelySubtags =
|
||||
findLikelySubtags(
|
||||
tagBuffer.data(),
|
||||
likelySubtagsBuffer,
|
||||
sizeof(likelySubtagsBuffer),
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (likelySubtags != nullptr) {
|
||||
/* Always use the language tag from the
|
||||
maximal string, since it may be more
|
||||
specific than the one provided. */
|
||||
createTagStringWithAlternates(
|
||||
nullptr,
|
||||
0,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
variants,
|
||||
variantsLength,
|
||||
likelySubtags,
|
||||
sink,
|
||||
err);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
error:
|
||||
|
||||
if (!U_FAILURE(*err)) {
|
||||
*err = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
int32_t count = 0; \
|
||||
int32_t i; \
|
||||
|
@ -836,7 +441,6 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
const char* trailing = "";
|
||||
int32_t trailingLength = 0;
|
||||
int32_t trailingIndex = 0;
|
||||
UBool success = false;
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
|
@ -862,6 +466,9 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
|
||||
goto error;
|
||||
}
|
||||
if (langLength > 3) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* Find the length of the trailing portion. */
|
||||
while (_isIDSeparator(localeID[trailingIndex])) {
|
||||
|
@ -871,30 +478,42 @@ _uloc_addLikelySubtags(const char* localeID,
|
|||
trailingLength = (int32_t)uprv_strlen(trailing);
|
||||
|
||||
CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
|
||||
|
||||
success =
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
{
|
||||
const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
// We need to keep l on the stack because lsr may point into internal
|
||||
// memory of l.
|
||||
icu::Locale l = icu::Locale::createFromName(localeID);
|
||||
if (l.isBogus()) {
|
||||
goto error;
|
||||
}
|
||||
icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(l, true, *err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
const char* language = lsr.language;
|
||||
if (uprv_strcmp(language, "und") == 0) {
|
||||
language = "";
|
||||
}
|
||||
createTagStringWithAlternates(
|
||||
language,
|
||||
(int32_t)uprv_strlen(language),
|
||||
lsr.script,
|
||||
(int32_t)uprv_strlen(lsr.script),
|
||||
lsr.region,
|
||||
(int32_t)uprv_strlen(lsr.region),
|
||||
trailing,
|
||||
trailingLength,
|
||||
nullptr,
|
||||
sink,
|
||||
err);
|
||||
|
||||
if (!success) {
|
||||
const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
|
||||
|
||||
/*
|
||||
* If we get here, we need to return localeID.
|
||||
*/
|
||||
sink.Append(localeID, localIDLength);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
return success;
|
||||
return true;
|
||||
|
||||
error:
|
||||
|
||||
|
@ -913,6 +532,7 @@ static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*)
|
|||
static void
|
||||
_uloc_minimizeSubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
bool favorScript,
|
||||
UErrorCode* err) {
|
||||
icu::CharString maximizedTagBuffer;
|
||||
|
||||
|
@ -925,7 +545,6 @@ _uloc_minimizeSubtags(const char* localeID,
|
|||
const char* trailing = "";
|
||||
int32_t trailingLength = 0;
|
||||
int32_t trailingIndex = 0;
|
||||
UBool successGetMax = false;
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
|
@ -964,213 +583,38 @@ _uloc_minimizeSubtags(const char* localeID,
|
|||
CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
|
||||
|
||||
{
|
||||
icu::CharString base;
|
||||
{
|
||||
icu::CharStringByteSink baseSink(&base);
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
baseSink,
|
||||
err);
|
||||
}
|
||||
|
||||
/**
|
||||
* First, we need to first get the maximization
|
||||
* from AddLikelySubtags.
|
||||
**/
|
||||
{
|
||||
icu::CharStringByteSink maxSink(&maximizedTagBuffer);
|
||||
successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
|
||||
}
|
||||
}
|
||||
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (!successGetMax) {
|
||||
/**
|
||||
* If we got here, return the locale ID parameter unchanged.
|
||||
**/
|
||||
const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
|
||||
sink.Append(localeID, localeIDLength);
|
||||
return;
|
||||
}
|
||||
|
||||
// In the following, the lang, script, region are referring to those in
|
||||
// the maximizedTagBuffer, not the one in the localeID.
|
||||
langLength = sizeof(lang);
|
||||
scriptLength = sizeof(script);
|
||||
regionLength = sizeof(region);
|
||||
parseTagString(
|
||||
maximizedTagBuffer.data(),
|
||||
lang,
|
||||
&langLength,
|
||||
script,
|
||||
&scriptLength,
|
||||
region,
|
||||
&regionLength,
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start first with just the language.
|
||||
**/
|
||||
{
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink tagSink(&tagBuffer);
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
tagSink,
|
||||
err);
|
||||
}
|
||||
|
||||
const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
else if (!tagBuffer.isEmpty() &&
|
||||
uprv_strnicmp(
|
||||
maximizedTagBuffer.data(),
|
||||
tagBuffer.data(),
|
||||
tagBuffer.length()) == 0) {
|
||||
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Next, try the language and region.
|
||||
**/
|
||||
if (regionLength > 0) {
|
||||
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink tagSink(&tagBuffer);
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
nullptr,
|
||||
0,
|
||||
tagSink,
|
||||
err);
|
||||
}
|
||||
|
||||
icu::LSR lsr = likelySubtags->minimizeSubtags(
|
||||
{lang, langLength},
|
||||
{script, scriptLength},
|
||||
{region, regionLength},
|
||||
favorScript,
|
||||
*err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
else if (!tagBuffer.isEmpty() &&
|
||||
uprv_strnicmp(
|
||||
maximizedTagBuffer.data(),
|
||||
tagBuffer.data(),
|
||||
tagBuffer.length()) == 0) {
|
||||
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
nullptr,
|
||||
0,
|
||||
region,
|
||||
regionLength,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
const char* language = lsr.language;
|
||||
if (uprv_strcmp(language, "und") == 0) {
|
||||
language = "";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finally, try the language and script. This is our last chance,
|
||||
* since trying with all three subtags would only yield the
|
||||
* maximal version that we already have.
|
||||
**/
|
||||
if (scriptLength > 0) {
|
||||
icu::CharString tagBuffer;
|
||||
{
|
||||
icu::CharStringByteSink tagSink(&tagBuffer);
|
||||
createLikelySubtagsString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
0,
|
||||
tagSink,
|
||||
err);
|
||||
}
|
||||
|
||||
createTagStringWithAlternates(
|
||||
language,
|
||||
(int32_t)uprv_strlen(language),
|
||||
lsr.script,
|
||||
(int32_t)uprv_strlen(lsr.script),
|
||||
lsr.region,
|
||||
(int32_t)uprv_strlen(lsr.region),
|
||||
trailing,
|
||||
trailingLength,
|
||||
nullptr,
|
||||
sink,
|
||||
err);
|
||||
if(U_FAILURE(*err)) {
|
||||
goto error;
|
||||
}
|
||||
else if (!tagBuffer.isEmpty() &&
|
||||
uprv_strnicmp(
|
||||
maximizedTagBuffer.data(),
|
||||
tagBuffer.data(),
|
||||
tagBuffer.length()) == 0) {
|
||||
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
nullptr,
|
||||
0,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
/**
|
||||
* If we got here, return the max + trail.
|
||||
**/
|
||||
createTagString(
|
||||
lang,
|
||||
langLength,
|
||||
script,
|
||||
scriptLength,
|
||||
region,
|
||||
regionLength,
|
||||
trailing,
|
||||
trailingLength,
|
||||
sink,
|
||||
err);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1181,31 +625,6 @@ error:
|
|||
}
|
||||
}
|
||||
|
||||
static int32_t
|
||||
do_canonicalize(const char* localeID,
|
||||
char* buffer,
|
||||
int32_t bufferCapacity,
|
||||
UErrorCode* err)
|
||||
{
|
||||
int32_t canonicalizedSize = uloc_canonicalize(
|
||||
localeID,
|
||||
buffer,
|
||||
bufferCapacity,
|
||||
err);
|
||||
|
||||
if (*err == U_STRING_NOT_TERMINATED_WARNING ||
|
||||
*err == U_BUFFER_OVERFLOW_ERROR) {
|
||||
return canonicalizedSize;
|
||||
}
|
||||
else if (U_FAILURE(*err)) {
|
||||
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
return canonicalizedSize;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_addLikelySubtags(const char* localeID,
|
||||
char* maximizedLocaleID,
|
||||
|
@ -1239,14 +658,13 @@ static UBool
|
|||
_ulocimp_addLikelySubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* status) {
|
||||
PreflightingLocaleIDBuffer localeBuffer;
|
||||
do {
|
||||
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
|
||||
localeBuffer.getCapacity(), status);
|
||||
} while (localeBuffer.needToTryAgain(status));
|
||||
|
||||
icu::CharString localeBuffer;
|
||||
{
|
||||
icu::CharStringByteSink localeSink(&localeBuffer);
|
||||
ulocimp_canonicalize(localeID, localeSink, status);
|
||||
}
|
||||
if (U_SUCCESS(*status)) {
|
||||
return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
|
||||
return _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
@ -1271,7 +689,7 @@ uloc_minimizeSubtags(const char* localeID,
|
|||
icu::CheckedArrayByteSink sink(
|
||||
minimizedLocaleID, minimizedLocaleIDCapacity);
|
||||
|
||||
ulocimp_minimizeSubtags(localeID, sink, status);
|
||||
ulocimp_minimizeSubtags(localeID, sink, false, status);
|
||||
int32_t reslen = sink.NumberOfBytesAppended();
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
|
@ -1291,14 +709,14 @@ uloc_minimizeSubtags(const char* localeID,
|
|||
U_CAPI void U_EXPORT2
|
||||
ulocimp_minimizeSubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
bool favorScript,
|
||||
UErrorCode* status) {
|
||||
PreflightingLocaleIDBuffer localeBuffer;
|
||||
do {
|
||||
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
|
||||
localeBuffer.getCapacity(), status);
|
||||
} while (localeBuffer.needToTryAgain(status));
|
||||
|
||||
_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
|
||||
icu::CharString localeBuffer;
|
||||
{
|
||||
icu::CharStringByteSink localeSink(&localeBuffer);
|
||||
ulocimp_canonicalize(localeID, localeSink, status);
|
||||
}
|
||||
_uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
|
||||
}
|
||||
|
||||
// Pairs of (language subtag, + or -) for finding out fast if common languages
|
||||
|
@ -1374,16 +792,26 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
|
|||
UErrorCode rgStatus = U_ZERO_ERROR;
|
||||
|
||||
// First check for rg keyword value
|
||||
int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
|
||||
if (U_FAILURE(rgStatus) || rgLen != 6) {
|
||||
icu::CharString rg;
|
||||
{
|
||||
icu::CharStringByteSink sink(&rg);
|
||||
ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus);
|
||||
}
|
||||
int32_t rgLen = rg.length();
|
||||
if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) {
|
||||
rgLen = 0;
|
||||
} else {
|
||||
// rgBuf guaranteed to be zero terminated here, with text len 6
|
||||
char *rgPtr = rgBuf;
|
||||
for (; *rgPtr!= 0; rgPtr++) {
|
||||
*rgPtr = uprv_toupper(*rgPtr);
|
||||
// chop off the subdivision code (which will generally be "zzzz" anyway)
|
||||
const char* const data = rg.data();
|
||||
if (uprv_isASCIILetter(data[0])) {
|
||||
rgLen = 2;
|
||||
rgBuf[0] = uprv_toupper(data[0]);
|
||||
rgBuf[1] = uprv_toupper(data[1]);
|
||||
} else {
|
||||
// assume three-digit region code
|
||||
rgLen = 3;
|
||||
uprv_memcpy(rgBuf, data, rgLen);
|
||||
}
|
||||
rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
|
||||
}
|
||||
|
||||
if (rgLen == 0) {
|
||||
|
|
419
thirdparty/icu4c/common/loclikelysubtags.cpp
vendored
419
thirdparty/icu4c/common/loclikelysubtags.cpp
vendored
|
@ -11,6 +11,7 @@
|
|||
#include "unicode/locid.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "charstr.h"
|
||||
#include "cstring.h"
|
||||
#include "loclikelysubtags.h"
|
||||
|
@ -23,6 +24,7 @@
|
|||
#include "uniquecharstr.h"
|
||||
#include "uresdata.h"
|
||||
#include "uresimp.h"
|
||||
#include "uvector.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -81,11 +83,18 @@ struct XLikelySubtagsData {
|
|||
// Read all strings in the resource bundle and convert them to invariant char *.
|
||||
LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes;
|
||||
int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0;
|
||||
ResourceArray m49Array;
|
||||
if (likelyTable.findValue("m49", value)) {
|
||||
m49Array = value.getArray(errorCode);
|
||||
} else {
|
||||
errorCode = U_MISSING_RESOURCE_ERROR;
|
||||
return;
|
||||
}
|
||||
if (!readStrings(likelyTable, "languageAliases", value,
|
||||
languageIndexes, languagesLength, errorCode) ||
|
||||
!readStrings(likelyTable, "regionAliases", value,
|
||||
regionIndexes, regionsLength, errorCode) ||
|
||||
!readStrings(likelyTable, "lsrs", value,
|
||||
!readLSREncodedStrings(likelyTable, "lsrnum", value, m49Array,
|
||||
lsrSubtagIndexes,lsrSubtagsLength, errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
@ -136,7 +145,7 @@ struct XLikelySubtagsData {
|
|||
|
||||
if (!readStrings(matchTable, "partitions", value,
|
||||
partitionIndexes, partitionsLength, errorCode) ||
|
||||
!readStrings(matchTable, "paradigms", value,
|
||||
!readLSREncodedStrings(matchTable, "paradigmnum", value, m49Array,
|
||||
paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
@ -233,10 +242,96 @@ private:
|
|||
return false;
|
||||
}
|
||||
for (int i = 0; i < length; ++i) {
|
||||
stringArray.getValue(i, value); // returns true because i < length
|
||||
rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode);
|
||||
if (stringArray.getValue(i, value)) { // returns true because i < length
|
||||
int32_t strLength = 0;
|
||||
rawIndexes[i] = strings.add(value.getString(strLength, errorCode), errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Decodes the language part of a packed integer.
//   0 -> ""      1 -> "skip"
// Otherwise the low 24 bits, reduced modulo 27^3, are read as up to three
// base-27 digits (least significant first); digit d becomes 'a' + (d - 1).
// The third letter is produced only when the third digit is non-zero.
UnicodeString toLanguage(int encoded) {
    switch (encoded) {
    case 0:
        return UNICODE_STRING_SIMPLE("");
    case 1:
        return UNICODE_STRING_SIMPLE("skip");
    default:
        break;
    }

    const int packed = (encoded & 0x00ffffff) % (27*27*27);
    const int thirdDigit = packed / (27 * 27);

    char letters[3];
    letters[0] = 'a' + ((packed % 27) - 1);
    letters[1] = 'a' + (((packed / 27 ) % 27) - 1);

    int32_t letterCount = 2;
    if (thirdDigit != 0) {
        letters[2] = 'a' + (thirdDigit - 1);
        letterCount = 3;
    }
    return UnicodeString(letters, letterCount, US_INV);
}
|
||||
// Decodes the script part of a packed integer.
//   0 -> ""      1 -> "script"
// Otherwise bits 24..31 are cast to UScriptCode and looked up with
// uscript_getShortName(); a null lookup result yields "".
UnicodeString toScript(int encoded) {
    switch (encoded) {
    case 0:
        return UNICODE_STRING_SIMPLE("");
    case 1:
        return UNICODE_STRING_SIMPLE("script");
    default:
        break;
    }

    const int scriptField = (encoded >> 24) & 0x000000ff;
    const char* shortName = uscript_getShortName(static_cast<UScriptCode>(scriptField));
    if (shortName != nullptr) {
        U_ASSERT(uprv_strlen(shortName) == 4);
        return UnicodeString(shortName, 4, US_INV);
    }
    return UNICODE_STRING_SIMPLE("");
}
|
||||
// Fetches element `index` of m49Array (through `value`) as a UnicodeString.
// Returns "" without touching the array when errorCode already signals failure.
// When the array has no such element, sets errorCode to
// U_MISSING_RESOURCE_ERROR and returns "".
UnicodeString m49IndexToCode(const ResourceArray &m49Array, ResourceValue &value, int index, UErrorCode &errorCode) {
    if (U_SUCCESS(errorCode)) {
        if (m49Array.getValue(index, value)) {
            return value.getUnicodeString(errorCode);
        }
        // "m49" does not include the index.
        errorCode = U_MISSING_RESOURCE_ERROR;
    }
    return UNICODE_STRING_SIMPLE("");
}
|
||||
|
||||
// Decodes the region part of a packed integer.
//   0 or 1 -> ""
// Otherwise the region field is ((low 24 bits) / 27^3) % 27^2.
//   field < 27  : treated as an index and resolved through m49IndexToCode()
//   field >= 27 : two base-27 digits (least significant first); digit d
//                 becomes 'A' + (d - 1)
UnicodeString toRegion(const ResourceArray& m49Array, ResourceValue &value, int encoded, UErrorCode &errorCode) {
    if (encoded == 0 || encoded == 1) {
        return UNICODE_STRING_SIMPLE("");
    }

    const int regionField = ((encoded & 0x00ffffff) / (27 * 27 * 27)) % (27 * 27);
    if (regionField < 27) {
        // Selected M49 code index, find the code from "m49" resource.
        return m49IndexToCode(m49Array, value, regionField, errorCode);
    }

    const char letters[2] = {
        static_cast<char>('A' + ((regionField % 27) - 1)),
        static_cast<char>('A' + (((regionField / 27) % 27) - 1)),
    };
    return UnicodeString(letters, 2, US_INV);
}
|
||||
|
||||
bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array,
|
||||
LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
|
||||
if (table.findValue(key, value)) {
|
||||
const int32_t* vectors = value.getIntVector(length, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
if (length == 0) { return true; }
|
||||
int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length * 3);
|
||||
if (rawIndexes == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < length; ++i) {
|
||||
rawIndexes[i*3] = strings.addByValue(toLanguage(vectors[i]), errorCode);
|
||||
rawIndexes[i*3+1] = strings.addByValue(toScript(vectors[i]), errorCode);
|
||||
rawIndexes[i*3+2] = strings.addByValue(
|
||||
toRegion(m49Array, value, vectors[i], errorCode), errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
length *= 3;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -245,15 +340,52 @@ private:
|
|||
namespace {
|
||||
|
||||
XLikelySubtags *gLikelySubtags = nullptr;
|
||||
UVector *gMacroregions = nullptr;
|
||||
UInitOnce gInitOnce {};
|
||||
|
||||
// Releases the file-level singletons (gLikelySubtags, gMacroregions) and
// resets gInitOnce. Registered with ucln_common_registerCleanup() by
// initLikelySubtags(). Always returns true.
UBool U_CALLCONV cleanup() {
|
||||
delete gLikelySubtags;
|
||||
gLikelySubtags = nullptr;
|
||||
delete gMacroregions;
|
||||
gMacroregions = nullptr;
|
||||
gInitOnce.reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
static const char16_t RANGE_MARKER = 0x7E; /* '~' */
|
||||
/**
 * Builds the list of macroregion codes from
 * supplementalData/idValidity/region/macroregion.
 *
 * Each resource entry is either a plain code, which is added as-is, or a range
 * of the form <prefix><first>~<last>. A range is expanded by incrementing the
 * character just before the RANGE_MARKER until it exceeds the character just
 * after it, adding one code per step.
 *
 * @param status in/out error code. Set to a failure if the resources cannot be
 *               opened, an allocation fails, an entry does not fit the
 *               expansion buffer, or a range entry has no end character.
 * @return a new UVector of UnicodeString owned by the caller, or nullptr if
 *         the resource bundles could not be opened. If a failure occurs while
 *         iterating, the partially filled vector is still returned and the
 *         caller must check `status` and delete it.
 */
UVector* loadMacroregions(UErrorCode &status) {
    LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);

    LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status));
    LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",nullptr,&status));
    LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",nullptr,&status));
    LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",nullptr,&status));

    if (U_FAILURE(status)) {
        return nullptr;
    }

    constexpr int32_t kBufCapacity = 6;
    while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) {
        UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status);
        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
        char16_t buf[kBufCapacity];
        regionName.extract(buf,kBufCapacity,status);
        if (U_FAILURE(status)) {
            // Either reading the entry failed or it does not fit into buf.
            // Stop here: indexing buf with a marker position taken from an
            // over-long string could write past the end of the buffer.
            break;
        }
        if ( rangeMarkerLocation > 0 ) {
            if (rangeMarkerLocation + 1 >= regionName.length()) {
                // Range without an end character. charAt() would return 0xFFFF
                // for the missing character and the expansion would not stop.
                status = U_INVALID_FORMAT_ERROR;
                break;
            }
            char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
            // extract() succeeded, so the marker position is inside buf.
            buf[rangeMarkerLocation] = 0;
            while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
                newMacroRegions->adoptElement(newRegion.orphan(),status);
                buf[rangeMarkerLocation-1]++;
            }
        } else {
            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
            newMacroRegions->adoptElement(newRegion.orphan(),status);
        }
    }
    return newMacroRegions.orphan();
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
|
||||
|
@ -263,10 +395,14 @@ void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
|
|||
data.load(errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
gLikelySubtags = new XLikelySubtags(data);
|
||||
if (gLikelySubtags == nullptr) {
|
||||
gMacroregions = loadMacroregions(errorCode);
|
||||
if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
|
||||
delete gLikelySubtags;
|
||||
delete gMacroregions;
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
|
||||
}
|
||||
|
||||
|
@ -317,15 +453,32 @@ XLikelySubtags::~XLikelySubtags() {
|
|||
delete[] lsrs;
|
||||
}
|
||||
|
||||
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
|
||||
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const {
|
||||
if (locale.isBogus()) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return LSR("", "", "", LSR::EXPLICIT_LSR);
|
||||
}
|
||||
const char *name = locale.getName();
|
||||
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
|
||||
// Private use language tag x-subtag-subtag... which CLDR changes to
|
||||
// und-x-subtag-subtag...
|
||||
return LSR(name, "", "", LSR::EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant(), errorCode);
|
||||
LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant(), returnInputIfUnmatch, errorCode);
|
||||
|
||||
if (uprv_strlen(max.language) == 0 &&
|
||||
uprv_strlen(max.script) == 0 &&
|
||||
uprv_strlen(max.region) == 0) {
|
||||
// No match. The ICU API mandates the following:
|
||||
// If the provided ULocale instance is already in the maximal form, or
|
||||
// there is no data available for maximization, it will be
|
||||
// returned.
|
||||
return LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR::EXPLICIT_LSR, errorCode);
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -338,7 +491,9 @@ const char *getCanonical(const CharStringMap &aliases, const char *alias) {
|
|||
} // namespace
|
||||
|
||||
LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
|
||||
const char *variant, UErrorCode &errorCode) const {
|
||||
const char *variant,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const {
|
||||
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
|
||||
// They should match only themselves,
|
||||
// not other locales with what looks like the same language and script subtags.
|
||||
|
@ -378,64 +533,91 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c
|
|||
language = getCanonical(languageAliases, language);
|
||||
// (We have no script mappings.)
|
||||
region = getCanonical(regionAliases, region);
|
||||
return maximize(language, script, region);
|
||||
return maximize(language, script, region, returnInputIfUnmatch, errorCode);
|
||||
}
|
||||
|
||||
LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
|
||||
if (uprv_strcmp(language, "und") == 0) {
|
||||
/**
 * C-string convenience overload: wraps each NUL-terminated subtag in a
 * StringPiece of its strlen length and forwards to the StringPiece overload.
 * All three pointers must be non-null.
 */
LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region,
                             bool returnInputIfUnmatch,
                             UErrorCode &errorCode) const {
    const StringPiece languagePiece(language, static_cast<int32_t>(uprv_strlen(language)));
    const StringPiece scriptPiece(script, static_cast<int32_t>(uprv_strlen(script)));
    const StringPiece regionPiece(region, static_cast<int32_t>(uprv_strlen(region)));
    return maximize(languagePiece, scriptPiece, regionPiece, returnInputIfUnmatch, errorCode);
}
|
||||
|
||||
/**
 * Reports whether `region` is listed in gMacroregions.
 *
 * In Java, the Region class is used for this. In C++, Region lives in i18n,
 * so the same data that Region uses is read into gMacroregions to avoid a
 * dependency from common on i18n/region.cpp.
 *
 * @param errorCode in/out; the function returns false if it indicates failure
 *                  on entry or after the one-time initialization.
 */
bool XLikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {
    if (U_SUCCESS(errorCode)) {
        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return false;
    }
    UnicodeString regionName(UnicodeString::fromUTF8(region));
    return gMacroregions->contains(static_cast<void *>(&regionName));
}
|
||||
|
||||
LSR XLikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
|
||||
}
|
||||
if (language.compare("und") == 0) {
|
||||
language = "";
|
||||
}
|
||||
if (uprv_strcmp(script, "Zzzz") == 0) {
|
||||
if (script.compare("Zzzz") == 0) {
|
||||
script = "";
|
||||
}
|
||||
if (uprv_strcmp(region, "ZZ") == 0) {
|
||||
if (region.compare("ZZ") == 0) {
|
||||
region = "";
|
||||
}
|
||||
if (*script != 0 && *region != 0 && *language != 0) {
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
|
||||
if (!script.empty() && !region.empty() && !language.empty()) {
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode); // already maximized
|
||||
}
|
||||
bool retainLanguage = false;
|
||||
bool retainScript = false;
|
||||
bool retainRegion = false;
|
||||
|
||||
uint32_t retainOldMask = 0;
|
||||
BytesTrie iter(trie);
|
||||
uint64_t state;
|
||||
int32_t value;
|
||||
// Small optimization: Array lookup for first language letter.
|
||||
int32_t c0;
|
||||
if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
|
||||
language[1] != 0 && // language.length() >= 2
|
||||
if (0 <= (c0 = uprv_lowerOrdinal(language.data()[0])) && c0 <= 25 &&
|
||||
language.length() >= 2 &&
|
||||
(state = trieFirstLetterStates[c0]) != 0) {
|
||||
value = trieNext(iter.resetToState64(state), language, 1);
|
||||
} else {
|
||||
value = trieNext(iter, language, 0);
|
||||
}
|
||||
bool matchLanguage = (value >= 0);
|
||||
bool matchScript = false;
|
||||
if (value >= 0) {
|
||||
if (*language != 0) {
|
||||
retainOldMask |= 4;
|
||||
}
|
||||
retainLanguage = !language.empty();
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 4;
|
||||
retainLanguage = true;
|
||||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
state = 0;
|
||||
}
|
||||
|
||||
if (value >= 0 && !script.empty()) {
|
||||
matchScript = true;
|
||||
}
|
||||
if (value > 0) {
|
||||
// Intermediate or final value from just language.
|
||||
if (value == SKIP_SCRIPT) {
|
||||
value = 0;
|
||||
}
|
||||
if (*script != 0) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
retainScript = !script.empty();
|
||||
} else {
|
||||
value = trieNext(iter, script, 0);
|
||||
if (value >= 0) {
|
||||
if (*script != 0) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
retainScript = !script.empty();
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 2;
|
||||
retainScript = true;
|
||||
if (state == 0) {
|
||||
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
|
||||
} else {
|
||||
|
@ -447,19 +629,19 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
}
|
||||
}
|
||||
|
||||
bool matchRegion = false;
|
||||
if (value > 0) {
|
||||
// Final value from just language or language+script.
|
||||
if (*region != 0) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
retainRegion = !region.empty();
|
||||
} else {
|
||||
value = trieNext(iter, region, 0);
|
||||
if (value >= 0) {
|
||||
if (*region != 0) {
|
||||
retainOldMask |= 1;
|
||||
if (!region.empty() && !isMacroregion(region, errorCode)) {
|
||||
retainRegion = true;
|
||||
matchRegion = true;
|
||||
}
|
||||
} else {
|
||||
retainOldMask |= 1;
|
||||
retainRegion = true;
|
||||
if (state == 0) {
|
||||
value = defaultLsrIndex;
|
||||
} else {
|
||||
|
@ -470,28 +652,33 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
}
|
||||
}
|
||||
U_ASSERT(value < lsrsLength);
|
||||
const LSR &result = lsrs[value];
|
||||
const LSR &matched = lsrs[value];
|
||||
|
||||
if (*language == 0) {
|
||||
language = "und";
|
||||
if (returnInputIfUnmatch &&
|
||||
(!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
|
||||
return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching.
|
||||
}
|
||||
if (language.empty()) {
|
||||
language = StringPiece("und");
|
||||
}
|
||||
|
||||
if (retainOldMask == 0) {
|
||||
if (!(retainLanguage || retainScript || retainRegion)) {
|
||||
// Quickly return a copy of the lookup-result LSR
|
||||
// without new allocation of the subtags.
|
||||
return LSR(result.language, result.script, result.region, result.flags);
|
||||
return LSR(matched.language, matched.script, matched.region, matched.flags);
|
||||
}
|
||||
if ((retainOldMask & 4) == 0) {
|
||||
language = result.language;
|
||||
if (!retainLanguage) {
|
||||
language = matched.language;
|
||||
}
|
||||
if ((retainOldMask & 2) == 0) {
|
||||
script = result.script;
|
||||
if (!retainScript) {
|
||||
script = matched.script;
|
||||
}
|
||||
if ((retainOldMask & 1) == 0) {
|
||||
region = result.region;
|
||||
if (!retainRegion) {
|
||||
region = matched.region;
|
||||
}
|
||||
int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
return LSR(language, script, region, retainOldMask);
|
||||
return LSR(language, script, region, retainMask, errorCode);
|
||||
}
|
||||
|
||||
int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
|
||||
|
@ -627,57 +814,97 @@ int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
|
|||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
|
||||
// in loclikely.cpp to this new code, including activating this
|
||||
// minimizeSubtags() function. The LocaleMatcher does not minimize.
|
||||
#if 0
|
||||
LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
|
||||
const char *regionIn, ULocale.Minimize fieldToFavor,
|
||||
UErrorCode &errorCode) const {
|
||||
LSR result = maximize(languageIn, scriptIn, regionIn);
|
||||
|
||||
// We could try just a series of checks, like:
|
||||
// LSR result2 = addLikelySubtags(languageIn, "", "");
|
||||
// if result.equals(result2) return result2;
|
||||
// However, we can optimize 2 of the cases:
|
||||
// (languageIn, "", "")
|
||||
// (languageIn, "", regionIn)
|
||||
|
||||
// value00 = lookup(result.language, "", "")
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
int value = trieNext(iter, result.language, 0);
|
||||
U_ASSERT(value >= 0);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
U_ASSERT(value >= 0);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
int32_t XLikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
|
||||
UStringTrieResult result;
|
||||
uint8_t c;
|
||||
if (s.length() == i) {
|
||||
result = iter.next(u'*');
|
||||
} else {
|
||||
c = s.data()[i];
|
||||
for (;;) {
|
||||
c = uprv_invCharToAscii(c);
|
||||
// EBCDIC: If s[i] is not an invariant character,
|
||||
// then c is now 0 and will simply not match anything, which is harmless.
|
||||
if (i+1 != s.length()) {
|
||||
if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
|
||||
return -1;
|
||||
}
|
||||
c = s.data()[++i];
|
||||
} else {
|
||||
// last character of this subtag
|
||||
result = iter.next(c | 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
U_ASSERT(value > 0);
|
||||
LSR value00 = lsrs[value];
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
switch (result) {
|
||||
case USTRINGTRIE_NO_MATCH: return -1;
|
||||
case USTRINGTRIE_NO_VALUE: return 0;
|
||||
case USTRINGTRIE_INTERMEDIATE_VALUE:
|
||||
U_ASSERT(iter.getValue() == SKIP_SCRIPT);
|
||||
return SKIP_SCRIPT;
|
||||
case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
|
||||
default: return -1;
|
||||
}
|
||||
|
||||
// The last case is not as easy to optimize.
|
||||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (favorRegionOk) {
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Finds the shortest subtag combination that maximizes to the same result as
 * the input.
 *
 * The input is maximized first. Reduced forms are then tried in order, and the
 * first one whose maximization is equivalent to the maximized input is
 * returned:
 *   1. language only;
 *   2. language+region, then language+script (when favorScript is false), or
 *      language+script, then language+region (when favorScript is true).
 * If none matches, the fully maximized triple is returned.
 *
 * @param favorScript selects the order of step 2 as described above
 * @param errorCode   in/out; on failure the maximized LSR obtained at the
 *                    start is returned
 * @return the minimized LSR, or the unchanged input subtags when maximization
 *         finds no match
 */
LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
                                    StringPiece region,
                                    bool favorScript,
                                    UErrorCode &errorCode) const {
    LSR max = maximize(language, script, region, true, errorCode);
    if (U_FAILURE(errorCode)) {
        return max;
    }
    const bool unmatched =
        *max.language == 0 && *max.script == 0 && *max.region == 0;
    if (unmatched) {
        // No match. The ICU API requires:
        // "If this Locale is already in the minimal form, or not valid, or
        // there is no data available for minimization, the Locale will be
        // unchanged."
        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
    }

    // Reduced forms to try, in priority order.
    struct Candidate {
        const char *script;
        const char *region;
    };
    const Candidate languageOnly = {"", ""};
    const Candidate languageAndRegion = {"", max.region};
    const Candidate languageAndScript = {max.script, ""};
    const Candidate attempts[] = {
        languageOnly,
        favorScript ? languageAndScript : languageAndRegion,
        favorScript ? languageAndRegion : languageAndScript,
    };

    for (const Candidate &attempt : attempts) {
        LSR test = maximize(max.language, attempt.script, attempt.region, true, errorCode);
        if (U_FAILURE(errorCode)) {
            return max;
        }
        if (test.isEquivalentTo(max)) {
            return LSR(max.language, attempt.script, attempt.region,
                       LSR::DONT_CARE_FLAGS, errorCode);
        }
    }
    return LSR(max.language, max.script, max.region, LSR::DONT_CARE_FLAGS, errorCode);
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
28
thirdparty/icu4c/common/loclikelysubtags.h
vendored
28
thirdparty/icu4c/common/loclikelysubtags.h
vendored
|
@ -11,6 +11,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/bytestrie.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "charstrmap.h"
|
||||
|
@ -47,7 +48,9 @@ public:
|
|||
static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
|
||||
|
||||
// VisibleForTesting
|
||||
LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
|
||||
LSR makeMaximizedLsrFrom(const Locale &locale,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Tests whether lsr is "more likely" than other.
|
||||
|
@ -61,13 +64,9 @@ public:
|
|||
*/
|
||||
int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;
|
||||
|
||||
// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
|
||||
// in loclikely.cpp to this new code, including activating this
|
||||
// minimizeSubtags() function. The LocaleMatcher does not minimize.
|
||||
#if 0
|
||||
LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
|
||||
ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
|
||||
#endif
|
||||
LSR minimizeSubtags(StringPiece language, StringPiece script, StringPiece region,
|
||||
bool favorScript,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
// visible for LocaleDistance
|
||||
const LocaleDistanceData &getDistanceData() const { return distanceData; }
|
||||
|
@ -80,16 +79,25 @@ private:
|
|||
static void initLikelySubtags(UErrorCode &errorCode);
|
||||
|
||||
LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
|
||||
const char *variant, UErrorCode &errorCode) const;
|
||||
const char *variant,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/**
|
||||
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
|
||||
*/
|
||||
LSR maximize(const char *language, const char *script, const char *region) const;
|
||||
LSR maximize(const char *language, const char *script, const char *region,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
LSR maximize(StringPiece language, StringPiece script, StringPiece region,
|
||||
bool returnInputIfUnmatch,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
int32_t getLikelyIndex(const char *language, const char *script) const;
|
||||
bool isMacroregion(StringPiece& region, UErrorCode &errorCode) const;
|
||||
|
||||
static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
|
||||
static int32_t trieNext(BytesTrie &iter, StringPiece s, int32_t i);
|
||||
|
||||
UResourceBundle *langInfoBundle;
|
||||
// We could store the strings by value, except that if there were few enough strings,
|
||||
|
|
20
thirdparty/icu4c/common/locmap.cpp
vendored
20
thirdparty/icu4c/common/locmap.cpp
vendored
|
@ -1170,7 +1170,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
|
|||
// conversion functionality when available.
|
||||
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
|
||||
int32_t len;
|
||||
char baseName[ULOC_FULLNAME_CAPACITY] = {};
|
||||
icu::CharString baseName;
|
||||
const char * mylocaleID = localeID;
|
||||
|
||||
// Check any for keywords.
|
||||
|
@ -1189,19 +1189,23 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
|
|||
else
|
||||
{
|
||||
// If the locale ID contains keywords other than collation, just use the base name.
|
||||
len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
|
||||
|
||||
if (U_SUCCESS(*status) && len > 0)
|
||||
{
|
||||
baseName[len] = 0;
|
||||
mylocaleID = baseName;
|
||||
icu::CharStringByteSink sink(&baseName);
|
||||
ulocimp_getBaseName(localeID, sink, status);
|
||||
}
|
||||
if (U_SUCCESS(*status) && !baseName.isEmpty())
|
||||
{
|
||||
mylocaleID = baseName.data();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
|
||||
// this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
|
||||
(void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), false, status);
|
||||
icu::CharString asciiBCP47Tag;
|
||||
{
|
||||
icu::CharStringByteSink sink(&asciiBCP47Tag);
|
||||
ulocimp_toLanguageTag(mylocaleID, sink, false, status);
|
||||
}
|
||||
|
||||
if (U_SUCCESS(*status))
|
||||
{
|
||||
|
|
14
thirdparty/icu4c/common/locresdata.cpp
vendored
14
thirdparty/icu4c/common/locresdata.cpp
vendored
|
@ -24,6 +24,8 @@
|
|||
#include "unicode/putil.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "bytesinkutil.h"
|
||||
#include "charstr.h"
|
||||
#include "cstring.h"
|
||||
#include "ulocimp.h"
|
||||
#include "uresimp.h"
|
||||
|
@ -156,16 +158,18 @@ _uloc_getOrientationHelper(const char* localeId,
|
|||
ULayoutType result = ULOC_LAYOUT_UNKNOWN;
|
||||
|
||||
if (!U_FAILURE(*status)) {
|
||||
int32_t length = 0;
|
||||
char localeBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
|
||||
uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
|
||||
icu::CharString localeBuffer;
|
||||
{
|
||||
icu::CharStringByteSink sink(&localeBuffer);
|
||||
ulocimp_canonicalize(localeId, sink, status);
|
||||
}
|
||||
|
||||
if (!U_FAILURE(*status)) {
|
||||
int32_t length = 0;
|
||||
const char16_t* const value =
|
||||
uloc_getTableStringWithFallback(
|
||||
nullptr,
|
||||
localeBuffer,
|
||||
localeBuffer.data(),
|
||||
"layout",
|
||||
nullptr,
|
||||
key,
|
||||
|
|
20
thirdparty/icu4c/common/lsr.cpp
vendored
20
thirdparty/icu4c/common/lsr.cpp
vendored
|
@ -31,6 +31,26 @@ LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds an LSR from three string pieces by copying them into a single
 * buffer obtained from CharString::cloneData(), laid out as
 * language NUL script NUL region.
 *
 * If errorCode indicates failure on entry, or the copy fails, the
 * language/script/region pointers remain nullptr. regionIndex is computed
 * from r.data() in the initializer list regardless of errorCode.
 * NOTE(review): indexForRegion() receives r.data() directly; presumably
 * callers pass NUL-terminated, non-null data -- confirm.
 */
LSR::LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
         UErrorCode &errorCode) :
        language(nullptr), script(nullptr), region(nullptr),
        regionIndex(indexForRegion(r.data())), flags(f) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    CharString packed;
    packed.append(lang, errorCode).append('\0', errorCode);
    const int32_t scriptStart = packed.length();
    packed.append(scr, errorCode).append('\0', errorCode);
    const int32_t regionStart = packed.length();
    packed.append(r, errorCode);

    owned = packed.cloneData(errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    language = owned;
    script = owned + scriptStart;
    region = owned + regionStart;
}
|
||||
|
||||
LSR::LSR(LSR &&other) noexcept :
|
||||
language(other.language), script(other.script), region(other.region), owned(other.owned),
|
||||
regionIndex(other.regionIndex), flags(other.flags),
|
||||
|
|
3
thirdparty/icu4c/common/lsr.h
vendored
3
thirdparty/icu4c/common/lsr.h
vendored
|
@ -7,6 +7,7 @@
|
|||
#ifndef __LSR_H__
|
||||
#define __LSR_H__
|
||||
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "cstring.h"
|
||||
|
@ -45,6 +46,8 @@ struct LSR final : public UMemory {
|
|||
*/
|
||||
LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
|
||||
UErrorCode &errorCode);
|
||||
LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
|
||||
UErrorCode &errorCode);
|
||||
LSR(LSR &&other) noexcept;
|
||||
LSR(const LSR &other) = delete;
|
||||
inline ~LSR() {
|
||||
|
|
2
thirdparty/icu4c/common/norm2_nfc_data.h
vendored
2
thirdparty/icu4c/common/norm2_nfc_data.h
vendored
|
@ -10,7 +10,7 @@
|
|||
#ifdef INCLUDED_FROM_NORMALIZER2_CPP
|
||||
|
||||
static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0};
|
||||
static const UVersionInfo norm2_nfc_data_dataVersion={0xf,0,0,0};
|
||||
static const UVersionInfo norm2_nfc_data_dataVersion={0xf,1,0,0};
|
||||
|
||||
static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={
|
||||
0x50,0x4cb8,0x8920,0x8a20,0x8a20,0x8a20,0x8a20,0x8a20,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c,
|
||||
|
|
1
thirdparty/icu4c/common/norm2allmodes.h
vendored
1
thirdparty/icu4c/common/norm2allmodes.h
vendored
|
@ -391,6 +391,7 @@ struct Norm2AllModes : public UMemory {
|
|||
static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
|
||||
static const Norm2AllModes *getNFKC_SCFInstance(UErrorCode &errorCode);
|
||||
|
||||
Normalizer2Impl *impl;
|
||||
ComposeNormalizer2 comp;
|
||||
|
|
3
thirdparty/icu4c/common/normalizer2impl.h
vendored
3
thirdparty/icu4c/common/normalizer2impl.h
vendored
|
@ -789,7 +789,8 @@ unorm_getFCD16(UChar32 c);
|
|||
*
|
||||
* Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms.
|
||||
* ICU ships with data files for standard Unicode Normalization Forms
|
||||
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm).
|
||||
* NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm),
|
||||
* NFKC_Casefold (nfkc_cf.nrm) and NFKC_Simple_Casefold (nfkc_scf.nrm).
|
||||
* Custom (application-specific) data can be built into additional .nrm files
|
||||
* with the gennorm2 build tool.
|
||||
* ICU ships with one such file, uts46.nrm, for the implementation of UTS #46.
|
||||
|
|
2008
thirdparty/icu4c/common/propname_data.h
vendored
2008
thirdparty/icu4c/common/propname_data.h
vendored
File diff suppressed because it is too large
Load diff
15
thirdparty/icu4c/common/putil.cpp
vendored
15
thirdparty/icu4c/common/putil.cpp
vendored
|
@ -1175,6 +1175,21 @@ uprv_tzname(int n)
|
|||
if (ret != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
|
||||
int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
|
||||
const char *tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
|
||||
// MacOS14 has the realpath as something like
|
||||
// /usr/share/zoneinfo.default/Australia/Melbourne
|
||||
// which will not have "/zoneinfo/" in the path.
|
||||
// Therefore if we fail, we fall back to read the link which is
|
||||
// /var/db/timezone/zoneinfo/Australia/Melbourne
|
||||
// We also fall back to reading the link if the realpath leads to something like
|
||||
// /usr/share/zoneinfo/posixrules
|
||||
if (tzZoneInfoTailPtr == nullptr ||
|
||||
uprv_strcmp(tzZoneInfoTailPtr + tzZoneInfoTailLen, "posixrules") == 0) {
|
||||
ssize_t size = readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
|
||||
if (size > 0) {
|
||||
gTimeZoneBuffer[size] = 0;
|
||||
tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
|
||||
}
|
||||
}
|
||||
if (tzZoneInfoTailPtr != nullptr) {
|
||||
tzZoneInfoTailPtr += tzZoneInfoTailLen;
|
||||
skipZoneIDPrefix(&tzZoneInfoTailPtr);
|
||||
|
|
51
thirdparty/icu4c/common/rbbi.cpp
vendored
51
thirdparty/icu4c/common/rbbi.cpp
vendored
|
@ -1125,6 +1125,7 @@ static icu::UStack *gLanguageBreakFactories = nullptr;
|
|||
static const icu::UnicodeString *gEmptyString = nullptr;
|
||||
static icu::UInitOnce gLanguageBreakFactoriesInitOnce {};
|
||||
static icu::UInitOnce gRBBIInitOnce {};
|
||||
static icu::ICULanguageBreakFactory *gICULanguageBreakFactory = nullptr;
|
||||
|
||||
/**
|
||||
* Release all static memory held by breakiterator.
|
||||
|
@ -1153,37 +1154,41 @@ static void U_CALLCONV rbbiInit() {
|
|||
ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
|
||||
}
|
||||
|
||||
static void U_CALLCONV initLanguageFactories() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
static void U_CALLCONV initLanguageFactories(UErrorCode& status) {
|
||||
U_ASSERT(gLanguageBreakFactories == nullptr);
|
||||
gLanguageBreakFactories = new UStack(_deleteFactory, nullptr, status);
|
||||
if (gLanguageBreakFactories != nullptr && U_SUCCESS(status)) {
|
||||
ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status);
|
||||
gLanguageBreakFactories->push(builtIn, status);
|
||||
LocalPointer<ICULanguageBreakFactory> factory(new ICULanguageBreakFactory(status), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
gICULanguageBreakFactory = factory.orphan();
|
||||
gLanguageBreakFactories->push(gICULanguageBreakFactory, status);
|
||||
#ifdef U_LOCAL_SERVICE_HOOK
|
||||
LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
|
||||
if (extra != nullptr) {
|
||||
gLanguageBreakFactories->push(extra, status);
|
||||
}
|
||||
LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status);
|
||||
if (extra != nullptr) {
|
||||
gLanguageBreakFactories->push(extra, status);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup);
|
||||
}
|
||||
|
||||
// Runs initLanguageFactories() through umtx_initOnce, guarded by
// gLanguageBreakFactoriesInitOnce; `status` is passed through to it.
void ensureLanguageFactories(UErrorCode& status) {
|
||||
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories, status);
|
||||
}
|
||||
|
||||
static const LanguageBreakEngine*
|
||||
getLanguageBreakEngineFromFactory(UChar32 c)
|
||||
getLanguageBreakEngineFromFactory(UChar32 c, const char* locale)
|
||||
{
|
||||
umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories);
|
||||
if (gLanguageBreakFactories == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ensureLanguageFactories(status);
|
||||
if (U_FAILURE(status)) return nullptr;
|
||||
|
||||
int32_t i = gLanguageBreakFactories->size();
|
||||
const LanguageBreakEngine *lbe = nullptr;
|
||||
while (--i >= 0) {
|
||||
LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i));
|
||||
lbe = factory->getEngineFor(c);
|
||||
lbe = factory->getEngineFor(c, locale);
|
||||
if (lbe != nullptr) {
|
||||
break;
|
||||
}
|
||||
|
@ -1199,7 +1204,7 @@ getLanguageBreakEngineFromFactory(UChar32 c)
|
|||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
const LanguageBreakEngine *
|
||||
RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
||||
RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
|
||||
const LanguageBreakEngine *lbe = nullptr;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
@ -1215,14 +1220,14 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||
int32_t i = fLanguageBreakEngines->size();
|
||||
while (--i >= 0) {
|
||||
lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i));
|
||||
if (lbe->handles(c)) {
|
||||
if (lbe->handles(c, locale)) {
|
||||
return lbe;
|
||||
}
|
||||
}
|
||||
|
||||
// No existing dictionary took the character. See if a factory wants to
|
||||
// give us a new LanguageBreakEngine for this character.
|
||||
lbe = getLanguageBreakEngineFromFactory(c);
|
||||
lbe = getLanguageBreakEngineFromFactory(c, locale);
|
||||
|
||||
// If we got one, use it and push it on our stack.
|
||||
if (lbe != nullptr) {
|
||||
|
@ -1259,6 +1264,18 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
|
|||
return fUnhandledBreakEngine;
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
void U_EXPORT2 RuleBasedBreakIterator::registerExternalBreakEngine(
|
||||
ExternalBreakEngine* toAdopt, UErrorCode& status) {
|
||||
LocalPointer<ExternalBreakEngine> engine(toAdopt, status);
|
||||
if (U_FAILURE(status)) return;
|
||||
ensureLanguageFactories(status);
|
||||
if (U_FAILURE(status)) return;
|
||||
gICULanguageBreakFactory->addExternalEngine(engine.orphan(), status);
|
||||
}
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
|
||||
void RuleBasedBreakIterator::dumpCache() {
|
||||
fBreakCache->dumpCache();
|
||||
}
|
||||
|
|
5
thirdparty/icu4c/common/rbbi_cache.cpp
vendored
5
thirdparty/icu4c/common/rbbi_cache.cpp
vendored
|
@ -158,12 +158,13 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
|
|||
|
||||
// We now have a dictionary character. Get the appropriate language object
|
||||
// to deal with it.
|
||||
const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
|
||||
const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(
|
||||
c, fBI->getLocaleID(ULOC_REQUESTED_LOCALE, status));
|
||||
|
||||
// Ask the language object if there are any breaks. It will add them to the cache and
|
||||
// leave the text pointer on the other side of its range, ready to search for the next one.
|
||||
if (lbe != nullptr) {
|
||||
foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status);
|
||||
foundBreakCount += lbe->findBreaks(text, current, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status);
|
||||
}
|
||||
|
||||
// Reload the loop variables for the next go-round
|
||||
|
|
1
thirdparty/icu4c/common/rbbirb.cpp
vendored
1
thirdparty/icu4c/common/rbbirb.cpp
vendored
|
@ -66,7 +66,6 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
|
|||
fForwardTable = nullptr;
|
||||
fRuleStatusVals = nullptr;
|
||||
fChainRules = false;
|
||||
fLBCMNoChain = false;
|
||||
fLookAheadHardBreak = false;
|
||||
fUSetNodes = nullptr;
|
||||
fRuleStatusVals = nullptr;
|
||||
|
|
3
thirdparty/icu4c/common/rbbirb.h
vendored
3
thirdparty/icu4c/common/rbbirb.h
vendored
|
@ -159,9 +159,6 @@ public:
|
|||
UBool fChainRules; // True for chained Unicode TR style rules.
|
||||
// False for traditional regexp rules.
|
||||
|
||||
UBool fLBCMNoChain; // True: suppress chaining of rules on
|
||||
// chars with LineBreak property == CM.
|
||||
|
||||
UBool fLookAheadHardBreak; // True: Look ahead matches cause an
|
||||
// immediate break, no continuing for the
|
||||
// longest match.
|
||||
|
|
2
thirdparty/icu4c/common/rbbiscan.cpp
vendored
2
thirdparty/icu4c/common/rbbiscan.cpp
vendored
|
@ -547,8 +547,6 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
|||
UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart);
|
||||
if (opt == UNICODE_STRING("chain", 5)) {
|
||||
fRB->fChainRules = true;
|
||||
} else if (opt == UNICODE_STRING("LBCMNoChain", 11)) {
|
||||
fRB->fLBCMNoChain = true;
|
||||
} else if (opt == UNICODE_STRING("forward", 7)) {
|
||||
fRB->fDefaultTree = &fRB->fForwardTree;
|
||||
} else if (opt == UNICODE_STRING("reverse", 7)) {
|
||||
|
|
15
thirdparty/icu4c/common/rbbitblb.cpp
vendored
15
thirdparty/icu4c/common/rbbitblb.cpp
vendored
|
@ -458,21 +458,6 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNod
|
|||
|
||||
// We've got a node that can end a match.
|
||||
|
||||
// !!LBCMNoChain implementation: If this node's val correspond to
|
||||
// the Line Break $CM char class, don't chain from it.
|
||||
// TODO: Remove this. !!LBCMNoChain is deprecated, and is not used
|
||||
// by any of the standard ICU rules.
|
||||
if (fRB->fLBCMNoChain) {
|
||||
UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal);
|
||||
if (c != -1) {
|
||||
// c == -1 occurs with sets containing only the {eof} marker string.
|
||||
ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
|
||||
if (cLBProp == U_LB_COMBINING_MARK) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now iterate over the nodes that can start a match, looking for ones
|
||||
// with the same char class as our ending node.
|
||||
RBBINode *startNode;
|
||||
|
|
127
thirdparty/icu4c/common/ubidi_props_data.h
vendored
127
thirdparty/icu4c/common/ubidi_props_data.h
vendored
|
@ -9,11 +9,11 @@
|
|||
|
||||
#ifdef INCLUDED_FROM_UBIDI_PROPS_C
|
||||
|
||||
static const UVersionInfo ubidi_props_dataVersion={0xf,0,0,0};
|
||||
static const UVersionInfo ubidi_props_dataVersion={0xf,1,0,0};
|
||||
|
||||
static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6bc0,0x65d0,0x28,0x620,0x8cc,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6702b6};
|
||||
static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x6ba0,0x65b0,0x28,0x620,0x8cc,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6702b6};
|
||||
|
||||
static const uint16_t ubidi_props_trieIndex[13024]={
|
||||
static const uint16_t ubidi_props_trieIndex[13008]={
|
||||
0x387,0x38f,0x397,0x39f,0x3b7,0x3bf,0x3c7,0x3cf,0x3a7,0x3af,0x3a7,0x3af,0x3a7,0x3af,0x3a7,0x3af,
|
||||
0x3a7,0x3af,0x3a7,0x3af,0x3d5,0x3dd,0x3e5,0x3ed,0x3f5,0x3fd,0x3f9,0x401,0x409,0x411,0x40c,0x414,
|
||||
0x3a7,0x3af,0x3a7,0x3af,0x41c,0x424,0x3a7,0x3af,0x3a7,0x3af,0x3a7,0x3af,0x42a,0x432,0x43a,0x442,
|
||||
|
@ -38,8 +38,8 @@ static const uint16_t ubidi_props_trieIndex[13024]={
|
|||
0x7e8,0x7f0,0x7f8,0x7ff,0x806,0x80e,0x812,0x7e0,0x67c,0x67c,0x67c,0x81a,0x820,0x67c,0x67c,0x826,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x82e,0x3a7,0x3a7,0x3a7,0x836,0x3a7,0x3a7,0x3a7,0x3f5,
|
||||
0x83e,0x846,0x849,0x3a7,0x851,0x67c,0x67c,0x67f,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x858,0x85e,
|
||||
0x86e,0x866,0x3a7,0x3a7,0x876,0x61f,0x3a7,0x3ce,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x67c,0x835,
|
||||
0x3dc,0x3a7,0x87e,0x886,0x3a7,0x88e,0x896,0x3a7,0x3a7,0x3a7,0x3a7,0x89a,0x3a7,0x3a7,0x674,0x3cd,
|
||||
0x86e,0x866,0x3a7,0x3a7,0x876,0x61f,0x3a7,0x3ce,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x67c,0x87e,
|
||||
0x3dc,0x3a7,0x85e,0x882,0x3a7,0x88a,0x892,0x3a7,0x3a7,0x3a7,0x3a7,0x896,0x3a7,0x3a7,0x674,0x3cd,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
|
@ -96,10 +96,10 @@ static const uint16_t ubidi_props_trieIndex[13024]={
|
|||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x87e,0x67c,0x595,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x8a1,0x3a7,0x3a7,0x8a6,0x8ae,0x3a7,0x3a7,0x5cb,0x67c,0x673,0x3a7,0x3a7,0x8b6,0x3a7,0x3a7,0x3a7,
|
||||
0x8be,0x8c5,0x645,0x8cd,0x3a7,0x3a7,0x5a1,0x8d5,0x3a7,0x8dd,0x8e4,0x3a7,0x501,0x8e9,0x3a7,0x51a,
|
||||
0x3a7,0x8f1,0x8f9,0x51c,0x3a7,0x8fd,0x51b,0x905,0x3a7,0x3a7,0x3a7,0x90b,0x3a7,0x3a7,0x3a7,0x912,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x85e,0x67c,0x595,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x89d,0x3a7,0x3a7,0x8a2,0x8aa,0x3a7,0x3a7,0x5cb,0x67c,0x673,0x3a7,0x3a7,0x8b2,0x3a7,0x3a7,0x3a7,
|
||||
0x8ba,0x8c1,0x645,0x8c9,0x3a7,0x3a7,0x5a1,0x8d1,0x3a7,0x8d9,0x8e0,0x3a7,0x501,0x8e5,0x3a7,0x51a,
|
||||
0x3a7,0x8ed,0x8f5,0x51c,0x3a7,0x8f9,0x51b,0x901,0x3a7,0x3a7,0x3a7,0x907,0x3a7,0x3a7,0x3a7,0x90e,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
|
@ -139,9 +139,9 @@ static const uint16_t ubidi_props_trieIndex[13024]={
|
|||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x926,0x91a,0x91e,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,
|
||||
0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x92e,0x936,0x4a6,0x4a6,0x4a6,0x93b,0x93f,
|
||||
0x947,0x94f,0x953,0x95b,0x4a6,0x4a6,0x4a6,0x95f,0x967,0x397,0x96f,0x977,0x3a7,0x3a7,0x3a7,0x97f,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x922,0x916,0x91a,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,
|
||||
0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x92a,0x932,0x4a6,0x4a6,0x4a6,0x937,0x93b,
|
||||
0x943,0x94b,0x94f,0x957,0x4a6,0x4a6,0x4a6,0x95b,0x963,0x397,0x96b,0x973,0x3a7,0x3a7,0x3a7,0x97b,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xe9c,0xe9c,0xedc,0xf1c,0xe9c,0xe9c,0xe9c,0xe9c,0xe9c,0xe9c,0xf54,0xf94,0xfd4,0xfe4,0x1024,0x1030,
|
||||
|
@ -178,68 +178,68 @@ static const uint16_t ubidi_props_trieIndex[13024]={
|
|||
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd89,
|
||||
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,
|
||||
0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd89,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x987,0x3a7,0x67c,0x67c,0x98f,0x61f,0x3a7,0x514,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x997,0x3a7,0x3a7,0x3a7,0x99e,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x983,0x3a7,0x67c,0x67c,0x98b,0x61f,0x3a7,0x514,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x993,0x3a7,0x3a7,0x3a7,0x99a,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9a6,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,
|
||||
0x9ae,0x9b2,0x43c,0x43c,0x43c,0x43c,0x9c2,0x9ba,0x43c,0x9ca,0x43c,0x43c,0x9d2,0x9d8,0x43c,0x43c,
|
||||
0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9e8,0x9e0,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,
|
||||
0x43c,0x43c,0x43c,0x9f0,0x43c,0x9f8,0x4a6,0xa00,0x43c,0xa08,0xa0f,0xa15,0xa1d,0xa21,0xa29,0x43c,
|
||||
0x51b,0xa31,0xa38,0xa3f,0x41e,0xa47,0x569,0x3a7,0x501,0xa4e,0x3a7,0xa54,0x41e,0xa59,0xa61,0x3a7,
|
||||
0x3a7,0xa66,0x51b,0x3a7,0x3a7,0x3a7,0x836,0xa6e,0x41e,0x5a3,0x57e,0xa75,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0xa31,0xa7d,0x3a7,0x3a7,0xa85,0xa8d,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa91,0xa99,0x3a7,
|
||||
0x3a7,0xaa1,0x57e,0xaa9,0x3a7,0xaaf,0x3a7,0x3a7,0x60f,0xab7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xabc,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xac3,0xacb,0x3a7,0x3a7,0x3a7,0xace,0x57e,0xad6,
|
||||
0xada,0xae2,0x3a7,0xae9,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xaf0,0x3a7,0x3a7,0xafe,0xaf8,0x3a7,0x3a7,0x3a7,0xb06,0xb0e,0x3a7,0xb12,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x5a5,0x41e,0x99e,0xb1a,0x3a7,0x3a7,0x3a7,0xb27,0xb22,0x3a7,
|
||||
0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9a2,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,
|
||||
0x9aa,0x9ae,0x43c,0x43c,0x43c,0x43c,0x9be,0x9b6,0x43c,0x9c6,0x43c,0x43c,0x9ce,0x9d4,0x43c,0x43c,
|
||||
0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x9e4,0x9dc,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,
|
||||
0x43c,0x43c,0x43c,0x9ec,0x43c,0x9f4,0x4a6,0x9fc,0x43c,0xa04,0xa0b,0xa11,0xa19,0xa1d,0xa25,0x43c,
|
||||
0x51b,0xa2d,0xa34,0xa3b,0x41e,0xa43,0x569,0x3a7,0x501,0xa4a,0x3a7,0xa50,0x41e,0xa55,0xa5d,0x3a7,
|
||||
0x3a7,0xa62,0x51b,0x3a7,0x3a7,0x3a7,0x836,0xa6a,0x41e,0x5a3,0x57e,0xa71,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0xa2d,0xa79,0x3a7,0x3a7,0xa81,0xa89,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa8d,0xa95,0x3a7,
|
||||
0x3a7,0xa9d,0x57e,0xaa5,0x3a7,0xaab,0x3a7,0x3a7,0x60f,0xab3,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xab8,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xabf,0xac7,0x3a7,0x3a7,0x3a7,0xaca,0x57e,0xad2,
|
||||
0xad6,0xade,0x3a7,0xae5,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xaec,0x3a7,0x3a7,0xafa,0xaf4,0x3a7,0x3a7,0x3a7,0xb02,0xb0a,0x3a7,0xb0e,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x5a5,0x41e,0x99a,0xb16,0x3a7,0x3a7,0x3a7,0xb23,0xb1e,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xb2f,0xb37,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb3d,
|
||||
0x3a7,0xb43,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xb2b,0xb33,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb39,
|
||||
0x3a7,0xb3f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0xa55,0x3a7,0xb49,0x3a7,0x3a7,0xb51,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0xa51,0x3a7,0xb45,0x3a7,0x3a7,0xb4d,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x535,0xb59,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x535,0xb55,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xb61,0x500,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0xb69,0xb71,0xb77,0x3a7,0xb7d,0x67c,0x67c,0xb85,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x67c,0x67c,0xb8d,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb93,
|
||||
0x3a7,0xb9a,0x3a7,0xb96,0x3a7,0xb9d,0x3a7,0xba5,0xba9,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xbb1,0x3f5,0xbb8,0xbbf,0xbc7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xb5d,0x500,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0xb65,0xb6d,0xb73,0x3a7,0xb79,0x67c,0x67c,0xb81,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x67c,0x67c,0xb89,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xb8f,
|
||||
0x3a7,0xb96,0x3a7,0xb92,0x3a7,0xb99,0x3a7,0xba1,0xba5,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3f5,0xbad,0x3f5,0xbb4,0xbbb,0xbc3,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xbcf,0xbd7,0x3a7,0x3a7,0xa55,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0xb43,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa81,0x3a7,
|
||||
0xbdc,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xbe4,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0xbec,
|
||||
0x43c,0xbf4,0xbf4,0xbfb,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,
|
||||
0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x91e,0x4a6,0x4a6,0x43c,
|
||||
0x43c,0x4a6,0x4a6,0xc03,0x43c,0x43c,0x43c,0x43c,0x43c,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,
|
||||
0xc0b,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x67c,0xc13,0x67c,0x67c,0x67f,0xc18,0xc1c,
|
||||
0x858,0xc24,0x3c9,0x3a7,0xc2a,0x3a7,0xc2f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x783,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xbcb,0xbd3,0x3a7,0x3a7,0xa51,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0xb3f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xa7d,0x3a7,
|
||||
0xbd8,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0xbe0,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0xbe8,
|
||||
0x43c,0xbf0,0xbf0,0xbf7,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,
|
||||
0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x91a,0x4a6,0x4a6,0x43c,
|
||||
0x43c,0x4a6,0x4a6,0xbff,0x43c,0x43c,0x43c,0x43c,0x43c,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,0x4a6,
|
||||
0xc07,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x43c,0x67c,0xc0f,0x67c,0x67c,0x67f,0xc14,0xc18,
|
||||
0x858,0xc20,0x3c9,0x3a7,0xc26,0x3a7,0xc2b,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x783,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,
|
||||
0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0xc37,
|
||||
0x98f,0x67c,0x67c,0x67c,0xc3e,0x67c,0x67c,0xc45,0xc4d,0xc13,0x67c,0xc55,0x67c,0xc5d,0xc62,0x3a7,
|
||||
0x3a7,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67f,0xc6a,0xc73,0xc77,0xc7f,
|
||||
0xc6f,0x67c,0x67c,0x67c,0x67c,0xc87,0x67c,0x792,0xc8f,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0xc33,
|
||||
0x98b,0x67c,0x67c,0x67c,0xc3a,0x67c,0x67c,0xc41,0xc49,0xc0f,0x67c,0xc51,0x67c,0xc59,0xc5e,0x3a7,
|
||||
0x3a7,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67c,0x67f,0xc66,0xc6f,0xc73,0xc7b,
|
||||
0xc6b,0x67c,0x67c,0x67c,0x67c,0xc83,0x67c,0x792,0xc8b,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc96,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc92,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc96,0xca6,0xc9e,0xc9e,0xc9e,0xca7,0xca7,0xca7,
|
||||
0xca7,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0xcaf,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,
|
||||
0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,
|
||||
0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,
|
||||
0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,
|
||||
0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0xca7,0x386,0x386,0x386,0x12,0x12,0x12,0x12,
|
||||
0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0x3a7,0xc92,0xca2,0xc9a,0xc9a,0xc9a,0xca3,0xca3,0xca3,
|
||||
0xca3,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0x3f5,0xcab,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,
|
||||
0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,
|
||||
0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,
|
||||
0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,
|
||||
0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0xca3,0x386,0x386,0x386,0x12,0x12,0x12,0x12,
|
||||
0x12,0x12,0x12,0x12,0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12,
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4,
|
||||
4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2,
|
||||
|
@ -551,15 +551,14 @@ static const uint16_t ubidi_props_trieIndex[13024]={
|
|||
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
|
||||
0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
|
||||
0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,0,0,0,0,
|
||||
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,
|
||||
0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,
|
||||
0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,
|
||||
0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,
|
||||
0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
|
||||
0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,
|
||||
0,0xb1,0xb1,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,
|
||||
|
@ -935,13 +934,13 @@ static const UBiDiProps ubidi_props_singleton={
|
|||
ubidi_props_trieIndex+3612,
|
||||
nullptr,
|
||||
3612,
|
||||
9412,
|
||||
9396,
|
||||
0x1a0,
|
||||
0xe9c,
|
||||
0x0,
|
||||
0x0,
|
||||
0x110000,
|
||||
0x32dc,
|
||||
0x32cc,
|
||||
nullptr, 0, false, false, 0, nullptr
|
||||
},
|
||||
{ 2,2,0,0 }
|
||||
|
|
43
thirdparty/icu4c/common/ucase.cpp
vendored
43
thirdparty/icu4c/common/ucase.cpp
vendored
|
@ -317,43 +317,6 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
|
|||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* Add the simple case closure mapping,
|
||||
* except if there is not actually an scf relationship between the two characters.
|
||||
* TODO: Unicode should probably add the corresponding scf mappings.
|
||||
* See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23.
|
||||
* If & when those scf mappings are added, we should be able to remove all of these exceptions.
|
||||
*/
|
||||
void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) {
|
||||
switch (c) {
|
||||
case 0x0390:
|
||||
if (t == 0x1FD3) { return; }
|
||||
break;
|
||||
case 0x03B0:
|
||||
if (t == 0x1FE3) { return; }
|
||||
break;
|
||||
case 0x1FD3:
|
||||
if (t == 0x0390) { return; }
|
||||
break;
|
||||
case 0x1FE3:
|
||||
if (t == 0x03B0) { return; }
|
||||
break;
|
||||
case 0xFB05:
|
||||
if (t == 0xFB06) { return; }
|
||||
break;
|
||||
case 0xFB06:
|
||||
if (t == 0xFB05) { return; }
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
sa->add(sa->set, t);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
U_CFUNC void U_EXPORT2
|
||||
ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
|
||||
uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
|
||||
|
@ -397,7 +360,7 @@ ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
|
|||
pe=pe0;
|
||||
UChar32 mapping;
|
||||
GET_SLOT_VALUE(excWord, idx, pe, mapping);
|
||||
addOneSimpleCaseClosure(c, mapping, sa);
|
||||
sa->add(sa->set, mapping);
|
||||
}
|
||||
}
|
||||
if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
|
||||
|
@ -405,7 +368,7 @@ ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
|
|||
int32_t delta;
|
||||
GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
|
||||
UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta;
|
||||
addOneSimpleCaseClosure(c, mapping, sa);
|
||||
sa->add(sa->set, mapping);
|
||||
}
|
||||
|
||||
/* get the closure string pointer & length */
|
||||
|
@ -448,7 +411,7 @@ ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
|
|||
for(int32_t idx=0; idx<closureLength;) {
|
||||
UChar32 mapping;
|
||||
U16_NEXT_UNSAFE(closure, idx, mapping);
|
||||
addOneSimpleCaseClosure(c, mapping, sa);
|
||||
sa->add(sa->set, mapping);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
125
thirdparty/icu4c/common/ucase_props_data.h
vendored
125
thirdparty/icu4c/common/ucase_props_data.h
vendored
|
@ -9,9 +9,9 @@
|
|||
|
||||
#ifdef INCLUDED_FROM_UCASE_CPP
|
||||
|
||||
static const UVersionInfo ucase_props_dataVersion={0xf,0,0,0};
|
||||
static const UVersionInfo ucase_props_dataVersion={0xf,1,0,0};
|
||||
|
||||
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x76f2,0x66c8,0x683,0x172,0,0,0,0,0,0,0,0,0,0,3};
|
||||
static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x76ec,0x66c8,0x680,0x172,0,0,0,0,0,0,0,0,0,0,3};
|
||||
|
||||
static const uint16_t ucase_props_trieIndex[13148]={
|
||||
0x355,0x35d,0x365,0x36d,0x37b,0x383,0x38b,0x393,0x39b,0x3a3,0x3aa,0x3b2,0x3ba,0x3c2,0x3ca,0x3d2,
|
||||
|
@ -509,9 +509,9 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
0x39b9,0x3a29,0x3a99,0x3b09,0x3b7b,0x3beb,0x3c5b,0x3ccb,0x3d3b,0x3dab,0x3e1b,0x3e8b,0x411,0x411,0x3ef9,0x3f79,
|
||||
0x3fe9,0,0x4069,0x40e9,0xfc12,0xfc12,0xdb12,0xdb12,0x419b,4,0x4209,4,4,4,0x4259,0x42d9,
|
||||
0x4349,0,0x43c9,0x4449,0xd512,0xd512,0xd512,0xd512,0x44fb,4,4,4,0x411,0x411,0x4569,0x4619,
|
||||
0,0,0x46e9,0x4769,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4819,0x48c9,
|
||||
0x4999,0x391,0x4a19,0x4a99,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b49,0x4bc9,
|
||||
0x4c39,0,0x4cb9,0x4d39,0xc012,0xc012,0xc112,0xc112,0x4deb,4,4,0,0,0,0,0,
|
||||
0,0,0x46d9,0x4759,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4809,0x48b9,
|
||||
0x4979,0x391,0x49f9,0x4a79,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b29,0x4ba9,
|
||||
0x4c19,0,0x4c99,0x4d19,0xc012,0xc012,0xc112,0xc112,0x4dcb,4,4,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,
|
||||
0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4,
|
||||
0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0,
|
||||
|
@ -525,8 +525,8 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e5a,0,
|
||||
2,0,0x4e9a,0x4eda,2,2,0,1,2,2,0xe12,2,1,0,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e3a,0,
|
||||
2,0,0x4e7a,0x4eba,2,2,0,1,2,2,0xe12,2,1,0,0,0,
|
||||
0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1,
|
||||
0x21,0x21,0,0,0,0,0xf211,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812,
|
||||
|
@ -541,13 +541,13 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,
|
||||
0x1812,0x1812,0x1812,0x1812,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
|
||||
0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,
|
||||
0xe811,0xe811,0xe811,0xe811,0x92,0xff91,0x4f1a,0x4f3a,0x4f5a,0x4f79,0x4f99,0x92,0xff91,0x92,0xff91,0x92,
|
||||
0xff91,0x4fba,0x4fda,0x4ffa,0x501a,1,0x92,0xff91,1,0x92,0xff91,1,1,1,1,1,
|
||||
0x25,5,0x503a,0x503a,0x92,0xff91,0x92,0xff91,1,0,0,0,0,0,0,0x92,
|
||||
0xe811,0xe811,0xe811,0xe811,0x92,0xff91,0x4efa,0x4f1a,0x4f3a,0x4f59,0x4f79,0x92,0xff91,0x92,0xff91,0x92,
|
||||
0xff91,0x4f9a,0x4fba,0x4fda,0x4ffa,1,0x92,0xff91,1,0x92,0xff91,1,1,1,1,1,
|
||||
0x25,5,0x501a,0x501a,0x92,0xff91,0x92,0xff91,1,0,0,0,0,0,0,0x92,
|
||||
0xff91,0x92,0xff91,0x44,0x44,0x44,0x92,0xff91,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,
|
||||
0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,
|
||||
0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0,0x5059,0,0,0,0,0,0x5059,0,0,
|
||||
0,0,0,0,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,
|
||||
0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,
|
||||
0x5039,0x5039,0x5039,0x5039,0x5039,0x5039,0,0x5039,0,0,0,0,0,0x5039,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
|
||||
|
@ -562,7 +562,7 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x507a,0x50b9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x505a,0x5099,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,4,4,4,0,
|
||||
0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,4,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
|
@ -573,11 +573,11 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
4,4,4,4,4,4,4,4,4,4,4,4,4,4,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,1,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92,0xff91,0x50fa,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92,0xff91,0x511a,1,0,
|
||||
5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92,0xff91,0x50da,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92,0xff91,0x50fa,1,0,
|
||||
0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x513a,0x515a,0x517a,0x519a,0x513a,1,0x51ba,0x51da,0x51fa,0x521a,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xe812,0x523a,0x525a,0x92,0xff91,0x92,0xff91,0,
|
||||
0x92,0xff91,0x511a,0x513a,0x515a,0x517a,0x511a,1,0x519a,0x51ba,0x51da,0x51fa,0x92,0xff91,0x92,0xff91,
|
||||
0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xe812,0x521a,0x523a,0x92,0xff91,0x92,0xff91,0,
|
||||
0,0,0,0,0x92,0xff91,0,1,0,1,0x92,0xff91,0x92,0xff91,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,5,5,5,0x92,0xff91,0,5,5,1,0,0,0,0,0,
|
||||
|
@ -607,17 +607,17 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
|
||||
0,0,0,4,4,0,0x64,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,0x5279,1,1,1,1,
|
||||
1,1,1,1,1,1,1,1,1,1,1,0x5259,1,1,1,1,
|
||||
1,1,1,4,5,5,5,5,1,1,1,1,1,1,1,1,
|
||||
1,5,4,4,0,0,0,0,0x5299,0x52c9,0x52f9,0x5329,0x5359,0x5389,0x53b9,0x53e9,
|
||||
0x5419,0x5449,0x5479,0x54a9,0x54d9,0x5509,0x5539,0x5569,0x5b99,0x5bc9,0x5bf9,0x5c29,0x5c59,0x5c89,0x5cb9,0x5ce9,
|
||||
0x5d19,0x5d49,0x5d79,0x5da9,0x5dd9,0x5e09,0x5e39,0x5e69,0x5e99,0x5ec9,0x5ef9,0x5f29,0x5f59,0x5f89,0x5fb9,0x5fe9,
|
||||
0x6019,0x6049,0x6079,0x60a9,0x60d9,0x6109,0x6139,0x6169,0x5599,0x55c9,0x55f9,0x5629,0x5659,0x5689,0x56b9,0x56e9,
|
||||
0x5719,0x5749,0x5779,0x57a9,0x57d9,0x5809,0x5839,0x5869,0x5899,0x58c9,0x58f9,0x5929,0x5959,0x5989,0x59b9,0x59e9,
|
||||
0x5a19,0x5a49,0x5a79,0x5aa9,0x5ad9,0x5b09,0x5b39,0x5b69,0,0,0,0,0,4,0,0,
|
||||
1,5,4,4,0,0,0,0,0x5279,0x52a9,0x52d9,0x5309,0x5339,0x5369,0x5399,0x53c9,
|
||||
0x53f9,0x5429,0x5459,0x5489,0x54b9,0x54e9,0x5519,0x5549,0x5b79,0x5ba9,0x5bd9,0x5c09,0x5c39,0x5c69,0x5c99,0x5cc9,
|
||||
0x5cf9,0x5d29,0x5d59,0x5d89,0x5db9,0x5de9,0x5e19,0x5e49,0x5e79,0x5ea9,0x5ed9,0x5f09,0x5f39,0x5f69,0x5f99,0x5fc9,
|
||||
0x5ff9,0x6029,0x6059,0x6089,0x60b9,0x60e9,0x6119,0x6149,0x5579,0x55a9,0x55d9,0x5609,0x5639,0x5669,0x5699,0x56c9,
|
||||
0x56f9,0x5729,0x5759,0x5789,0x57b9,0x57e9,0x5819,0x5849,0x5879,0x58a9,0x58d9,0x5909,0x5939,0x5969,0x5999,0x59c9,
|
||||
0x59f9,0x5a29,0x5a59,0x5a89,0x5ab9,0x5ae9,0x5b19,0x5b49,0,0,0,0,0,4,0,0,
|
||||
4,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0x6199,0x6219,0x6299,0x6319,0x63c9,0x6479,0x6519,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0x65b9,0x6639,0x66b9,0x6739,0x67b9,
|
||||
0,0,0,0,0,0,0,0,0x6179,0x61f9,0x6279,0x62f9,0x63a9,0x6459,0x64e9,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0x6589,0x6609,0x6689,0x6709,0x6789,
|
||||
0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,4,
|
||||
4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,4,
|
||||
|
@ -838,7 +838,7 @@ static const uint16_t ucase_props_trieIndex[13148]={
|
|||
0,0,0,0,0,0,0,0,0,0,0,0
|
||||
};
|
||||
|
||||
static const uint16_t ucase_props_exceptions[1667]={
|
||||
static const uint16_t ucase_props_exceptions[1664]={
|
||||
0xc850,0x20,2,0x130,0x131,0x4810,0x20,0x841,0x6b,1,0x212a,0x841,0x73,1,0x17f,0x5c50,
|
||||
0x20,2,0x130,0x131,0x844,0x4b,1,0x212a,0x844,0x53,1,0x17f,0x806,0x3bc,0x39c,0x841,
|
||||
0xe5,1,0x212b,0x8c0,1,0x2220,0x73,0x73,0x53,0x53,0x53,0x73,0x1e9e,0x844,0xc5,1,
|
||||
|
@ -909,41 +909,40 @@ static const uint16_t ucase_props_exceptions[1667]={
|
|||
0x3b7,0x3b9,0x397,0x399,0x880,0x2220,0x3ae,0x3b9,0x389,0x399,0x389,0x345,0x880,0x2220,0x3b7,0x342,
|
||||
0x397,0x342,0x397,0x342,0x880,0x3330,0x3b7,0x342,0x3b9,0x397,0x342,0x399,0x397,0x342,0x345,0xc90,
|
||||
9,0x220,0x3b7,0x3b9,0x397,0x399,0x880,0x3330,0x3b9,0x308,0x300,0x399,0x308,0x300,0x399,0x308,
|
||||
0x300,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x390,0x880,0x2220,
|
||||
0x3b9,0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,0x308,0x342,0x399,0x308,
|
||||
0x342,0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,0x8c0,1,0x3330,0x3c5,
|
||||
0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x3b0,0x880,0x2220,0x3c1,0x313,0x3a1,0x313,0x3a1,
|
||||
0x313,0x880,0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,0x308,0x342,0x3a5,0x308,
|
||||
0x342,0x3a5,0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,0x890,9,0x220,0x3c9,
|
||||
0x3b9,0x3a9,0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,0x2220,0x3c9,0x342,0x3a9,
|
||||
0x342,0x3a9,0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,0x342,0x345,0xc90,9,
|
||||
0x220,0x3c9,0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,0x4b,0xc50,0x2046,1,
|
||||
0xc5,0xc10,0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,0x2a1c,0xc10,0x29fd,0xc10,
|
||||
0x2a1f,0xc10,0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,0xa64a,1,0x1c88,0xc10,
|
||||
0x8a04,0xc10,0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,0xa512,0xc10,0xa52a,0xc10,
|
||||
0xa515,0x810,0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,0x806,0x13a1,0x13a1,0x806,
|
||||
0x13a2,0x13a2,0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,0x13a6,0x13a6,0x806,0x13a7,
|
||||
0x13a7,0x806,0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,0x13ab,0x806,0x13ac,0x13ac,
|
||||
0x806,0x13ad,0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,0x806,0x13b1,0x13b1,0x806,
|
||||
0x13b2,0x13b2,0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,0x13b6,0x13b6,0x806,0x13b7,
|
||||
0x13b7,0x806,0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,0x13bb,0x806,0x13bc,0x13bc,
|
||||
0x806,0x13bd,0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,0x806,0x13c1,0x13c1,0x806,
|
||||
0x13c2,0x13c2,0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,0x13c6,0x13c6,0x806,0x13c7,
|
||||
0x13c7,0x806,0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,0x13cb,0x806,0x13cc,0x13cc,
|
||||
0x806,0x13cd,0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,0x806,0x13d1,0x13d1,0x806,
|
||||
0x13d2,0x13d2,0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,0x13d6,0x13d6,0x806,0x13d7,
|
||||
0x13d7,0x806,0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,0x13db,0x806,0x13dc,0x13dc,
|
||||
0x806,0x13dd,0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,0x806,0x13e1,0x13e1,0x806,
|
||||
0x13e2,0x13e2,0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,0x13e6,0x13e6,0x806,0x13e7,
|
||||
0x13e7,0x806,0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,0x13eb,0x806,0x13ec,0x13ec,
|
||||
0x806,0x13ed,0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,0x66,0x46,0x46,0x46,
|
||||
0x66,0x880,0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,0x6c,0x46,0x4c,0x46,
|
||||
0x6c,0x880,0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,0x880,0x3330,0x66,0x66,
|
||||
0x6c,0x46,0x46,0x4c,0x46,0x66,0x6c,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,
|
||||
0xfb06,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,0x2220,0x574,0x576,0x544,
|
||||
0x546,0x544,0x576,0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,0x2220,0x574,0x56b,0x544,
|
||||
0x53b,0x544,0x56b,0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,0x2220,0x574,0x56d,0x544,
|
||||
0x53d,0x544,0x56d
|
||||
0x300,0x882,0x390,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x880,0x2220,0x3b9,
|
||||
0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,0x308,0x342,0x399,0x308,0x342,
|
||||
0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,0x882,0x3b0,0x3330,0x3c5,0x308,
|
||||
0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x880,0x2220,0x3c1,0x313,0x3a1,0x313,0x3a1,0x313,0x880,
|
||||
0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,0x308,0x342,0x3a5,0x308,0x342,0x3a5,
|
||||
0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,0x890,9,0x220,0x3c9,0x3b9,0x3a9,
|
||||
0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,0x2220,0x3c9,0x342,0x3a9,0x342,0x3a9,
|
||||
0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,0x342,0x345,0xc90,9,0x220,0x3c9,
|
||||
0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,0x4b,0xc50,0x2046,1,0xc5,0xc10,
|
||||
0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,0x2a1c,0xc10,0x29fd,0xc10,0x2a1f,0xc10,
|
||||
0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,0xa64a,1,0x1c88,0xc10,0x8a04,0xc10,
|
||||
0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,0xa512,0xc10,0xa52a,0xc10,0xa515,0x810,
|
||||
0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,0x806,0x13a1,0x13a1,0x806,0x13a2,0x13a2,
|
||||
0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,0x13a6,0x13a6,0x806,0x13a7,0x13a7,0x806,
|
||||
0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,0x13ab,0x806,0x13ac,0x13ac,0x806,0x13ad,
|
||||
0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,0x806,0x13b1,0x13b1,0x806,0x13b2,0x13b2,
|
||||
0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,0x13b6,0x13b6,0x806,0x13b7,0x13b7,0x806,
|
||||
0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,0x13bb,0x806,0x13bc,0x13bc,0x806,0x13bd,
|
||||
0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,0x806,0x13c1,0x13c1,0x806,0x13c2,0x13c2,
|
||||
0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,0x13c6,0x13c6,0x806,0x13c7,0x13c7,0x806,
|
||||
0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,0x13cb,0x806,0x13cc,0x13cc,0x806,0x13cd,
|
||||
0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,0x806,0x13d1,0x13d1,0x806,0x13d2,0x13d2,
|
||||
0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,0x13d6,0x13d6,0x806,0x13d7,0x13d7,0x806,
|
||||
0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,0x13db,0x806,0x13dc,0x13dc,0x806,0x13dd,
|
||||
0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,0x806,0x13e1,0x13e1,0x806,0x13e2,0x13e2,
|
||||
0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,0x13e6,0x13e6,0x806,0x13e7,0x13e7,0x806,
|
||||
0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,0x13eb,0x806,0x13ec,0x13ec,0x806,0x13ed,
|
||||
0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,0x66,0x46,0x46,0x46,0x66,0x880,
|
||||
0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,0x6c,0x46,0x4c,0x46,0x6c,0x880,
|
||||
0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,0x880,0x3330,0x66,0x66,0x6c,0x46,
|
||||
0x46,0x4c,0x46,0x66,0x6c,0x882,0xfb06,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0x8c0,1,
|
||||
0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,0x2220,0x574,0x576,0x544,0x546,0x544,0x576,
|
||||
0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,0x2220,0x574,0x56b,0x544,0x53b,0x544,0x56b,
|
||||
0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,0x2220,0x574,0x56d,0x544,0x53d,0x544,0x56d
|
||||
};
|
||||
|
||||
static const uint16_t ucase_props_unfold[370]={
|
||||
|
|
15
thirdparty/icu4c/common/ucasemap.cpp
vendored
15
thirdparty/icu4c/common/ucasemap.cpp
vendored
|
@ -679,14 +679,18 @@ void toUpper(uint32_t options,
|
|||
// Adding one only to the final vowel in a longer sequence
|
||||
// (which does not occur in normal writing) would require lookahead.
|
||||
// Set the same flag as for preserving an existing dialytika.
|
||||
if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
|
||||
(upper == 0x399 || upper == 0x3A5)) {
|
||||
data |= HAS_DIALYTIKA;
|
||||
if ((data & HAS_VOWEL) != 0 &&
|
||||
(state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) !=
|
||||
0 &&
|
||||
(upper == 0x399 || upper == 0x3A5)) {
|
||||
data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) != 0 ? HAS_DIALYTIKA
|
||||
: HAS_COMBINING_DIALYTIKA;
|
||||
}
|
||||
int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
|
||||
if ((data & HAS_YPOGEGRAMMENI) != 0) {
|
||||
numYpogegrammeni = 1;
|
||||
}
|
||||
const UBool hasPrecomposedAccent = (data & HAS_ACCENT) != 0;
|
||||
// Skip combining diacritics after this Greek letter.
|
||||
int32_t nextNextIndex = nextIndex;
|
||||
while (nextIndex < srcLength) {
|
||||
|
@ -704,7 +708,8 @@ void toUpper(uint32_t options,
|
|||
}
|
||||
}
|
||||
if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
|
||||
nextState |= AFTER_VOWEL_WITH_ACCENT;
|
||||
nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT
|
||||
: AFTER_VOWEL_WITH_COMBINING_ACCENT;
|
||||
}
|
||||
// Map according to Greek rules.
|
||||
UBool addTonos = false;
|
||||
|
@ -715,7 +720,7 @@ void toUpper(uint32_t options,
|
|||
!isFollowedByCasedLetter(src, nextIndex, srcLength)) {
|
||||
// Keep disjunctive "or" with (only) a tonos.
|
||||
// We use the same "word boundary" conditions as for the Final_Sigma test.
|
||||
if (i == nextIndex) {
|
||||
if (hasPrecomposedAccent) {
|
||||
upper = 0x389; // Preserve the precomposed form.
|
||||
} else {
|
||||
addTonos = true;
|
||||
|
|
3
thirdparty/icu4c/common/ucasemap_imp.h
vendored
3
thirdparty/icu4c/common/ucasemap_imp.h
vendored
|
@ -263,7 +263,8 @@ static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALY
|
|||
|
||||
// State bits.
|
||||
static const uint32_t AFTER_CASED = 1;
|
||||
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
|
||||
static const uint32_t AFTER_VOWEL_WITH_COMBINING_ACCENT = 2;
|
||||
static const uint32_t AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT = 4;
|
||||
|
||||
uint32_t getLetterData(UChar32 c);
|
||||
|
||||
|
|
6150
thirdparty/icu4c/common/uchar_props_data.h
vendored
6150
thirdparty/icu4c/common/uchar_props_data.h
vendored
File diff suppressed because it is too large
Load diff
90
thirdparty/icu4c/common/ucurr.cpp
vendored
90
thirdparty/icu4c/common/ucurr.cpp
vendored
|
@ -11,6 +11,8 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "unicode/ucurr.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ures.h"
|
||||
|
@ -20,6 +22,7 @@
|
|||
#include "unicode/usetiter.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "bytesinkutil.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -520,14 +523,18 @@ ucurr_forLocale(const char* locale,
|
|||
return 0;
|
||||
}
|
||||
|
||||
char currency[4]; // ISO currency codes are alpha3 codes.
|
||||
UErrorCode localStatus = U_ZERO_ERROR;
|
||||
int32_t resLen = uloc_getKeywordValue(locale, "currency",
|
||||
currency, UPRV_LENGTHOF(currency), &localStatus);
|
||||
if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) {
|
||||
CharString currency;
|
||||
{
|
||||
CharStringByteSink sink(&currency);
|
||||
ulocimp_getKeywordValue(locale, "currency", sink, &localStatus);
|
||||
}
|
||||
int32_t resLen = currency.length();
|
||||
|
||||
if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency.data(), resLen)) {
|
||||
if (resLen < buffCapacity) {
|
||||
T_CString_toUpperCase(currency);
|
||||
u_charsToUChars(currency, buff, resLen);
|
||||
T_CString_toUpperCase(currency.data());
|
||||
u_charsToUChars(currency.data(), buff, resLen);
|
||||
}
|
||||
return u_terminateUChars(buff, buffCapacity, resLen, ec);
|
||||
}
|
||||
|
@ -597,11 +604,15 @@ ucurr_forLocale(const char* locale,
|
|||
|
||||
if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
|
||||
// We don't know about it. Check to see if we support the variant.
|
||||
uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
|
||||
CharString parent;
|
||||
{
|
||||
CharStringByteSink sink(&parent);
|
||||
ulocimp_getParent(locale, sink, ec);
|
||||
}
|
||||
*ec = U_USING_FALLBACK_WARNING;
|
||||
// TODO: Loop over the shortened id rather than recursing and
|
||||
// TODO: Loop over the parent rather than recursing and
|
||||
// looking again for a currency keyword.
|
||||
return ucurr_forLocale(id, buff, buffCapacity, ec);
|
||||
return ucurr_forLocale(parent.data(), buff, buffCapacity, ec);
|
||||
}
|
||||
if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
|
||||
// There is nothing to fallback to. Report the failure/warning if possible.
|
||||
|
@ -624,20 +635,22 @@ ucurr_forLocale(const char* locale,
|
|||
* @return true if the fallback happened; false if locale is already
|
||||
* root ("").
|
||||
*/
|
||||
static UBool fallback(char *loc) {
|
||||
if (!*loc) {
|
||||
static UBool fallback(CharString& loc) {
|
||||
if (loc.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if (uprv_strcmp(loc, "en_GB") == 0) {
|
||||
if (loc == "en_GB") {
|
||||
// HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
|
||||
// in order to consume the correct data strings. This hack will be removed
|
||||
// when proper data sink loading is implemented here.
|
||||
// NOTE: "001" adds 1 char over "GB". However, both call sites allocate
|
||||
// arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
|
||||
uprv_strcpy(loc + 3, "001");
|
||||
loc.truncate(3);
|
||||
loc.append("001", status);
|
||||
} else {
|
||||
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
|
||||
CharString tmp;
|
||||
CharStringByteSink sink(&tmp);
|
||||
ulocimp_getParent(loc.data(), sink, &status);
|
||||
loc = std::move(tmp);
|
||||
}
|
||||
/*
|
||||
char *i = uprv_strrchr(loc, '_');
|
||||
|
@ -692,9 +705,12 @@ ucurr_getName(const char16_t* currency,
|
|||
// this function.
|
||||
UErrorCode ec2 = U_ZERO_ERROR;
|
||||
|
||||
char loc[ULOC_FULLNAME_CAPACITY];
|
||||
uloc_getName(locale, loc, sizeof(loc), &ec2);
|
||||
if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
CharString loc;
|
||||
{
|
||||
CharStringByteSink sink(&loc);
|
||||
ulocimp_getName(locale, sink, &ec2);
|
||||
}
|
||||
if (U_FAILURE(ec2)) {
|
||||
*ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
@ -707,7 +723,7 @@ ucurr_getName(const char16_t* currency,
|
|||
|
||||
const char16_t* s = nullptr;
|
||||
ec2 = U_ZERO_ERROR;
|
||||
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2));
|
||||
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc.data(), &ec2));
|
||||
|
||||
if (nameStyle == UCURR_NARROW_SYMBOL_NAME || nameStyle == UCURR_FORMAL_SYMBOL_NAME || nameStyle == UCURR_VARIANT_SYMBOL_NAME) {
|
||||
CharString key;
|
||||
|
@ -791,9 +807,12 @@ ucurr_getPluralName(const char16_t* currency,
|
|||
// this function.
|
||||
UErrorCode ec2 = U_ZERO_ERROR;
|
||||
|
||||
char loc[ULOC_FULLNAME_CAPACITY];
|
||||
uloc_getName(locale, loc, sizeof(loc), &ec2);
|
||||
if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
CharString loc;
|
||||
{
|
||||
CharStringByteSink sink(&loc);
|
||||
ulocimp_getName(locale, sink, &ec2);
|
||||
}
|
||||
if (U_FAILURE(ec2)) {
|
||||
*ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
@ -803,7 +822,7 @@ ucurr_getPluralName(const char16_t* currency,
|
|||
|
||||
const char16_t* s = nullptr;
|
||||
ec2 = U_ZERO_ERROR;
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc.data(), &ec2);
|
||||
|
||||
rb = ures_getByKey(rb, CURRENCYPLURALS, rb, &ec2);
|
||||
|
||||
|
@ -904,13 +923,17 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_
|
|||
*total_currency_name_count = 0;
|
||||
*total_currency_symbol_count = 0;
|
||||
const char16_t* s = nullptr;
|
||||
char locale[ULOC_FULLNAME_CAPACITY] = "";
|
||||
uprv_strcpy(locale, loc);
|
||||
CharString locale;
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
locale.append(loc, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
}
|
||||
const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
|
||||
for (;;) {
|
||||
UErrorCode ec2 = U_ZERO_ERROR;
|
||||
// TODO: ures_openDirect?
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2);
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale.data(), &ec2);
|
||||
UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, nullptr, &ec2);
|
||||
int32_t n = ures_getSize(curr);
|
||||
for (int32_t i=0; i<n; ++i) {
|
||||
|
@ -979,14 +1002,17 @@ collectCurrencyNames(const char* locale,
|
|||
// Look up the Currencies resource for the given locale.
|
||||
UErrorCode ec2 = U_ZERO_ERROR;
|
||||
|
||||
char loc[ULOC_FULLNAME_CAPACITY] = "";
|
||||
uloc_getName(locale, loc, sizeof(loc), &ec2);
|
||||
if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
CharString loc;
|
||||
{
|
||||
CharStringByteSink sink(&loc);
|
||||
ulocimp_getName(locale, sink, &ec2);
|
||||
}
|
||||
if (U_FAILURE(ec2)) {
|
||||
ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
|
||||
// Get maximum currency name count first.
|
||||
getCurrencyNameCount(loc, total_currency_name_count, total_currency_symbol_count);
|
||||
getCurrencyNameCount(loc.data(), total_currency_name_count, total_currency_symbol_count);
|
||||
|
||||
*currencyNames = (CurrencyNameStruct*)uprv_malloc
|
||||
(sizeof(CurrencyNameStruct) * (*total_currency_name_count));
|
||||
|
@ -1014,7 +1040,7 @@ collectCurrencyNames(const char* locale,
|
|||
for (int32_t localeLevel = 0; ; ++localeLevel) {
|
||||
ec2 = U_ZERO_ERROR;
|
||||
// TODO: ures_openDirect
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2);
|
||||
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc.data(), &ec2);
|
||||
UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, nullptr, &ec2);
|
||||
int32_t n = ures_getSize(curr);
|
||||
for (int32_t i=0; i<n; ++i) {
|
||||
|
|
2
thirdparty/icu4c/common/udata.cpp
vendored
2
thirdparty/icu4c/common/udata.cpp
vendored
|
@ -1196,7 +1196,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
*p = U_FILE_SEP_CHAR;
|
||||
}
|
||||
#if defined (UDATA_DEBUG)
|
||||
fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
|
||||
fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.data());
|
||||
#endif
|
||||
path = altSepPath.data();
|
||||
}
|
||||
|
|
114
thirdparty/icu4c/common/uloc.cpp
vendored
114
thirdparty/icu4c/common/uloc.cpp
vendored
|
@ -103,12 +103,12 @@ static const char * const LANGUAGES[] = {
|
|||
"ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
|
||||
"be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
|
||||
"bgc", "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla",
|
||||
"bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
|
||||
"blo", "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh",
|
||||
"brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv",
|
||||
"ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg",
|
||||
"ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
|
||||
"chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh",
|
||||
"cs", "csb", "cu", "cv", "cy",
|
||||
"cs", "csb", "csw", "cu", "cv", "cy",
|
||||
"da", "dak", "dar", "dav", "de", "del", "den", "dgr",
|
||||
"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
|
||||
"dyo", "dyu", "dz", "dzg",
|
||||
|
@ -135,7 +135,7 @@ static const char * const LANGUAGES[] = {
|
|||
"kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi",
|
||||
"kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl",
|
||||
"kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut",
|
||||
"kv", "kw", "ky",
|
||||
"kv", "kw", "kxv", "ky",
|
||||
"la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn",
|
||||
"lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo",
|
||||
"lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui",
|
||||
|
@ -169,14 +169,14 @@ static const char * const LANGUAGES[] = {
|
|||
"sv", "sw", "swb", "syc", "syr", "szl",
|
||||
"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
|
||||
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr",
|
||||
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
|
||||
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tok", "tpi",
|
||||
"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
|
||||
"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
|
||||
"udm", "ug", "uga", "uk", "umb", "und", "ur", "uz",
|
||||
"vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo",
|
||||
"vot", "vro", "vun",
|
||||
"vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vmw",
|
||||
"vo", "vot", "vro", "vun",
|
||||
"wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu",
|
||||
"xal", "xh", "xmf", "xog",
|
||||
"xal", "xh", "xmf", "xnr", "xog",
|
||||
"yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue",
|
||||
"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
|
||||
"zun", "zxx", "zza",
|
||||
|
@ -220,12 +220,12 @@ static const char * const LANGUAGES_3[] = {
|
|||
"bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
|
||||
"bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
|
||||
"bgc", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
|
||||
"bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
|
||||
"blo", "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
|
||||
"brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
|
||||
"cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
|
||||
"cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
|
||||
"chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
|
||||
"ces", "csb", "chu", "chv", "cym",
|
||||
"ces", "csb", "csw", "chu", "chv", "cym",
|
||||
"dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
|
||||
"din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
|
||||
"dyo", "dyu", "dzo", "dzg",
|
||||
|
@ -252,7 +252,7 @@ static const char * const LANGUAGES_3[] = {
|
|||
"kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
|
||||
"kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
|
||||
"kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
|
||||
"kom", "cor", "kir",
|
||||
"kom", "cor", "kxv", "kir",
|
||||
"lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
|
||||
"lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
|
||||
"lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
|
||||
|
@ -286,14 +286,14 @@ static const char * const LANGUAGES_3[] = {
|
|||
"swe", "swa", "swb", "syc", "syr", "szl",
|
||||
"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
|
||||
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
|
||||
"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
|
||||
"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tok", "tpi",
|
||||
"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
|
||||
"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
|
||||
"udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
|
||||
"vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
|
||||
"vot", "vro", "vun",
|
||||
"vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vmw",
|
||||
"vol", "vot", "vro", "vun",
|
||||
"wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
|
||||
"xal", "xho", "xmf", "xog",
|
||||
"xal", "xho", "xmf", "xnr", "xog",
|
||||
"yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
|
||||
"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
|
||||
"zun", "zxx", "zza",
|
||||
|
@ -477,25 +477,6 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
|
|||
/* ### BCP47 Conversion *******************************************/
|
||||
/* Test if the locale id has BCP47 u extension and does not have '@' */
|
||||
#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(localeID) == 1)
|
||||
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
|
||||
static const char* _ConvertBCP47(
|
||||
const char* id, char* buffer, int32_t length,
|
||||
UErrorCode* err, int32_t* pLocaleIdSize) {
|
||||
const char* finalID;
|
||||
int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, nullptr, err);
|
||||
if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
finalID=id;
|
||||
if (*err == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
finalID=buffer;
|
||||
}
|
||||
if (pLocaleIdSize != nullptr) {
|
||||
*pLocaleIdSize = localeIDSize;
|
||||
}
|
||||
return finalID;
|
||||
}
|
||||
/* Gets the size of the shortest subtag in the given localeID. */
|
||||
static int32_t getShortestSubtagLength(const char *localeID) {
|
||||
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
|
||||
|
@ -762,7 +743,7 @@ ulocimp_getKeywordValue(const char* localeID,
|
|||
char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
|
||||
|
||||
if(status && U_SUCCESS(*status) && localeID) {
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
CharString tempBuffer;
|
||||
const char* tmpLocaleID;
|
||||
|
||||
if (keywordName == nullptr || keywordName[0] == 0) {
|
||||
|
@ -776,8 +757,9 @@ ulocimp_getKeywordValue(const char* localeID,
|
|||
}
|
||||
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
|
||||
sizeof(tempBuffer), status, nullptr);
|
||||
CharStringByteSink sink(&tempBuffer);
|
||||
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
|
||||
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
|
||||
} else {
|
||||
tmpLocaleID=localeID;
|
||||
}
|
||||
|
@ -1406,7 +1388,7 @@ U_CAPI UEnumeration* U_EXPORT2
|
|||
uloc_openKeywords(const char* localeID,
|
||||
UErrorCode* status)
|
||||
{
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
CharString tempBuffer;
|
||||
const char* tmpLocaleID;
|
||||
|
||||
if(status==nullptr || U_FAILURE(*status)) {
|
||||
|
@ -1414,8 +1396,9 @@ uloc_openKeywords(const char* localeID,
|
|||
}
|
||||
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
|
||||
sizeof(tempBuffer), status, nullptr);
|
||||
CharStringByteSink sink(&tempBuffer);
|
||||
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
|
||||
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
|
||||
} else {
|
||||
if (localeID==nullptr) {
|
||||
localeID=uloc_getDefault();
|
||||
|
@ -1489,7 +1472,7 @@ _canonicalize(const char* localeID,
|
|||
}
|
||||
|
||||
int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
|
||||
PreflightingLocaleIDBuffer tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
|
||||
CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
|
||||
CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
|
||||
const char* origLocaleID;
|
||||
const char* tmpLocaleID;
|
||||
|
@ -1512,13 +1495,9 @@ _canonicalize(const char* localeID,
|
|||
}
|
||||
}
|
||||
|
||||
do {
|
||||
// After this call tmpLocaleID may point to localeIDPtr which may
|
||||
// point to either localeID or localeIDWithHyphens.data().
|
||||
tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
|
||||
tempBuffer.getCapacity(), err,
|
||||
&(tempBuffer.requestedCapacity));
|
||||
} while (tempBuffer.needToTryAgain(err));
|
||||
CharStringByteSink tempSink(&tempBuffer);
|
||||
ulocimp_forLanguageTag(localeIDPtr, -1, tempSink, nullptr, err);
|
||||
tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
|
||||
} else {
|
||||
if (localeID==nullptr) {
|
||||
localeID=uloc_getDefault();
|
||||
|
@ -1676,12 +1655,39 @@ uloc_getParent(const char* localeID,
|
|||
char* parent,
|
||||
int32_t parentCapacity,
|
||||
UErrorCode* err)
|
||||
{
|
||||
if (U_FAILURE(*err)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
CheckedArrayByteSink sink(parent, parentCapacity);
|
||||
ulocimp_getParent(localeID, sink, err);
|
||||
|
||||
int32_t reslen = sink.NumberOfBytesAppended();
|
||||
|
||||
if (U_FAILURE(*err)) {
|
||||
return reslen;
|
||||
}
|
||||
|
||||
if (sink.Overflowed()) {
|
||||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
} else {
|
||||
u_terminateChars(parent, parentCapacity, reslen, err);
|
||||
}
|
||||
|
||||
return reslen;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocimp_getParent(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* err)
|
||||
{
|
||||
const char *lastUnderscore;
|
||||
int32_t i;
|
||||
|
||||
if (U_FAILURE(*err))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
if (localeID == nullptr)
|
||||
localeID = uloc_getDefault();
|
||||
|
@ -1697,13 +1703,9 @@ uloc_getParent(const char* localeID,
|
|||
if (uprv_strnicmp(localeID, "und_", 4) == 0) {
|
||||
localeID += 3;
|
||||
i -= 3;
|
||||
uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
|
||||
} else if (parent != localeID) {
|
||||
uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
|
||||
}
|
||||
sink.Append(localeID, i);
|
||||
}
|
||||
|
||||
return u_terminateChars(parent, parentCapacity, i, err);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
|
@ -1795,7 +1797,7 @@ uloc_getVariant(const char* localeID,
|
|||
int32_t variantCapacity,
|
||||
UErrorCode* err)
|
||||
{
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
CharString tempBuffer;
|
||||
const char* tmpLocaleID;
|
||||
int32_t i=0;
|
||||
|
||||
|
@ -1804,7 +1806,9 @@ uloc_getVariant(const char* localeID,
|
|||
}
|
||||
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
tmpLocaleID =_ConvertBCP47(localeID, tempBuffer, sizeof(tempBuffer), err, nullptr);
|
||||
CharStringByteSink sink(&tempBuffer);
|
||||
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, err);
|
||||
tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
|
||||
} else {
|
||||
if (localeID==nullptr) {
|
||||
localeID=uloc_getDefault();
|
||||
|
|
110
thirdparty/icu4c/common/uloc_tag.cpp
vendored
110
thirdparty/icu4c/common/uloc_tag.cpp
vendored
|
@ -1326,14 +1326,23 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool st
|
|||
attrBufLength = 0;
|
||||
for (; i < len; i++) {
|
||||
if (buf[i] != '-') {
|
||||
attrBuf[attrBufLength++] = buf[i];
|
||||
if (static_cast<size_t>(attrBufLength) < sizeof(attrBuf)) {
|
||||
attrBuf[attrBufLength++] = buf[i];
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (attrBufLength > 0) {
|
||||
attrBuf[attrBufLength] = 0;
|
||||
if (static_cast<size_t>(attrBufLength) < sizeof(attrBuf)) {
|
||||
attrBuf[attrBufLength] = 0;
|
||||
} else {
|
||||
*status = U_STRING_NOT_TERMINATED_WARNING;
|
||||
}
|
||||
|
||||
} else if (i >= len){
|
||||
break;
|
||||
|
@ -1879,11 +1888,8 @@ static void
|
|||
_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) {
|
||||
(void)hadPosix;
|
||||
char buf[ULOC_FULLNAME_CAPACITY];
|
||||
char tmpAppend[ULOC_FULLNAME_CAPACITY];
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
int32_t len, i;
|
||||
int32_t reslen = 0;
|
||||
int32_t capacity = sizeof tmpAppend;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
|
@ -1936,37 +1942,18 @@ _appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool
|
|||
}
|
||||
|
||||
if (writeValue) {
|
||||
if (reslen < capacity) {
|
||||
tmpAppend[reslen++] = SEP;
|
||||
}
|
||||
sink.Append("-", 1);
|
||||
|
||||
if (firstValue) {
|
||||
if (reslen < capacity) {
|
||||
tmpAppend[reslen++] = *PRIVATEUSE_KEY;
|
||||
}
|
||||
|
||||
if (reslen < capacity) {
|
||||
tmpAppend[reslen++] = SEP;
|
||||
}
|
||||
|
||||
len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
|
||||
}
|
||||
reslen += len;
|
||||
|
||||
if (reslen < capacity) {
|
||||
tmpAppend[reslen++] = SEP;
|
||||
}
|
||||
|
||||
sink.Append(PRIVATEUSE_KEY, UPRV_LENGTHOF(PRIVATEUSE_KEY) - 1);
|
||||
sink.Append("-", 1);
|
||||
sink.Append(PRIVUSE_VARIANT_PREFIX, UPRV_LENGTHOF(PRIVUSE_VARIANT_PREFIX) - 1);
|
||||
sink.Append("-", 1);
|
||||
firstValue = false;
|
||||
}
|
||||
|
||||
len = (int32_t)uprv_strlen(pPriv);
|
||||
if (reslen < capacity) {
|
||||
uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
|
||||
}
|
||||
reslen += len;
|
||||
sink.Append(pPriv, len);
|
||||
}
|
||||
}
|
||||
/* reset private use starting position */
|
||||
|
@ -1976,15 +1963,6 @@ _appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool
|
|||
}
|
||||
p++;
|
||||
}
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (U_SUCCESS(*status)) {
|
||||
len = reslen;
|
||||
sink.Append(tmpAppend, len);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2092,12 +2070,13 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
|||
int32_t oldTagLength = tagLen;
|
||||
if (tagLen < newTagLength) {
|
||||
uprv_free(tagBuf);
|
||||
tagBuf = (char*)uprv_malloc(newTagLength + 1);
|
||||
// Change t->buf after the free and before return to avoid the second double free in
|
||||
// the destructor of t when t is out of scope.
|
||||
t->buf = tagBuf = (char*)uprv_malloc(newTagLength + 1);
|
||||
if (tagBuf == nullptr) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
t->buf = tagBuf;
|
||||
tagLen = newTagLength;
|
||||
}
|
||||
parsedLenDelta = checkLegacyLen - replacementLen;
|
||||
|
@ -2646,53 +2625,18 @@ ulocimp_toLanguageTag(const char* localeID,
|
|||
UBool strict,
|
||||
UErrorCode* status) {
|
||||
icu::CharString canonical;
|
||||
int32_t reslen;
|
||||
UErrorCode tmpStatus = U_ZERO_ERROR;
|
||||
UBool hadPosix = false;
|
||||
const char* pKeywordStart;
|
||||
|
||||
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
|
||||
int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
|
||||
if (resultCapacity > 0) {
|
||||
char* buffer;
|
||||
|
||||
for (;;) {
|
||||
buffer = canonical.getAppendBuffer(
|
||||
/*minCapacity=*/resultCapacity,
|
||||
/*desiredCapacityHint=*/resultCapacity,
|
||||
resultCapacity,
|
||||
tmpStatus);
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return;
|
||||
}
|
||||
|
||||
reslen =
|
||||
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
|
||||
|
||||
if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
|
||||
break;
|
||||
}
|
||||
|
||||
resultCapacity = reslen;
|
||||
tmpStatus = U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
canonical.append(buffer, reslen, tmpStatus);
|
||||
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
|
||||
}
|
||||
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return;
|
||||
}
|
||||
{
|
||||
icu::CharStringByteSink canonicalSink(&canonical);
|
||||
ulocimp_canonicalize(localeID, canonicalSink, &tmpStatus);
|
||||
}
|
||||
if (U_FAILURE(tmpStatus)) {
|
||||
*status = tmpStatus;
|
||||
return;
|
||||
}
|
||||
|
||||
/* For handling special case - private use only tag */
|
||||
|
|
99
thirdparty/icu4c/common/ulocale.cpp
vendored
Normal file
99
thirdparty/icu4c/common/ulocale.cpp
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
// © 2023 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
//
|
||||
#include "unicode/errorcode.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ulocale.h"
|
||||
#include "unicode/locid.h"
|
||||
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
#define EXTERNAL(i) (reinterpret_cast<ULocale*>(i))
|
||||
#define CONST_INTERNAL(e) (reinterpret_cast<const icu::Locale*>(e))
|
||||
#define INTERNAL(e) (reinterpret_cast<icu::Locale*>(e))
|
||||
|
||||
/**
 * Creates a ULocale from an ICU locale ID.
 *
 * @param localeID the locale ID. Read as a NUL-terminated string when length
 *                 is negative; otherwise exactly length bytes are used.
 * @param length   byte length of localeID, or a negative value.
 * @param err      in/out ICU error code. Set to U_MEMORY_ALLOCATION_ERROR when
 *                 the result object cannot be allocated.
 * @return a new ULocale (release it with ulocale_close()), or nullptr on failure.
 */
ULocale*
ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err) {
    if (U_FAILURE(*err)) return nullptr;
    // Copy into a CharString so that a NUL-terminated string can be handed to
    // Locale::createFromName() even when the input is length-delimited.
    CharString str(length < 0 ? StringPiece(localeID) : StringPiece(localeID, length), *err);
    if (U_FAILURE(*err)) return nullptr;
    icu::Locale* result = icu::Locale::createFromName(str.data()).clone();
    if (result == nullptr) {
        // clone() reports allocation failure by returning nullptr; surface it
        // to the caller instead of returning nullptr with a success code.
        *err = U_MEMORY_ALLOCATION_ERROR;
    }
    return EXTERNAL(result);
}
|
||||
|
||||
/**
 * Creates a ULocale from a BCP 47 language tag.
 *
 * @param tag    the language tag. Read as a NUL-terminated string when length
 *               is negative; otherwise exactly length bytes are used.
 * @param length byte length of tag, or a negative value.
 * @param err    in/out ICU error code. Set to U_MEMORY_ALLOCATION_ERROR when
 *               the result object cannot be allocated.
 * @return a new ULocale (release it with ulocale_close()), or nullptr on failure.
 */
ULocale*
ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err) {
    if (U_FAILURE(*err)) return nullptr;
    Locale l = icu::Locale::forLanguageTag(length < 0 ? StringPiece(tag) : StringPiece(tag, length), *err);
    if (U_FAILURE(*err)) return nullptr;
    icu::Locale* result = l.clone();
    if (result == nullptr) {
        // clone() reports allocation failure by returning nullptr; surface it
        // to the caller instead of returning nullptr with a success code.
        *err = U_MEMORY_ALLOCATION_ERROR;
    }
    return EXTERNAL(result);
}
|
||||
|
||||
void
|
||||
ulocale_close(ULocale* locale) {
|
||||
delete INTERNAL(locale);
|
||||
}
|
||||
|
||||
// Defines ulocale_get<N1>(), a C accessor that forwards to
// icu::Locale::get<N2>(). A null locale yields nullptr rather than being
// dereferenced.
#define IMPL_ULOCALE_STRING_GETTER(N1, N2) \
const char* ulocale_get ## N1(const ULocale* locale) { \
    return locale == nullptr ? nullptr : CONST_INTERNAL(locale)->get ## N2(); \
}

// Shorthand for accessors whose C and C++ names share the same suffix.
#define IMPL_ULOCALE_STRING_IDENTICAL_GETTER(N) IMPL_ULOCALE_STRING_GETTER(N, N)
|
||||
|
||||
// Defines ulocale_get<N>(), which writes the value of a keyword into
// valueBuffer through a CheckedArrayByteSink and returns the number of bytes
// the sink reports as appended.
//  - Returns 0 immediately when *err already holds a failure.
//  - A null locale sets U_ILLEGAL_ARGUMENT_ERROR and returns 0.
//  - A negative keywordLength means keyword is NUL-terminated.
//  - When the sink overflowed, *err becomes U_BUFFER_OVERFLOW_ERROR; otherwise
//    the output is terminated via u_terminateChars().
#define IMPL_ULOCALE_GET_KEYWORD_VALUE(N) \
int32_t ulocale_get ##N ( \
        const ULocale* locale, const char* keyword, int32_t keywordLength, \
        char* valueBuffer, int32_t bufferCapacity, UErrorCode *err) { \
    if (U_FAILURE(*err)) return 0; \
    if (locale == nullptr) { \
        *err = U_ILLEGAL_ARGUMENT_ERROR; \
        return 0; \
    } \
    const StringPiece key = keywordLength < 0 \
        ? StringPiece(keyword) \
        : StringPiece(keyword, keywordLength); \
    CheckedArrayByteSink out(valueBuffer, bufferCapacity); \
    CONST_INTERNAL(locale)->get ## N(key, out, *err); \
    const int32_t written = out.NumberOfBytesAppended(); \
    if (U_SUCCESS(*err)) { \
        if (out.Overflowed()) { \
            *err = U_BUFFER_OVERFLOW_ERROR; \
        } else { \
            u_terminateChars(valueBuffer, bufferCapacity, written, err); \
        } \
    } \
    return written; \
}
|
||||
|
||||
// Defines ulocale_get<N>(), which wraps the StringEnumeration produced by
// icu::Locale::create<N>() in a UEnumeration.
//  - Returns nullptr immediately when *err already holds a failure.
//  - A null locale sets U_ILLEGAL_ARGUMENT_ERROR and returns nullptr.
#define IMPL_ULOCALE_GET_KEYWORDS(N) \
UEnumeration* ulocale_get ## N(const ULocale* locale, UErrorCode *err) { \
    UEnumeration* result = nullptr; \
    if (U_SUCCESS(*err)) { \
        if (locale == nullptr) { \
            *err = U_ILLEGAL_ARGUMENT_ERROR; \
        } else { \
            auto* keywords = CONST_INTERNAL(locale)->create ## N(*err); \
            result = uenum_openFromStringEnumeration(keywords, err); \
        } \
    } \
    return result; \
}
|
||||
|
||||
IMPL_ULOCALE_STRING_IDENTICAL_GETTER(Language)
|
||||
IMPL_ULOCALE_STRING_IDENTICAL_GETTER(Script)
|
||||
IMPL_ULOCALE_STRING_GETTER(Region, Country)
|
||||
IMPL_ULOCALE_STRING_IDENTICAL_GETTER(Variant)
|
||||
IMPL_ULOCALE_STRING_GETTER(LocaleID, Name)
|
||||
IMPL_ULOCALE_STRING_IDENTICAL_GETTER(BaseName)
|
||||
IMPL_ULOCALE_GET_KEYWORD_VALUE(KeywordValue)
|
||||
IMPL_ULOCALE_GET_KEYWORD_VALUE(UnicodeKeywordValue)
|
||||
IMPL_ULOCALE_GET_KEYWORDS(Keywords)
|
||||
IMPL_ULOCALE_GET_KEYWORDS(UnicodeKeywords)
|
||||
|
||||
bool ulocale_isBogus(const ULocale* locale) {
|
||||
if (locale == nullptr) return false;
|
||||
return CONST_INTERNAL(locale)->isBogus();
|
||||
}
|
||||
|
||||
/*eof*/
|
156
thirdparty/icu4c/common/ulocbuilder.cpp
vendored
Normal file
156
thirdparty/icu4c/common/ulocbuilder.cpp
vendored
Normal file
|
@ -0,0 +1,156 @@
|
|||
// © 2023 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "unicode/bytestream.h"
|
||||
#include "unicode/localebuilder.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/umachine.h"
|
||||
#include "unicode/ulocbuilder.h"
|
||||
#include "cstring.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
using icu::CheckedArrayByteSink;
|
||||
using icu::StringPiece;
|
||||
|
||||
#define EXTERNAL(i) (reinterpret_cast<ULocaleBuilder*>(i))
|
||||
#define INTERNAL(e) (reinterpret_cast<icu::LocaleBuilder*>(e))
|
||||
#define CONST_INTERNAL(e) (reinterpret_cast<const icu::LocaleBuilder*>(e))
|
||||
|
||||
/**
 * Allocates a new, empty locale builder.
 * The result is handed out as an opaque ULocaleBuilder pointer and must be
 * released with ulocbld_close().
 */
ULocaleBuilder* ulocbld_open() {
    icu::LocaleBuilder* impl = new icu::LocaleBuilder();
    return EXTERNAL(impl);
}
|
||||
|
||||
/**
 * Destroys a builder created by ulocbld_open(). A null builder is ignored.
 */
void ulocbld_close(ULocaleBuilder* builder) {
    if (builder != nullptr) {
        delete INTERNAL(builder);
    }
}
|
||||
|
||||
/**
 * Resets the builder to match the locale named by a locale ID.
 *
 * @param builder the builder; a null builder is ignored.
 * @param locale  the locale ID. It must be NUL-terminated when length is
 *                negative; otherwise only the first length bytes are read.
 * @param length  byte length of locale, or a negative value.
 *
 * When length is non-negative the bytes are always copied into a terminated
 * scratch buffer. The input is never probed at locale[length], which would be
 * an out-of-bounds read for a buffer holding exactly length bytes. IDs of
 * ULOC_FULLNAME_CAPACITY bytes or more use a heap-backed buffer; if that
 * allocation fails the builder is given a bogus locale.
 */
void ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length) {
    if (builder == nullptr) return;
    icu::Locale l;
    if (length < 0) {
        // Negative length: the caller promises a NUL-terminated string.
        l = icu::Locale(locale);
    } else if (length == INT32_MAX) {
        // length + 1 below would overflow; such an ID cannot be represented.
        l.setToBogus();
    } else {
        // locale may not be NUL-terminated but the Locale API requires a
        // terminated string, so build a terminated copy. The stack storage
        // covers every ID shorter than ULOC_FULLNAME_CAPACITY.
        icu::MaybeStackArray<char, ULOC_FULLNAME_CAPACITY> buf;
        if (length >= buf.getCapacity() && buf.resize(length + 1) == nullptr) {
            l.setToBogus();
        } else {
            uprv_memcpy(buf.getAlias(), locale, length);
            buf[length] = '\0';
            l = icu::Locale(buf.getAlias());
        }
    }
    INTERNAL(builder)->setLocale(l);
}
|
||||
|
||||
void
|
||||
ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale) {
|
||||
if (builder == nullptr) return;
|
||||
INTERNAL(builder)->setLocale(*(reinterpret_cast<const icu::Locale*>(locale)));
|
||||
ulocale_close(locale);
|
||||
}
|
||||
|
||||
// Wraps (s, l) in a StringPiece. A negative l selects the single-argument
// constructor, which treats s as a NUL-terminated string.
#define STRING_PIECE(s, l) ((l)<0 ? StringPiece(s) : StringPiece((s), (l)))

// Defines ulocbld_<N>(), which forwards a (pointer, length) string to
// icu::LocaleBuilder::<N>(). A null builder makes the call a no-op.
#define IMPL_ULOCBLD_SETTER(N) \
void ulocbld_##N(ULocaleBuilder* bld, const char* s, int32_t l) { \
    if (bld != nullptr) { \
        INTERNAL(bld)->N(STRING_PIECE(s,l)); \
    } \
}
|
||||
|
||||
IMPL_ULOCBLD_SETTER(setLanguageTag)
|
||||
IMPL_ULOCBLD_SETTER(setLanguage)
|
||||
IMPL_ULOCBLD_SETTER(setScript)
|
||||
IMPL_ULOCBLD_SETTER(setRegion)
|
||||
IMPL_ULOCBLD_SETTER(setVariant)
|
||||
IMPL_ULOCBLD_SETTER(addUnicodeLocaleAttribute)
|
||||
IMPL_ULOCBLD_SETTER(removeUnicodeLocaleAttribute)
|
||||
|
||||
/**
 * Forwards an extension key and its value to LocaleBuilder::setExtension().
 *
 * @param builder the builder; a null builder is ignored.
 * @param key     the extension key character.
 * @param value   the extension value; NUL-terminated when length is negative.
 * @param length  byte length of value, or a negative value.
 */
void ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length) {
    if (builder != nullptr) {
        const StringPiece extension = STRING_PIECE(value, length);
        INTERNAL(builder)->setExtension(key, extension);
    }
}
|
||||
|
||||
/**
 * Forwards a Unicode locale keyword and its type to
 * LocaleBuilder::setUnicodeLocaleKeyword().
 *
 * @param builder    the builder; a null builder is ignored.
 * @param key        the keyword; NUL-terminated when keyLength is negative.
 * @param keyLength  byte length of key, or a negative value.
 * @param type       the keyword type; NUL-terminated when typeLength is negative.
 * @param typeLength byte length of type, or a negative value.
 */
void ulocbld_setUnicodeLocaleKeyword(
        ULocaleBuilder* builder, const char* key, int32_t keyLength,
        const char* type, int32_t typeLength) {
    if (builder != nullptr) {
        const StringPiece keyword = STRING_PIECE(key, keyLength);
        const StringPiece keywordType = STRING_PIECE(type, typeLength);
        INTERNAL(builder)->setUnicodeLocaleKeyword(keyword, keywordType);
    }
}
|
||||
|
||||
/**
 * Forwards to LocaleBuilder::clear(). A null builder is ignored.
 */
void ulocbld_clear(ULocaleBuilder* builder) {
    if (builder != nullptr) {
        INTERNAL(builder)->clear();
    }
}
|
||||
|
||||
/**
 * Forwards to LocaleBuilder::clearExtensions(). A null builder is ignored.
 */
void ulocbld_clearExtensions(ULocaleBuilder* builder) {
    if (builder != nullptr) {
        INTERNAL(builder)->clearExtensions();
    }
}
|
||||
|
||||
|
||||
/**
 * Builds a locale from the builder's current state and returns it as a
 * newly allocated ULocale.
 *
 * @param builder the builder; null sets U_ILLEGAL_ARGUMENT_ERROR.
 * @param err     in/out ICU error code. A failure already present on entry is
 *                left untouched and nullptr is returned. Set to
 *                U_MEMORY_ALLOCATION_ERROR when the result cannot be allocated.
 * @return a new ULocale (release it with ulocale_close()), or nullptr on failure.
 */
ULocale* ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err) {
    // Per the ICU error-code convention, never overwrite an earlier failure.
    if (U_FAILURE(*err)) return nullptr;
    if (builder == nullptr) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    icu::Locale l = INTERNAL(builder)->build(*err);
    if (U_FAILURE(*err)) return nullptr;
    icu::Locale* r = l.clone();
    if (r == nullptr) {
        *err = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    return reinterpret_cast<ULocale*>(r);
}
|
||||
|
||||
/**
 * Builds a locale from the builder's current state and copies its locale ID
 * (Locale::getName()) into buffer.
 *
 * @param builder        the builder; null sets U_ILLEGAL_ARGUMENT_ERROR.
 * @param buffer         destination for the locale ID.
 * @param bufferCapacity capacity of buffer in bytes.
 * @param err            in/out ICU error code. A failure already present on
 *                       entry is left untouched and 0 is returned.
 * @return the value returned by u_terminateChars() for the ID length, or 0
 *         on failure. The ID is copied only when it fits in bufferCapacity.
 */
int32_t ulocbld_buildLocaleID(ULocaleBuilder* builder,
                              char* buffer, int32_t bufferCapacity, UErrorCode* err) {
    // Per the ICU error-code convention, never overwrite an earlier failure.
    if (U_FAILURE(*err)) return 0;
    if (builder == nullptr) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    icu::Locale l = INTERNAL(builder)->build(*err);
    if (U_FAILURE(*err)) return 0;
    const char* name = l.getName();
    int32_t length = static_cast<int32_t>(uprv_strlen(name));
    if (0 < length && length <= bufferCapacity) {
        uprv_memcpy(buffer, name, length);
    }
    // u_terminateChars() adds the terminator when there is room and records
    // the overflow / not-terminated status in *err otherwise.
    return u_terminateChars(buffer, bufferCapacity, length, err);
}
|
||||
|
||||
/**
 * Builds a locale from the builder's current state and writes its language
 * tag (Locale::toLanguageTag()) into buffer.
 *
 * @param builder        the builder; null sets U_ILLEGAL_ARGUMENT_ERROR.
 * @param buffer         destination for the language tag.
 * @param bufferCapacity capacity of buffer in bytes.
 * @param err            in/out ICU error code. A failure already present on
 *                       entry is left untouched and 0 is returned. Set to
 *                       U_BUFFER_OVERFLOW_ERROR when the sink overflowed.
 * @return the number of bytes the sink reports as appended, or 0 when the
 *         builder is null or building the locale fails.
 */
int32_t ulocbld_buildLanguageTag(ULocaleBuilder* builder,
                                 char* buffer, int32_t bufferCapacity, UErrorCode* err) {
    // Per the ICU error-code convention, never overwrite an earlier failure.
    if (U_FAILURE(*err)) return 0;
    if (builder == nullptr) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    icu::Locale l = INTERNAL(builder)->build(*err);
    if (U_FAILURE(*err)) return 0;
    CheckedArrayByteSink sink(buffer, bufferCapacity);
    l.toLanguageTag(sink, *err);
    int32_t reslen = sink.NumberOfBytesAppended();
    if (U_FAILURE(*err)) {
        return reslen;
    }
    if (sink.Overflowed()) {
        *err = U_BUFFER_OVERFLOW_ERROR;
    } else {
        u_terminateChars(buffer, bufferCapacity, reslen, err);
    }
    return reslen;
}
|
||||
|
||||
/**
 * Copies the builder's error state into *outErrorCode.
 *
 * @param builder      the builder; null sets U_ILLEGAL_ARGUMENT_ERROR.
 * @param outErrorCode in/out ICU error code. A failure already present on
 *                     entry is left untouched and true is returned.
 * @return true when *outErrorCode holds a failure on return (pre-existing,
 *         null builder), otherwise the result of LocaleBuilder::copyErrorTo().
 */
UBool ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode) {
    // Per the ICU error-code convention, never overwrite an earlier failure.
    if (U_FAILURE(*outErrorCode)) return true;
    if (builder == nullptr) {
        *outErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return true;
    }
    return CONST_INTERNAL(builder)->copyErrorTo(*outErrorCode);
}
|
75
thirdparty/icu4c/common/ulocimp.h
vendored
75
thirdparty/icu4c/common/ulocimp.h
vendored
|
@ -92,6 +92,11 @@ ulocimp_getKeywordValue(const char* localeID,
|
|||
icu::ByteSink& sink,
|
||||
UErrorCode* status);
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocimp_getParent(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Writes a well-formed language tag for this locale ID.
|
||||
*
|
||||
|
@ -237,6 +242,7 @@ ulocimp_addLikelySubtags(const char* localeID,
|
|||
*
|
||||
* @param localeID The locale to minimize
|
||||
* @param sink The output sink receiving the minimized locale
|
||||
* @param favorScript favor to keep script if true, region if false.
|
||||
* @param err Error information if minimizing the locale failed. If the length
|
||||
* of the localeID and the null-terminator is greater than the maximum allowed size,
|
||||
* or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
|
||||
|
@ -245,6 +251,7 @@ ulocimp_addLikelySubtags(const char* localeID,
|
|||
U_CAPI void U_EXPORT2
|
||||
ulocimp_minimizeSubtags(const char* localeID,
|
||||
icu::ByteSink& sink,
|
||||
bool favorScript,
|
||||
UErrorCode* err);
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
|
@ -307,72 +314,4 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le
|
|||
// Return true if the value is already canonicalized.
|
||||
U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
|
||||
|
||||
/**
|
||||
* A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
|
||||
* This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
|
||||
* and then, if it's not big enough, reallocate it on the heap and try again.
|
||||
*
|
||||
* You use it like this:
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
*
|
||||
* PreflightingLocaleIDBuffer tempBuffer;
|
||||
* do {
|
||||
* tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
|
||||
* } while (tempBuffer.needToTryAgain(&err));
|
||||
* if (U_SUCCESS(err)) {
|
||||
* uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
|
||||
* }
|
||||
*/
|
||||
class PreflightingLocaleIDBuffer {
|
||||
private:
|
||||
char stackBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
char* heapBuffer = nullptr;
|
||||
int32_t capacity = ULOC_FULLNAME_CAPACITY;
|
||||
|
||||
public:
|
||||
int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
|
||||
|
||||
// No heap allocation. Use only on the stack.
|
||||
static void* U_EXPORT2 operator new(size_t) noexcept = delete;
|
||||
static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
|
||||
#if U_HAVE_PLACEMENT_NEW
|
||||
static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
|
||||
#endif
|
||||
|
||||
PreflightingLocaleIDBuffer() {}
|
||||
|
||||
~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
|
||||
|
||||
char* getBuffer() {
|
||||
if (heapBuffer == nullptr) {
|
||||
return stackBuffer;
|
||||
} else {
|
||||
return heapBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t getCapacity() {
|
||||
return capacity;
|
||||
}
|
||||
|
||||
bool needToTryAgain(UErrorCode* err) {
|
||||
if (heapBuffer != nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia
|
||||
heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
|
||||
if (heapBuffer == nullptr) {
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
*err = U_ZERO_ERROR;
|
||||
capacity = newCapacity;
|
||||
}
|
||||
return U_SUCCESS(*err);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
1
thirdparty/icu4c/common/unicode/brkiter.h
vendored
1
thirdparty/icu4c/common/unicode/brkiter.h
vendored
|
@ -649,6 +649,7 @@ private:
|
|||
/** @internal (private) */
|
||||
char actualLocale[ULOC_FULLNAME_CAPACITY];
|
||||
char validLocale[ULOC_FULLNAME_CAPACITY];
|
||||
char requestLocale[ULOC_FULLNAME_CAPACITY];
|
||||
};
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
|
2
thirdparty/icu4c/common/unicode/docmain.h
vendored
2
thirdparty/icu4c/common/unicode/docmain.h
vendored
|
@ -114,7 +114,7 @@
|
|||
* </tr>
|
||||
* <tr>
|
||||
* <td>Locales </td>
|
||||
* <td>uloc.h</a></td>
|
||||
* <td>uloc.h, ulocale.h, ulocbuilder.h</a></td>
|
||||
* <td>icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
|
|
14
thirdparty/icu4c/common/unicode/locid.h
vendored
14
thirdparty/icu4c/common/unicode/locid.h
vendored
|
@ -984,7 +984,10 @@ public:
|
|||
static const char* const* U_EXPORT2 getISOCountries();
|
||||
|
||||
/**
|
||||
* Gets a list of all available language codes defined in ISO 639. This is a pointer
|
||||
* Returns a list of all unique language codes defined in ISO 639.
|
||||
* They can be 2 or 3 letter codes, as defined by
|
||||
* <a href="https://www.ietf.org/rfc/bcp/bcp47.html#section-2.2.1">
|
||||
* BCP 47, section 2.2.1</a>. This is a pointer
|
||||
* to an array of pointers to arrays of char. All of these pointers are owned
|
||||
* by ICU-- do not delete them, and do not write through them. The array is
|
||||
* terminated with a null pointer.
|
||||
|
@ -1110,6 +1113,15 @@ protected: /* only protected for testing purposes. DO NOT USE. */
|
|||
* @internal
|
||||
*/
|
||||
void setFromPOSIXID(const char *posixID);
|
||||
/**
 * Minimize the subtags for this Locale, per the algorithm described
 * in the following CLDR technical report:
 *
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
 *
 * @param favorScript favor to keep script if true, to keep region if false.
 * @param status error information if minimizing this Locale failed.
 *   If this Locale is not well-formed, the error code is
 *   U_ILLEGAL_ARGUMENT_ERROR.
 * @internal
 */
|
||||
void minimizeSubtags(bool favorScript, UErrorCode& status);
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
private:
|
||||
|
|
26
thirdparty/icu4c/common/unicode/normalizer2.h
vendored
26
thirdparty/icu4c/common/unicode/normalizer2.h
vendored
|
@ -147,7 +147,10 @@ public:
|
|||
getNFKDInstance(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
|
||||
* Returns a Normalizer2 instance for Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Casefold
|
||||
*
|
||||
* Same as getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
|
@ -160,6 +163,25 @@ public:
|
|||
static const Normalizer2 *
|
||||
getNFKCCasefoldInstance(UErrorCode &errorCode);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Returns a Normalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
|
||||
*
|
||||
* Same as getInstance(nullptr, "nfkc_scf", UNORM2_COMPOSE, errorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 74
|
||||
*/
|
||||
static const Normalizer2 *
|
||||
getNFKCSimpleCasefoldInstance(UErrorCode &errorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns a Normalizer2 instance which uses the specified data file
|
||||
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
||||
|
@ -172,7 +194,7 @@ public:
|
|||
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
|
||||
*
|
||||
* @param packageName nullptr for ICU built-in data, otherwise application data package name
|
||||
* @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
|
||||
* @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
|
||||
* @param mode normalization mode (compose or decompose etc.)
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
|
|
82
thirdparty/icu4c/common/unicode/rbbi.h
vendored
82
thirdparty/icu4c/common/unicode/rbbi.h
vendored
|
@ -43,6 +43,69 @@ class RBBIDataWrapper;
|
|||
class UnhandledEngine;
|
||||
class UStack;
|
||||
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* The ExternalBreakEngine class define an abstract interface for the host environment
|
||||
* to provide a low level facility to break text for unicode text in script that the text boundary
|
||||
* cannot be handled by upper level rule based logic, for example, for Chinese and Japanese
|
||||
* word breaking, Thai, Khmer, Burmese, Lao and other Southeast Asian scripts.
|
||||
* The host environment implement one or more subclass of ExternalBreakEngine and
|
||||
* register them in the initialization time by calling
|
||||
* RuleBasedBreakIterator::registerExternalBreakEngine(). ICU adopt and own the engine and will
|
||||
* delete the registered external engine in proper time during the clean up
|
||||
* event.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
class ExternalBreakEngine : public UObject {
|
||||
public:
|
||||
/**
|
||||
* destructor
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual ~ExternalBreakEngine() {}
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character when
|
||||
* the RuleBasedBreakIterator is used for a particular locale. This method is used
|
||||
* by the RuleBasedBreakIterator to find a break engine.</p>
|
||||
* @param c A character which begins a run that the engine might handle.
|
||||
* @param locale The locale.
|
||||
* @return true if this engine handles the particular character for that locale.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual bool isFor(UChar32 c, const char* locale) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Indicate whether this engine handles a particular character.This method is
|
||||
* used by the RuleBasedBreakIterator after it already find a break engine to see which
|
||||
* characters after the first one can be handled by this break engine.</p>
|
||||
* @param c A character that the engine might handle.
|
||||
* @return true if this engine handles the particular character.
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual bool handles(UChar32 c) const = 0;
|
||||
|
||||
/**
|
||||
* <p>Divide up a range of text handled by this break engine.</p>
|
||||
*
|
||||
* @param text A UText representing the text
|
||||
* @param start The start of the range of known characters
|
||||
* @param end The end of the range of known characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or
|
||||
* nullptr
|
||||
* @param foundBreaksCapacity The capacity of foundBreaks
|
||||
* @param status Information on any errors encountered.
|
||||
* @return The number of breaks found
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
virtual int32_t fillBreaks(UText* text, int32_t start, int32_t end,
|
||||
int32_t* foundBreaks, int32_t foundBreaksCapacity,
|
||||
UErrorCode& status) const = 0;
|
||||
};
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* A subclass of BreakIterator whose behavior is specified using a list of rules.
|
||||
|
@ -716,9 +779,10 @@ private:
|
|||
* This function returns the appropriate LanguageBreakEngine for a
|
||||
* given character c.
|
||||
* @param c A character in the dictionary set
|
||||
* @param locale The locale.
|
||||
* @internal (private)
|
||||
*/
|
||||
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
|
||||
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale);
|
||||
|
||||
public:
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
|
@ -734,8 +798,24 @@ private:
|
|||
*/
|
||||
void dumpTables();
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Register a new external break engine. The external break engine will be adopted.
|
||||
* Because ICU may choose to cache break engine internally, this must
|
||||
* be called at application startup, prior to any calls to
|
||||
* object methods of RuleBasedBreakIterator to avoid undefined behavior.
|
||||
* @param toAdopt the ExternalBreakEngine instance to be adopted
|
||||
* @param status the in/out status code, no special meanings are assigned
|
||||
* @internal ICU 74 technology preview
|
||||
*/
|
||||
static void U_EXPORT2 registerExternalBreakEngine(
|
||||
ExternalBreakEngine* toAdopt, UErrorCode& status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
};
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
||||
|
|
44
thirdparty/icu4c/common/unicode/uchar.h
vendored
44
thirdparty/icu4c/common/unicode/uchar.h
vendored
|
@ -60,7 +60,7 @@ U_CDECL_BEGIN
|
|||
* @see u_getUnicodeVersion
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
#define U_UNICODE_VERSION "15.0"
|
||||
#define U_UNICODE_VERSION "15.1"
|
||||
|
||||
/**
|
||||
* \file
|
||||
|
@ -532,12 +532,33 @@ typedef enum UProperty {
|
|||
* @stable ICU 70
|
||||
*/
|
||||
UCHAR_RGI_EMOJI=71,
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Binary property IDS_Unary_Operator.
|
||||
* For programmatic determination of Ideographic Description Sequences.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
UCHAR_IDS_UNARY_OPERATOR=72,
|
||||
/**
|
||||
* Binary property ID_Compat_Math_Start.
|
||||
* Used in mathematical identifier profile in UAX #31.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
UCHAR_ID_COMPAT_MATH_START=73,
|
||||
/**
|
||||
* Binary property ID_Compat_Math_Continue.
|
||||
* Used in mathematical identifier profile in UAX #31.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
UCHAR_ID_COMPAT_MATH_CONTINUE=74,
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the last constant for binary Unicode properties.
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UCHAR_BINARY_LIMIT=72,
|
||||
UCHAR_BINARY_LIMIT=75,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/** Enumerated property Bidi_Class.
|
||||
|
@ -1900,6 +1921,11 @@ enum UBlockCode {
|
|||
/** @stable ICU 72 */
|
||||
UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/
|
||||
|
||||
// New block in Unicode 15.1
|
||||
|
||||
/** @stable ICU 74 */
|
||||
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 328, /*[2EBF0]*/
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal UBlockCode value.
|
||||
|
@ -1907,7 +1933,7 @@ enum UBlockCode {
|
|||
*
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
UBLOCK_COUNT = 328,
|
||||
UBLOCK_COUNT = 329,
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
|
||||
/** @stable ICU 2.0 */
|
||||
|
@ -2439,6 +2465,16 @@ typedef enum ULineBreak {
|
|||
U_LB_E_MODIFIER = 41, /*[EM]*/
|
||||
/** @stable ICU 58 */
|
||||
U_LB_ZWJ = 42, /*[ZWJ]*/
|
||||
/** @stable ICU 74 */
|
||||
U_LB_AKSARA = 43, /*[AK]*/
|
||||
/** @stable ICU 74 */
|
||||
U_LB_AKSARA_PREBASE = 44, /*[AP]*/
|
||||
/** @stable ICU 74 */
|
||||
U_LB_AKSARA_START = 45, /*[AS]*/
|
||||
/** @stable ICU 74 */
|
||||
U_LB_VIRAMA_FINAL = 46, /*[VF]*/
|
||||
/** @stable ICU 74 */
|
||||
U_LB_VIRAMA = 47, /*[VI]*/
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
/**
|
||||
* One more than the highest normal ULineBreak value.
|
||||
|
@ -2446,7 +2482,7 @@ typedef enum ULineBreak {
|
|||
*
|
||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||
*/
|
||||
U_LB_COUNT = 43
|
||||
U_LB_COUNT = 48
|
||||
#endif // U_HIDE_DEPRECATED_API
|
||||
} ULineBreak;
|
||||
|
||||
|
|
229
thirdparty/icu4c/common/unicode/ulocale.h
vendored
Normal file
229
thirdparty/icu4c/common/unicode/ulocale.h
vendored
Normal file
|
@ -0,0 +1,229 @@
|
|||
// © 2023 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#ifndef ULOCALE_H
|
||||
#define ULOCALE_H
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/uenum.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Locale ID functionality similar to C++ class Locale
|
||||
*/
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Opaque C service object type for the locale API
|
||||
* @draft ICU 74
|
||||
*/
|
||||
struct ULocale;
|
||||
|
||||
/**
|
||||
* C typedef for struct ULocale.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
typedef struct ULocale ULocale;
|
||||
|
||||
/**
|
||||
* Constructs an ULocale from the locale ID.
|
||||
* The created ULocale should be destroyed by calling
|
||||
* ulocale_close();
|
||||
* @param localeID the locale, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the locale; if negative, then the locale needs to be
|
||||
* null terminated.
|
||||
* @param err the error code
|
||||
* @return the locale.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI ULocale* U_EXPORT2
|
||||
ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Constructs an ULocale from the provided IETF BCP 47 language tag.
|
||||
* The created ULocale should be destroyed by calling
|
||||
* ulocale_close();
|
||||
* @param tag the language tag, defined as IETF BCP 47 language tag, const
|
||||
* char* pointer (need not be terminated when the length is non-negative)
|
||||
* @param length the length of the tag; if negative, then the tag needs to be
|
||||
* null terminated.
|
||||
* @param err the error code
|
||||
* @return the locale.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI ULocale* U_EXPORT2
|
||||
ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Close the locale and destroy its internal states.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocale_close(ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's ISO-639 language code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return the language code of the locale.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getLanguage(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's ISO-15924 abbreviation script code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to the script.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getScript(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's ISO-3166 region code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to the region.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getRegion(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the locale's variant code.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to the variant.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getVariant(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the programmatic name of the entire locale, with the language,
|
||||
* country and variant separated by underbars. If a field is missing, up
|
||||
* to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN",
|
||||
* "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO"
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to "name".
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getLocaleID(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Returns the programmatic name of the entire locale as ulocale_getLocaleID()
|
||||
* would return, but without keywords.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return A pointer to "base name".
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
ulocale_getBaseName(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Gets the bogus state. Locale object can be bogus if it doesn't exist
|
||||
*
|
||||
* @param locale the locale
|
||||
* @return false if it is a real locale, true if it is a bogus locale
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI bool U_EXPORT2
|
||||
ulocale_isBogus(const ULocale* locale);
|
||||
|
||||
/**
|
||||
* Gets the list of keywords for the specified locale.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param err the error code
|
||||
* @return pointer to UEnumeration, or nullptr if there are no keywords.
|
||||
* Client must call uenum_close() to dispose the returned value.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
ulocale_getKeywords(const ULocale* locale, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Gets the list of unicode keywords for the specified locale.
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param err the error code
|
||||
* @return pointer to UEnumeration, or nullptr if there are no keywords.
|
||||
* Client must call uenum_close() to dispose the returned value.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI UEnumeration* U_EXPORT2
|
||||
ulocale_getUnicodeKeywords(const ULocale* locale, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Gets the value for a keyword.
|
||||
*
|
||||
* This uses legacy keyword=value pairs, like "collation=phonebook".
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param keyword the keyword, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param keywordLength the length of the keyword; if negative, then the
|
||||
* keyword needs to be null terminated.
|
||||
* @param valueBuffer The buffer to receive the value.
|
||||
* @param valueBufferCapacity The capacity of receiving valueBuffer.
|
||||
* @param err the error code
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocale_getKeywordValue(
|
||||
const ULocale* locale, const char* keyword, int32_t keywordLength,
|
||||
char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err);
|
||||
|
||||
/**
|
||||
* Gets the Unicode value for a Unicode keyword.
|
||||
*
|
||||
* This uses Unicode key-value pairs, like "co-phonebk".
|
||||
*
|
||||
* @param locale the locale
|
||||
* @param keyword the Unicode keyword, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param keywordLength the length of the Unicode keyword; if negative,
|
||||
* then the keyword needs to be null terminated.
|
||||
* @param valueBuffer The buffer to receive the Unicode value.
|
||||
* @param valueBufferCapacity The capacity of receiving valueBuffer.
|
||||
* @param err the error code
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocale_getUnicodeKeywordValue(
|
||||
const ULocale* locale, const char* keyword, int32_t keywordLength,
|
||||
char* valueBuffer, int32_t valueBufferCapacity, UErrorCode *err);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalULocalePointer
|
||||
* "Smart pointer" class, closes a ULocale via ulocale_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocalePointer, ULocale, ulocale_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
#endif /* ULOCALE_H */
|
441
thirdparty/icu4c/common/unicode/ulocbuilder.h
vendored
Normal file
441
thirdparty/icu4c/common/unicode/ulocbuilder.h
vendored
Normal file
|
@ -0,0 +1,441 @@
|
|||
// © 2023 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
#ifndef __ULOCBUILDER_H__
|
||||
#define __ULOCBUILDER_H__
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "unicode/ulocale.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: Builder API for Locale
|
||||
*/
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Opaque C service object type for the locale builder API
|
||||
* @draft ICU 74
|
||||
*/
|
||||
struct ULocaleBuilder;
|
||||
|
||||
/**
|
||||
* C typedef for struct ULocaleBuilder.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
typedef struct ULocaleBuilder ULocaleBuilder;
|
||||
|
||||
/**
|
||||
* <code>ULocaleBuilder</code> is used to build valid <code>locale</code> id
|
||||
* string or IETF BCP 47 language tag from values configured by the setters.
|
||||
* The <code>ULocaleBuilder</code> checks if a value configured by a
|
||||
* setter satisfies the syntax requirements defined by the <code>Locale</code>
|
||||
* class. A string of Locale created by a <code>ULocaleBuilder</code> is
|
||||
* well-formed and can be transformed to a well-formed IETF BCP 47 language tag
|
||||
* without losing information.
|
||||
*
|
||||
* <p>The following example shows how to create a <code>locale</code> string
|
||||
* with the <code>ULocaleBuilder</code>.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* char buffer[ULOC_FULLNAME_CAPACITY];
|
||||
* ULocaleBuilder* builder = ulocbld_open();
|
||||
* ulocbld_setLanguage(builder, "sr", -1);
|
||||
* ulocbld_setScript(builder, "Latn", -1);
|
||||
* ulocbld_setRegion(builder, "RS", -1);
|
||||
* int32_t length = ulocbld_buildLocaleID(
|
||||
* builder, buffer, ULOC_FULLNAME_CAPACITY, &err);
|
||||
* ulocbld_close(builder);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <p>ULocaleBuilders can be reused; <code>ulocbld_clear()</code> resets all
|
||||
* fields to their default values.
|
||||
*
|
||||
* <p>ULocaleBuilder tracks errors in an internal UErrorCode. For all setters,
|
||||
* except ulocbld_setLanguageTag and ulocbld_setLocale, ULocaleBuilder will return immediately
|
||||
* if the internal UErrorCode is in error state.
|
||||
* To reset the internal state and error code, call ulocbld_clear().
|
||||
* The ulocbld_setLanguageTag and ulocbld_setLocale methods will first clear the internal
|
||||
* UErrorCode, then track the error of the validation of the input parameter
|
||||
* into the internal UErrorCode.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
|
||||
/**
|
||||
* Constructs an empty ULocaleBuilder. The default value of all
|
||||
* fields, extensions, and private use information is the
|
||||
* empty string. The created builder should be destroyed by calling
|
||||
* ulocbld_close();
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI ULocaleBuilder* U_EXPORT2
|
||||
ulocbld_open();
|
||||
|
||||
/**
|
||||
* Close the builder and destroy its internal states.
|
||||
* @param builder the builder
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_close(ULocaleBuilder* builder);
|
||||
|
||||
/**
|
||||
* Resets the <code>ULocaleBuilder</code> to match the provided
|
||||
* <code>locale</code>. Existing state is discarded.
|
||||
*
|
||||
* <p>All fields of the locale must be well-formed.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param locale the locale, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the locale; if negative, then the locale needs to be
|
||||
* null terminated,
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setLocale(ULocaleBuilder* builder, const char* locale, int32_t length);
|
||||
|
||||
/**
|
||||
* Resets the <code>ULocaleBuilder</code> to match the provided
|
||||
* <code>ULocale</code>. Existing state is discarded.
|
||||
*
|
||||
* <p>The locale must be not bogus.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder.
|
||||
* @param locale the locale, a ULocale* pointer. The builder adopts the locale
|
||||
* after the call and the client must not delete it.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_adoptULocale(ULocaleBuilder* builder, ULocale* locale);
|
||||
|
||||
/**
|
||||
* Resets the ULocaleBuilder to match the provided IETF BCP 47 language tag.
|
||||
* Discards the existing state.
|
||||
* The empty string causes the builder to be reset, like {@link #ulocbld_clear}.
|
||||
* Legacy language tags (marked as “Type: grandfathered” in BCP 47)
|
||||
* are converted to their canonical form before being processed.
|
||||
* Otherwise, the <code>language tag</code> must be well-formed,
|
||||
* or else the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods
|
||||
* will later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param tag the language tag, defined as IETF BCP 47 language tag, a
|
||||
* const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the tag; if negative, then the tag needs to be
|
||||
* null terminated,
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setLanguageTag(ULocaleBuilder* builder, const char* tag, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the language. If <code>language</code> is the empty string, the
|
||||
* language in this <code>ULocaleBuilder</code> is removed. Otherwise, the
|
||||
* <code>language</code> must be well-formed, or else the ulocbld_buildLocaleID()
|
||||
* and ulocbld_buildLanguageTag() methods will
|
||||
* later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The syntax of language value is defined as
|
||||
* [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param language the language, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the language; if negative, then the language needs to be
|
||||
* null terminated,
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setLanguage(ULocaleBuilder* builder, const char* language, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the script. If <code>script</code> is the empty string, the script in
|
||||
* this <code>ULocaleBuilder</code> is removed.
|
||||
* Otherwise, the <code>script</code> must be well-formed, or else the
|
||||
* ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later
|
||||
* report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The script value is a four-letter script code as
|
||||
* [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
|
||||
* defined by ISO 15924
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param script the script, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the script; if negative, then the script needs to be
|
||||
* null terminated,
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setScript(ULocaleBuilder* builder, const char* script, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the region. If region is the empty string, the region in this
|
||||
* <code>ULocaleBuilder</code> is removed. Otherwise, the <code>region</code>
|
||||
* must be well-formed, or else the ulocbld_buildLocaleID() and
|
||||
* ulocbld_buildLanguageTag() methods will later report an
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>The region value is defined by
|
||||
* [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
|
||||
* as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
|
||||
*
|
||||
* <p>The region value in the <code>Locale</code> created by the
|
||||
* <code>ULocaleBuilder</code> is always normalized to upper case.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param region the region, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the region; if negative, then the region needs to be
|
||||
* null terminated,
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setRegion(ULocaleBuilder* builder, const char* region, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the variant. If variant is the empty string, the variant in this
|
||||
* <code>ULocaleBuilder</code> is removed. Otherwise, the <code>variant</code>
|
||||
* must be well-formed, or else the ulocbld_buildLocaleID() and
|
||||
* ulocbld_buildLanguageTag() methods will later report an
|
||||
* U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p><b>Note:</b> This method checks if <code>variant</code>
|
||||
* satisfies the
|
||||
* [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
|
||||
* syntax requirements, and normalizes the value to lowercase letters. However,
|
||||
* the <code>Locale</code> class does not impose any syntactic
|
||||
* restriction on variant. To set an ill-formed variant, use a Locale constructor.
|
||||
* If there are multiple unicode_variant_subtag, the caller must concatenate
|
||||
* them with '-' as separator (ex: "foobar-fibar").
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param variant the variant, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the variant; if negative, then the variant needs to be
|
||||
* null terminated,
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setVariant(ULocaleBuilder* builder, const char* variant, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the extension for the given key. If the value is the empty string,
|
||||
* the extension is removed. Otherwise, the <code>key</code> and
|
||||
* <code>value</code> must be well-formed, or else the ulocbld_buildLocaleID()
|
||||
* and ulocbld_buildLanguageTag() methods will
|
||||
* later report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
|
||||
* Setting a value for this key replaces any existing Unicode locale key/type
|
||||
* pairs with those defined in the extension.
|
||||
*
|
||||
* <p><b>Note:</b> The key ('x') is used for the private use code. To be
|
||||
* well-formed, the value for this key needs only to have subtags of one to
|
||||
* eight alphanumeric characters, not two to eight as in the general case.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param key the extension key
|
||||
* @param value the value, a const char * pointer (need not be terminated when
|
||||
* the length is non-negative)
|
||||
* @param length the length of the value; if negative, then the value needs to be
|
||||
* null terminated,
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setExtension(ULocaleBuilder* builder, char key, const char* value, int32_t length);
|
||||
|
||||
/**
|
||||
* Sets the Unicode locale keyword type for the given key. If the type
|
||||
* pointer is nullptr, the keyword is removed.
|
||||
* If the type is the empty string, the keyword is set without type subtags.
|
||||
* Otherwise, the key and type must be well-formed, or else the
|
||||
* ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() methods will later
|
||||
* report an U_ILLEGAL_ARGUMENT_ERROR.
|
||||
*
|
||||
* <p>Keys and types are converted to lower case.
|
||||
*
|
||||
* <p><b>Note</b>:Setting the 'u' extension via {@link #ulocbld_setExtension}
|
||||
* replaces all Unicode locale keywords with those defined in the
|
||||
* extension.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param key the Unicode locale key, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param keyLength the length of the key; if negative, then the key needs to be
|
||||
* null terminated,
|
||||
* @param type the Unicode locale type, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param typeLength the length of the type; if negative, then the type needs to
|
||||
* be null terminated,
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_setUnicodeLocaleKeyword(ULocaleBuilder* builder,
|
||||
const char* key, int32_t keyLength, const char* type, int32_t typeLength);
|
||||
|
||||
/**
|
||||
* Adds a unicode locale attribute, if not already present, otherwise
|
||||
* has no effect. The attribute must not be empty string and must be
|
||||
* well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status
|
||||
* during the ulocbld_buildLocaleID() and ulocbld_buildLanguageTag() calls.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param attribute the attribute, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param length the length of the attribute; if negative, then the attribute
|
||||
* needs to be null terminated.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_addUnicodeLocaleAttribute(
|
||||
ULocaleBuilder* builder, const char* attribute, int32_t length);
|
||||
|
||||
/**
|
||||
* Removes a unicode locale attribute, if present, otherwise has no
|
||||
* effect. The attribute must not be empty string and must be well-formed
|
||||
* or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the ulocbld_buildLocaleID()
|
||||
* and ulocbld_buildLanguageTag() calls.
|
||||
*
|
||||
* <p>Attribute comparison for removal is case-insensitive.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param attribute the attribute, a const char * pointer (need not be
|
||||
* terminated when the length is non-negative)
|
||||
* @param length the length of the attribute; if negative, then the attribute
|
||||
* needs to be null terminated.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_removeUnicodeLocaleAttribute(
|
||||
ULocaleBuilder* builder, const char* attribute, int32_t length);
|
||||
|
||||
/**
|
||||
* Resets the builder to its initial, empty state.
|
||||
* <p>This method clears the internal UErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_clear(ULocaleBuilder* builder);
|
||||
|
||||
/**
|
||||
* Resets the extensions to their initial, empty state.
|
||||
* Language, script, region and variant are unchanged.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ulocbld_clearExtensions(ULocaleBuilder* builder);
|
||||
|
||||
/**
|
||||
* Build the LocaleID string from the fields set on this builder.
|
||||
* If any set methods or during the ulocbld_buildLocaleID() call require memory
|
||||
* allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the ulocbld_buildLocaleID() call and the caller is
|
||||
* free to keep using the same builder to build more locales.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param locale the locale id
|
||||
* @param localeCapacity the size of the locale buffer to store the locale id
|
||||
* @param err the error code
|
||||
* @return the length of the locale id in buffer
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocbld_buildLocaleID(ULocaleBuilder* builder, char* locale,
|
||||
int32_t localeCapacity, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Build the ULocale object from the fields set on this builder.
|
||||
* If any set methods or during the ulocbld_buildULocale() call require memory
|
||||
* allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the ulocbld_buildULocale() call and the caller is
|
||||
* free to keep using the same builder to build more locales.
|
||||
*
|
||||
* @param builder the builder.
|
||||
* @param err the error code.
|
||||
* @return the locale, a ULocale* pointer. The created ULocale must be
|
||||
* destroyed by calling {@link ulocale_close}.
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI ULocale* U_EXPORT2
|
||||
ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Build the IETF BCP 47 language tag string from the fields set on this builder.
|
||||
* If any set methods or during the ulocbld_buildLanguageTag() call require memory
|
||||
* allocation but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
|
||||
* If any of the fields set by the setters are not well-formed, the status
|
||||
* will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
|
||||
* not change after the ulocbld_buildLanguageTag() call and the caller is free
|
||||
* to keep using the same builder to build more locales.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param language the language tag
|
||||
* @param languageCapacity the size of the language buffer to store the language
|
||||
* tag
|
||||
* @param err the error code
|
||||
* @return the length of the language tag in buffer
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ulocbld_buildLanguageTag(ULocaleBuilder* builder, char* language,
|
||||
int32_t languageCapacity, UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while recording sets.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
*
|
||||
* @param builder the builder
|
||||
* @param outErrorCode Set to an error code that occurred while setting subtags.
|
||||
* Unchanged if there is no such error or if outErrorCode
|
||||
* already contained an error.
|
||||
* @return true if U_FAILURE(*outErrorCode)
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode);
|
||||
|
||||
#if U_SHOW_CPLUSPLUS_API
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* \class LocalULocaleBuilderPointer
|
||||
* "Smart pointer" class, closes a ULocaleBuilder via ulocbld_close().
|
||||
* For most methods see the LocalPointerBase base class.
|
||||
*
|
||||
* @see LocalPointerBase
|
||||
* @see LocalPointer
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleBuilderPointer, ULocaleBuilder, ulocbld_close);
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* U_SHOW_CPLUSPLUS_API */
|
||||
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
#endif // __ULOCBUILDER_H__
|
26
thirdparty/icu4c/common/unicode/unorm2.h
vendored
26
thirdparty/icu4c/common/unicode/unorm2.h
vendored
|
@ -181,7 +181,10 @@ U_CAPI const UNormalizer2 * U_EXPORT2
|
|||
unorm2_getNFKDInstance(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
|
||||
* Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Casefold
|
||||
*
|
||||
* Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
|
@ -194,6 +197,25 @@ unorm2_getNFKDInstance(UErrorCode *pErrorCode);
|
|||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
|
||||
* which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
|
||||
* See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
|
||||
*
|
||||
* Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode).
|
||||
* Returns an unmodifiable singleton instance. Do not delete it.
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return the requested Normalizer2, if successful
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI const UNormalizer2 * U_EXPORT2
|
||||
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns a UNormalizer2 instance which uses the specified data file
|
||||
* (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
|
||||
|
@ -206,7 +228,7 @@ unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
|
|||
* Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
|
||||
*
|
||||
* @param packageName NULL for ICU built-in data, otherwise application data package name
|
||||
* @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
|
||||
* @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
|
||||
* @param mode normalization mode (compose or decompose etc.)
|
||||
* @param pErrorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
|
|
61
thirdparty/icu4c/common/unicode/urename.h
vendored
61
thirdparty/icu4c/common/unicode/urename.h
vendored
|
@ -138,8 +138,8 @@
|
|||
#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart)
|
||||
#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default)
|
||||
#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default)
|
||||
#define mixedMeasuresToMicros U_ICU_ENTRY_POINT_RENAME(mixedMeasuresToMicros)
|
||||
#define numSysCleanup U_ICU_ENTRY_POINT_RENAME(numSysCleanup)
|
||||
#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup)
|
||||
#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun)
|
||||
#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun)
|
||||
#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun)
|
||||
|
@ -193,6 +193,7 @@
|
|||
#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns)
|
||||
#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns)
|
||||
#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns)
|
||||
#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup)
|
||||
#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems)
|
||||
#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource)
|
||||
#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias)
|
||||
|
@ -512,9 +513,6 @@
|
|||
#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
|
||||
#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
|
||||
#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap)
|
||||
#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys)
|
||||
#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey)
|
||||
#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys)
|
||||
#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add)
|
||||
#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear)
|
||||
#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField)
|
||||
|
@ -532,6 +530,7 @@
|
|||
#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
|
||||
#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
|
||||
#define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone)
|
||||
#define ucal_getIanaTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getIanaTimeZoneID)
|
||||
#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
|
||||
#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
|
||||
#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType)
|
||||
|
@ -587,6 +586,7 @@
|
|||
#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
|
||||
#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
|
||||
#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
|
||||
#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8)
|
||||
#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
|
||||
#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
|
||||
#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
|
||||
|
@ -955,9 +955,16 @@
|
|||
#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close)
|
||||
#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next)
|
||||
#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open)
|
||||
#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit)
|
||||
#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer)
|
||||
#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io)
|
||||
#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit)
|
||||
#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
|
||||
#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
|
||||
#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou)
|
||||
#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
|
||||
#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode)
|
||||
#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue)
|
||||
#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
|
||||
#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
|
||||
#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
|
||||
|
@ -969,7 +976,11 @@
|
|||
#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
|
||||
#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
|
||||
#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
|
||||
#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit)
|
||||
#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
|
||||
#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou)
|
||||
#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64)
|
||||
#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop)
|
||||
#define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString)
|
||||
#define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition)
|
||||
#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
|
||||
|
@ -1133,6 +1144,39 @@
|
|||
#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType)
|
||||
#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey)
|
||||
#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType)
|
||||
#define ulocale_close U_ICU_ENTRY_POINT_RENAME(ulocale_close)
|
||||
#define ulocale_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocale_getBaseName)
|
||||
#define ulocale_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocale_getKeywordValue)
|
||||
#define ulocale_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocale_getKeywords)
|
||||
#define ulocale_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocale_getLanguage)
|
||||
#define ulocale_getLocaleID U_ICU_ENTRY_POINT_RENAME(ulocale_getLocaleID)
|
||||
#define ulocale_getRegion U_ICU_ENTRY_POINT_RENAME(ulocale_getRegion)
|
||||
#define ulocale_getScript U_ICU_ENTRY_POINT_RENAME(ulocale_getScript)
|
||||
#define ulocale_getUnicodeKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocale_getUnicodeKeywordValue)
|
||||
#define ulocale_getUnicodeKeywords U_ICU_ENTRY_POINT_RENAME(ulocale_getUnicodeKeywords)
|
||||
#define ulocale_getVariant U_ICU_ENTRY_POINT_RENAME(ulocale_getVariant)
|
||||
#define ulocale_isBogus U_ICU_ENTRY_POINT_RENAME(ulocale_isBogus)
|
||||
#define ulocale_openForLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocale_openForLanguageTag)
|
||||
#define ulocale_openForLocaleID U_ICU_ENTRY_POINT_RENAME(ulocale_openForLocaleID)
|
||||
#define ulocbld_addUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ulocbld_addUnicodeLocaleAttribute)
|
||||
#define ulocbld_adoptULocale U_ICU_ENTRY_POINT_RENAME(ulocbld_adoptULocale)
|
||||
#define ulocbld_buildLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocbld_buildLanguageTag)
|
||||
#define ulocbld_buildLocaleID U_ICU_ENTRY_POINT_RENAME(ulocbld_buildLocaleID)
|
||||
#define ulocbld_buildULocale U_ICU_ENTRY_POINT_RENAME(ulocbld_buildULocale)
|
||||
#define ulocbld_clear U_ICU_ENTRY_POINT_RENAME(ulocbld_clear)
|
||||
#define ulocbld_clearExtensions U_ICU_ENTRY_POINT_RENAME(ulocbld_clearExtensions)
|
||||
#define ulocbld_close U_ICU_ENTRY_POINT_RENAME(ulocbld_close)
|
||||
#define ulocbld_copyErrorTo U_ICU_ENTRY_POINT_RENAME(ulocbld_copyErrorTo)
|
||||
#define ulocbld_open U_ICU_ENTRY_POINT_RENAME(ulocbld_open)
|
||||
#define ulocbld_removeUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ulocbld_removeUnicodeLocaleAttribute)
|
||||
#define ulocbld_setExtension U_ICU_ENTRY_POINT_RENAME(ulocbld_setExtension)
|
||||
#define ulocbld_setLanguage U_ICU_ENTRY_POINT_RENAME(ulocbld_setLanguage)
|
||||
#define ulocbld_setLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocbld_setLanguageTag)
|
||||
#define ulocbld_setLocale U_ICU_ENTRY_POINT_RENAME(ulocbld_setLocale)
|
||||
#define ulocbld_setRegion U_ICU_ENTRY_POINT_RENAME(ulocbld_setRegion)
|
||||
#define ulocbld_setScript U_ICU_ENTRY_POINT_RENAME(ulocbld_setScript)
|
||||
#define ulocbld_setUnicodeLocaleKeyword U_ICU_ENTRY_POINT_RENAME(ulocbld_setUnicodeLocaleKeyword)
|
||||
#define ulocbld_setVariant U_ICU_ENTRY_POINT_RENAME(ulocbld_setVariant)
|
||||
#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close)
|
||||
#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion)
|
||||
#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter)
|
||||
|
@ -1213,6 +1257,7 @@
|
|||
#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
|
||||
#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
|
||||
#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
|
||||
#define unorm2_getNFKCSimpleCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCSimpleCasefoldInstance)
|
||||
#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
|
||||
#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
|
||||
#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
|
||||
|
@ -1349,6 +1394,7 @@
|
|||
#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix)
|
||||
#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii)
|
||||
#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic)
|
||||
#define uprv_currencyLeads U_ICU_ENTRY_POINT_RENAME(uprv_currencyLeads)
|
||||
#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus)
|
||||
#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault)
|
||||
#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding)
|
||||
|
@ -1367,6 +1413,7 @@
|
|||
#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs)
|
||||
#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd)
|
||||
#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd)
|
||||
#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass)
|
||||
#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString)
|
||||
#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare)
|
||||
#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal)
|
||||
|
@ -1763,6 +1810,9 @@
|
|||
#define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64)
|
||||
#define usnumf_openForLocale U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocale)
|
||||
#define usnumf_openForLocaleAndGroupingStrategy U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocaleAndGroupingStrategy)
|
||||
#define uspoof_areBidiConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusable)
|
||||
#define uspoof_areBidiConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusableUTF8)
|
||||
#define uspoof_areBidiConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areBidiConfusableUnicodeString)
|
||||
#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable)
|
||||
#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
|
||||
#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
|
||||
|
@ -1778,6 +1828,9 @@
|
|||
#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
|
||||
#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
|
||||
#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
|
||||
#define uspoof_getBidiSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeleton)
|
||||
#define uspoof_getBidiSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeletonUTF8)
|
||||
#define uspoof_getBidiSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getBidiSkeletonUnicodeString)
|
||||
#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
|
||||
#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
|
||||
#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
|
||||
|
|
12
thirdparty/icu4c/common/unicode/uvernum.h
vendored
12
thirdparty/icu4c/common/unicode/uvernum.h
vendored
|
@ -53,13 +53,13 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 73
|
||||
#define U_ICU_VERSION_MAJOR_NUM 74
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 2
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
|
@ -79,7 +79,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _73
|
||||
#define U_ICU_VERSION_SUFFIX _74
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
|
@ -132,7 +132,7 @@
|
|||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "73.2"
|
||||
#define U_ICU_VERSION "74.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
|
@ -145,13 +145,13 @@
|
|||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "73"
|
||||
#define U_ICU_VERSION_SHORT "74"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "73.2"
|
||||
#define U_ICU_DATA_VERSION "74.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
|
|
42
thirdparty/icu4c/common/uniquecharstr.h
vendored
42
thirdparty/icu4c/common/uniquecharstr.h
vendored
|
@ -10,6 +10,7 @@
|
|||
#include "charstr.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -47,22 +48,20 @@ public:
|
|||
}
|
||||
|
||||
/**
|
||||
* Adds a string and returns a unique number for it.
|
||||
* The string's buffer contents must not change, nor move around in memory,
|
||||
* Adds a NUL-terminated string and returns a unique number for it.
|
||||
* The string must not change, nor move around in memory,
|
||||
* while this UniqueCharStrings is in use.
|
||||
* The string contents must be NUL-terminated exactly at s.length().
|
||||
*
|
||||
* Best used with read-only-alias UnicodeString objects that point to
|
||||
* stable storage, such as strings returned by resource bundle functions.
|
||||
* Best used with string data in a stable storage, such as strings returned
|
||||
* by resource bundle functions.
|
||||
*/
|
||||
int32_t add(const UnicodeString &s, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return 0; }
|
||||
int32_t add(const char16_t*p, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return -1; }
|
||||
if (isFrozen) {
|
||||
errorCode = U_NO_WRITE_PERMISSION;
|
||||
return 0;
|
||||
return -1;
|
||||
}
|
||||
// The string points into the resource bundle.
|
||||
const char16_t *p = s.getBuffer();
|
||||
int32_t oldIndex = uhash_geti(&map, p);
|
||||
if (oldIndex != 0) { // found duplicate
|
||||
return oldIndex;
|
||||
|
@ -71,11 +70,33 @@ public:
|
|||
// The strings object is also terminated with one implicit NUL.
|
||||
strings->append(0, errorCode);
|
||||
int32_t newIndex = strings->length();
|
||||
strings->appendInvariantChars(s, errorCode);
|
||||
strings->appendInvariantChars(p, u_strlen(p), errorCode);
|
||||
uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
|
||||
return newIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a unicode string by value and returns a unique number for it.
|
||||
*/
|
||||
int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return -1; }
|
||||
if (isFrozen) {
|
||||
errorCode = U_NO_WRITE_PERMISSION;
|
||||
return -1;
|
||||
}
|
||||
int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
|
||||
if (oldIndex != 0) { // found duplicate
|
||||
return oldIndex;
|
||||
}
|
||||
// We need to store the string content of the UnicodeString.
|
||||
UnicodeString *key = keyStore.create(s);
|
||||
if (key == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return -1;
|
||||
}
|
||||
return add(key->getTerminatedBuffer(), errorCode);
|
||||
}
|
||||
|
||||
void freeze() { isFrozen = true; }
|
||||
|
||||
/**
|
||||
|
@ -90,6 +111,7 @@ public:
|
|||
private:
|
||||
UHashtable map;
|
||||
CharString *strings;
|
||||
MemoryPool<UnicodeString> keyStore;
|
||||
bool isFrozen = false;
|
||||
};
|
||||
|
||||
|
|
63
thirdparty/icu4c/common/uprops.cpp
vendored
63
thirdparty/icu4c/common/uprops.cpp
vendored
|
@ -328,6 +328,53 @@ static UBool hasEmojiProperty(const BinaryProperty &/*prop*/, UChar32 c, UProper
|
|||
return EmojiProps::hasBinaryProperty(c, which);
|
||||
}
|
||||
|
||||
/**
 * Binary property callback: tests whether c is one of the two
 * IDS_Unary_Operator code points, U+2FFE and U+2FFF.
 * The property descriptor and the property selector are not used.
 */
static UBool isIDSUnaryOperator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    // New in Unicode 15.1 for just two characters.
    return c == 0x2FFE || c == 0x2FFF;
}
|
||||
|
||||
/** Ranges (start/limit pairs) of ID_Compat_Math_Continue (only), from UCD PropList.txt. */
|
||||
static constexpr UChar32 ID_COMPAT_MATH_CONTINUE[] = {
|
||||
0x00B2, 0x00B3 + 1,
|
||||
0x00B9, 0x00B9 + 1,
|
||||
0x2070, 0x2070 + 1,
|
||||
0x2074, 0x207E + 1,
|
||||
0x2080, 0x208E + 1
|
||||
};
|
||||
|
||||
/** ID_Compat_Math_Start characters, from UCD PropList.txt. */
|
||||
static constexpr UChar32 ID_COMPAT_MATH_START[] = {
|
||||
0x2202,
|
||||
0x2207,
|
||||
0x221E,
|
||||
0x1D6C1,
|
||||
0x1D6DB,
|
||||
0x1D6FB,
|
||||
0x1D715,
|
||||
0x1D735,
|
||||
0x1D74F,
|
||||
0x1D76F,
|
||||
0x1D789,
|
||||
0x1D7A9,
|
||||
0x1D7C3
|
||||
};
|
||||
|
||||
/**
 * Binary property callback: tests whether c is listed in ID_COMPAT_MATH_START.
 * The property descriptor and the property selector are not used.
 */
static UBool isIDCompatMathStart(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    // Fast path for common scripts: anything below the first table entry is rejected
    // without scanning the table.
    if (c >= ID_COMPAT_MATH_START[0]) {
        for (int32_t i = 0; i < UPRV_LENGTHOF(ID_COMPAT_MATH_START); ++i) {
            if (ID_COMPAT_MATH_START[i] == c) {
                return true;
            }
        }
    }
    return false;
}
|
||||
|
||||
/**
 * Binary property callback: tests whether c falls into one of the
 * start/limit ranges of ID_COMPAT_MATH_CONTINUE, or otherwise is an
 * ID_Compat_Math_Start character.
 */
static UBool isIDCompatMathContinue(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
    const UChar32 *pair = ID_COMPAT_MATH_CONTINUE;
    const UChar32 *const pairsEnd =
        ID_COMPAT_MATH_CONTINUE + UPRV_LENGTHOF(ID_COMPAT_MATH_CONTINUE);
    for (; pair < pairsEnd; pair += 2) {
        const UChar32 rangeStart = pair[0];
        const UChar32 rangeLimit = pair[1];
        if (c < rangeStart) {
            return false;  // below range start
        }
        if (c < rangeLimit) {
            return true;  // below range limit
        }
    }
    // Above every continue-only range: defer to the start-character check.
    return isIDCompatMathStart(prop, c, UCHAR_ID_COMPAT_MATH_START);
}
|
||||
|
||||
static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
|
||||
/*
|
||||
* column and mask values for binary properties from u_getUnicodeProperties().
|
||||
|
@ -409,6 +456,9 @@ static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
|
|||
{ UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_TAG_SEQUENCE
|
||||
{ UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
|
||||
{ UPROPS_SRC_EMOJI, 0, hasEmojiProperty }, // UCHAR_RGI_EMOJI
|
||||
{ UPROPS_SRC_IDSU, 0, isIDSUnaryOperator }, // UCHAR_IDS_UNARY_OPERATOR
|
||||
{ UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathStart }, // UCHAR_ID_COMPAT_MATH_START
|
||||
{ UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathContinue }, // UCHAR_ID_COMPAT_MATH_CONTINUE
|
||||
};
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -759,6 +809,19 @@ uprops_getSource(UProperty which) {
|
|||
|
||||
U_CFUNC void U_EXPORT2
|
||||
uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
if (U_FAILURE(*pErrorCode)) { return; }
|
||||
if (src == UPROPS_SRC_ID_COMPAT_MATH) {
|
||||
// range limits
|
||||
for (UChar32 c : ID_COMPAT_MATH_CONTINUE) {
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
// single characters
|
||||
for (UChar32 c : ID_COMPAT_MATH_START) {
|
||||
sa->add(sa->set, c);
|
||||
sa->add(sa->set, c + 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!ulayout_ensureData(*pErrorCode)) { return; }
|
||||
const UCPTrie *trie;
|
||||
switch (src) {
|
||||
|
|
2
thirdparty/icu4c/common/uprops.h
vendored
2
thirdparty/icu4c/common/uprops.h
vendored
|
@ -379,6 +379,8 @@ enum UPropertySource {
|
|||
UPROPS_SRC_INSC,
|
||||
UPROPS_SRC_VO,
|
||||
UPROPS_SRC_EMOJI,
|
||||
UPROPS_SRC_IDSU,
|
||||
UPROPS_SRC_ID_COMPAT_MATH,
|
||||
/** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
|
||||
UPROPS_SRC_COUNT
|
||||
};
|
||||
|
|
233
thirdparty/icu4c/common/uresbund.cpp
vendored
233
thirdparty/icu4c/common/uresbund.cpp
vendored
|
@ -24,6 +24,7 @@
|
|||
#include "unicode/ures.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "bytesinkutil.h"
|
||||
#include "charstr.h"
|
||||
#include "uresimp.h"
|
||||
#include "ustr_imp.h"
|
||||
|
@ -2351,7 +2352,66 @@ struct GetAllChildrenSink : public ResourceSink {
|
|||
aliasedValue.setData(aliasRB->getResData());
|
||||
aliasedValue.setValidLocaleDataEntry(aliasRB->fValidLocaleDataEntry);
|
||||
aliasedValue.setResource(aliasRB->fRes, ResourceTracer(aliasRB));
|
||||
dest.put(key, aliasedValue, isRoot, errorCode);
|
||||
|
||||
if (aliasedValue.getType() != URES_TABLE) {
|
||||
dest.put(key, aliasedValue, isRoot, errorCode);
|
||||
} else {
|
||||
// if the resource we're aliasing over to is a table, the sink might iterate over its contents.
|
||||
// If it does, it'll get only the things defined in the actual alias target, not the things
|
||||
// the target inherits from its parent resources. So we walk the parent chain for the *alias target*,
|
||||
// calling dest.put() for each of the parent tables we could be inheriting from. This means
|
||||
// that dest.put() has to iterate over the children of multiple tables to get all of the inherited
|
||||
// resource values, but it already has to do that to handle normal vertical inheritance.
|
||||
UResType aliasedValueType = URES_TABLE;
|
||||
CharString tablePath;
|
||||
tablePath.append(aliasRB->fResPath, errorCode);
|
||||
const char* parentKey = key; // dest.put() changes the key
|
||||
dest.put(parentKey, aliasedValue, isRoot, errorCode);
|
||||
UResourceDataEntry* entry = aliasRB->fData;
|
||||
Resource res = aliasRB->fRes;
|
||||
while (aliasedValueType == URES_TABLE && entry->fParent != nullptr) {
|
||||
CharString localPath;
|
||||
localPath.copyFrom(tablePath, errorCode);
|
||||
char* localPathAsCharPtr = localPath.data();
|
||||
const char* childKey;
|
||||
entry = entry->fParent;
|
||||
res = entry->fData.rootRes;
|
||||
Resource newRes = res_findResource(&entry->fData, res, &localPathAsCharPtr, &childKey);
|
||||
if (newRes != RES_BOGUS) {
|
||||
aliasedValue.setData(entry->fData);
|
||||
// TODO: do I also need to call aliasedValue.setValidLocaleDataEntry() ?
|
||||
aliasedValue.setResource(newRes, ResourceTracer(aliasRB)); // probably wrong to use aliasRB here
|
||||
aliasedValueType = aliasedValue.getType();
|
||||
if (aliasedValueType == URES_ALIAS) {
|
||||
// in a few rare cases, when we get to the root resource bundle, the resource in question
|
||||
// won't be an actual table, but will instead be an alias to a table. That is, we have
|
||||
// two aliases in the inheritance path. (For some locales, such as Zulu, we see this with
|
||||
// children of the "fields" resource: "day-narrow" aliases to "day-short", which aliases
|
||||
// to "day".) When this happens, we need to make sure we follow all the aliases.
|
||||
ResourceDataValue& rdv2 = static_cast<ResourceDataValue&>(aliasedValue);
|
||||
aliasRB = getAliasTargetAsResourceBundle(rdv2.getData(), rdv2.getResource(), nullptr, -1,
|
||||
rdv2.getValidLocaleDataEntry(), nullptr, 0,
|
||||
stackTempBundle.getAlias(), &errorCode);
|
||||
tablePath.clear();
|
||||
tablePath.append(aliasRB->fResPath, errorCode);
|
||||
entry = aliasRB->fData;
|
||||
res = aliasRB->fRes;
|
||||
aliasedValue.setData(entry->fData);
|
||||
// TODO: do I also need to call aliasedValue.setValidLocaleDataEntry() ?
|
||||
aliasedValue.setResource(res, ResourceTracer(aliasRB)); // probably wrong to use aliasRB here
|
||||
aliasedValueType = aliasedValue.getType();
|
||||
}
|
||||
if (aliasedValueType == URES_TABLE) {
|
||||
dest.put(parentKey, aliasedValue, isRoot, errorCode);
|
||||
} else {
|
||||
// once we've followed the alias, the resource we're looking at really should
|
||||
// be a table
|
||||
errorCode = U_INTERNAL_PROGRAM_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dest.put(key, value, isRoot, errorCode);
|
||||
|
@ -2657,13 +2717,16 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID,
|
|||
UResourceDataEntry *entry;
|
||||
if(openType != URES_OPEN_DIRECT) {
|
||||
/* first "canonicalize" the locale ID */
|
||||
char canonLocaleID[ULOC_FULLNAME_CAPACITY];
|
||||
uloc_getBaseName(localeID, canonLocaleID, UPRV_LENGTHOF(canonLocaleID), status);
|
||||
if(U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) {
|
||||
CharString canonLocaleID;
|
||||
{
|
||||
CharStringByteSink sink(&canonLocaleID);
|
||||
ulocimp_getBaseName(localeID, sink, status);
|
||||
}
|
||||
if(U_FAILURE(*status)) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return nullptr;
|
||||
}
|
||||
entry = entryOpen(path, canonLocaleID, openType, status);
|
||||
entry = entryOpen(path, canonLocaleID.data(), openType, status);
|
||||
} else {
|
||||
entry = entryOpenDirect(path, localeID, status);
|
||||
}
|
||||
|
@ -2974,15 +3037,39 @@ static UBool isLocaleInList(UEnumeration *locEnum, const char *locToSearch, UErr
|
|||
return false;
|
||||
}
|
||||
|
||||
static void getParentForFunctionalEquivalent(const char* localeID,
|
||||
UResourceBundle* res,
|
||||
UResourceBundle* bund1,
|
||||
char* parent,
|
||||
int32_t parentCapacity) {
|
||||
// Get parent.
|
||||
// First check for a parent from %%Parent resource (Note that in resource trees
|
||||
// such as collation, data may have different parents than in parentLocales).
|
||||
UErrorCode subStatus = U_ZERO_ERROR;
|
||||
parent[0] = '\0';
|
||||
if (res != NULL) {
|
||||
ures_getByKey(res, "%%Parent", bund1, &subStatus);
|
||||
if (U_SUCCESS(subStatus)) {
|
||||
int32_t parentLen = parentCapacity;
|
||||
ures_getUTF8String(bund1, parent, &parentLen, true, &subStatus);
|
||||
}
|
||||
}
|
||||
|
||||
// If none there, use normal truncation parent
|
||||
if (U_FAILURE(subStatus) || parent[0] == 0) {
|
||||
subStatus = U_ZERO_ERROR;
|
||||
uloc_getParent(localeID, parent, parentCapacity, &subStatus);
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
||||
const char *path, const char *resName, const char *keyword, const char *locid,
|
||||
UBool *isAvailable, UBool omitDefault, UErrorCode *status)
|
||||
{
|
||||
char kwVal[1024] = ""; /* value of keyword 'keyword' */
|
||||
char defVal[1024] = ""; /* default value for given locale */
|
||||
char defLoc[1024] = ""; /* default value for given locale */
|
||||
char base[1024] = ""; /* base locale */
|
||||
CharString base; /* base locale */
|
||||
char found[1024] = "";
|
||||
char parent[1024] = "";
|
||||
char full[1024] = "";
|
||||
|
@ -2991,23 +3078,29 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
UErrorCode subStatus = U_ZERO_ERROR;
|
||||
int32_t length = 0;
|
||||
if(U_FAILURE(*status)) return 0;
|
||||
uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus);
|
||||
if(!uprv_strcmp(kwVal, DEFAULT_TAG)) {
|
||||
kwVal[0]=0;
|
||||
CharString kwVal;
|
||||
{
|
||||
CharStringByteSink sink(&kwVal);
|
||||
ulocimp_getKeywordValue(locid, keyword, sink, &subStatus);
|
||||
}
|
||||
if(kwVal == DEFAULT_TAG) {
|
||||
kwVal.clear();
|
||||
}
|
||||
{
|
||||
CharStringByteSink sink(&base);
|
||||
ulocimp_getBaseName(locid, sink, &subStatus);
|
||||
}
|
||||
uloc_getBaseName(locid, base, 1024-1,&subStatus);
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n",
|
||||
locid, keyword, kwVal, base, u_errorName(subStatus));
|
||||
locid, keyword, kwVal.data(), base.data(), u_errorName(subStatus));
|
||||
#endif
|
||||
ures_initStackObject(&bund1);
|
||||
ures_initStackObject(&bund2);
|
||||
|
||||
|
||||
uprv_strcpy(parent, base);
|
||||
uprv_strcpy(found, base);
|
||||
|
||||
if(isAvailable) {
|
||||
base.extract(parent, UPRV_LENGTHOF(parent), subStatus);
|
||||
base.extract(found, UPRV_LENGTHOF(found), subStatus);
|
||||
|
||||
if(isAvailable) {
|
||||
UEnumeration *locEnum = ures_openAvailableLocales(path, &subStatus);
|
||||
*isAvailable = true;
|
||||
if (U_SUCCESS(subStatus)) {
|
||||
|
@ -3054,11 +3147,11 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus));
|
||||
#endif
|
||||
uprv_strcpy(defLoc, parent);
|
||||
if(kwVal[0]==0) {
|
||||
uprv_strcpy(kwVal, defVal);
|
||||
if(kwVal.isEmpty()) {
|
||||
kwVal.append(defVal, defLen, subStatus);
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "%s;%s -> kwVal = %s\n",
|
||||
path?path:"ICUDATA", parent, keyword, kwVal);
|
||||
path?path:"ICUDATA", parent, keyword, kwVal.data());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -3071,16 +3164,19 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
uprv_strcpy(found, ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus));
|
||||
}
|
||||
|
||||
uloc_getParent(found,parent,sizeof(parent),&subStatus);
|
||||
if (uprv_strcmp(found, parent) != 0) {
|
||||
uprv_strcpy(parent, found);
|
||||
} else {
|
||||
getParentForFunctionalEquivalent(found,res,&bund1,parent,sizeof(parent));
|
||||
}
|
||||
ures_close(res);
|
||||
} while(!defVal[0] && *found && uprv_strcmp(found, "root") != 0 && U_SUCCESS(*status));
|
||||
|
||||
/* Now, see if we can find the kwVal collator.. start the search over.. */
|
||||
uprv_strcpy(parent, base);
|
||||
uprv_strcpy(found, base);
|
||||
|
||||
base.extract(parent, UPRV_LENGTHOF(parent), subStatus);
|
||||
base.extract(found, UPRV_LENGTHOF(found), subStatus);
|
||||
|
||||
do {
|
||||
subStatus = U_ZERO_ERROR;
|
||||
res = ures_open(path, parent, &subStatus);
|
||||
if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) {
|
||||
*isAvailable = false;
|
||||
|
@ -3089,7 +3185,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "%s;%s -> %s (looking for %s)\n",
|
||||
path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal);
|
||||
path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal.data());
|
||||
#endif
|
||||
if(U_FAILURE(subStatus)) {
|
||||
*status = subStatus;
|
||||
|
@ -3099,14 +3195,14 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus));
|
||||
#endif
|
||||
if(subStatus == U_ZERO_ERROR) {
|
||||
ures_getByKey(&bund1, kwVal, &bund2, &subStatus);
|
||||
ures_getByKey(&bund1, kwVal.data(), &bund2, &subStatus);
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus));
|
||||
/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal.data(), u_errorName(subStatus));
|
||||
#endif
|
||||
if(subStatus == U_ZERO_ERROR) {
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n",
|
||||
path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus));
|
||||
path?path:"ICUDATA", parent, keyword, kwVal.data(), u_errorName(subStatus));
|
||||
#endif
|
||||
uprv_strcpy(full, parent);
|
||||
if(*full == 0) {
|
||||
|
@ -3139,29 +3235,52 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
} else {
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "err=%s in %s looking for %s\n",
|
||||
u_errorName(subStatus), parent, kwVal);
|
||||
u_errorName(subStatus), parent, kwVal.data());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
subStatus = U_ZERO_ERROR;
|
||||
|
||||
uprv_strcpy(found, parent);
|
||||
uloc_getParent(found,parent,1023,&subStatus);
|
||||
ures_close(res);
|
||||
} while(!full[0] && *found && U_SUCCESS(*status));
|
||||
|
||||
if((full[0]==0) && uprv_strcmp(kwVal, defVal)) {
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal);
|
||||
#endif
|
||||
uprv_strcpy(kwVal, defVal);
|
||||
uprv_strcpy(parent, base);
|
||||
uprv_strcpy(found, base);
|
||||
|
||||
do { /* search for 'default' named item */
|
||||
UBool haveFound = false;
|
||||
// At least for collations which may be aliased, we need to use the VALID locale
|
||||
// as the parent instead of just truncating, as long as the VALID locale is not
|
||||
// root and has a different language than the parent. Use of the VALID locale
|
||||
// here is similar to the procedure used at the end of the previous do-while loop
|
||||
// for all resource types.
|
||||
if (res != NULL && uprv_strcmp(resName, "collations") == 0) {
|
||||
subStatus = U_ZERO_ERROR;
|
||||
const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus);
|
||||
if (U_SUCCESS(subStatus) && validLoc != NULL && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) {
|
||||
char validLang[ULOC_LANG_CAPACITY];
|
||||
char parentLang[ULOC_LANG_CAPACITY];
|
||||
uloc_getLanguage(validLoc, validLang, ULOC_LANG_CAPACITY, &subStatus);
|
||||
uloc_getLanguage(parent, parentLang, ULOC_LANG_CAPACITY, &subStatus);
|
||||
if (U_SUCCESS(subStatus) && uprv_strcmp(validLang, parentLang) != 0) {
|
||||
// validLoc is not root and has a different language than parent, use it instead
|
||||
uprv_strcpy(found, validLoc);
|
||||
haveFound = true;
|
||||
}
|
||||
}
|
||||
subStatus = U_ZERO_ERROR;
|
||||
}
|
||||
if (!haveFound) {
|
||||
uprv_strcpy(found, parent);
|
||||
}
|
||||
|
||||
getParentForFunctionalEquivalent(found,res,&bund1,parent,1023);
|
||||
ures_close(res);
|
||||
subStatus = U_ZERO_ERROR;
|
||||
} while(!full[0] && *found && U_SUCCESS(*status));
|
||||
|
||||
if((full[0]==0) && kwVal != defVal) {
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal.data(), defVal);
|
||||
#endif
|
||||
kwVal.clear().append(defVal, subStatus);
|
||||
base.extract(parent, UPRV_LENGTHOF(parent), subStatus);
|
||||
base.extract(found, UPRV_LENGTHOF(found), subStatus);
|
||||
|
||||
do { /* search for 'default' named item */
|
||||
res = ures_open(path, parent, &subStatus);
|
||||
if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) {
|
||||
*isAvailable = false;
|
||||
|
@ -3170,18 +3289,18 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "%s;%s -> %s (looking for default %s)\n",
|
||||
path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal);
|
||||
path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal.data());
|
||||
#endif
|
||||
if(U_FAILURE(subStatus)) {
|
||||
*status = subStatus;
|
||||
} else if(subStatus == U_ZERO_ERROR) {
|
||||
ures_getByKey(res,resName,&bund1, &subStatus);
|
||||
if(subStatus == U_ZERO_ERROR) {
|
||||
ures_getByKey(&bund1, kwVal, &bund2, &subStatus);
|
||||
ures_getByKey(&bund1, kwVal.data(), &bund2, &subStatus);
|
||||
if(subStatus == U_ZERO_ERROR) {
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA",
|
||||
parent, keyword, kwVal, u_errorName(subStatus));
|
||||
parent, keyword, kwVal.data(), u_errorName(subStatus));
|
||||
#endif
|
||||
uprv_strcpy(full, parent);
|
||||
if(*full == 0) {
|
||||
|
@ -3215,18 +3334,18 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
}
|
||||
}
|
||||
}
|
||||
subStatus = U_ZERO_ERROR;
|
||||
|
||||
uprv_strcpy(found, parent);
|
||||
uloc_getParent(found,parent,1023,&subStatus);
|
||||
getParentForFunctionalEquivalent(found,res,&bund1,parent,1023);
|
||||
ures_close(res);
|
||||
subStatus = U_ZERO_ERROR;
|
||||
} while(!full[0] && *found && U_SUCCESS(*status));
|
||||
}
|
||||
|
||||
if(U_SUCCESS(*status)) {
|
||||
if(!full[0]) {
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal);
|
||||
fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal.data());
|
||||
#endif
|
||||
*status = U_MISSING_RESOURCE_ERROR;
|
||||
} else if(omitDefault) {
|
||||
|
@ -3235,21 +3354,21 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity,
|
|||
#endif
|
||||
if(uprv_strlen(defLoc) <= uprv_strlen(full)) {
|
||||
/* found the keyword in a *child* of where the default tag was present. */
|
||||
if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */
|
||||
if(kwVal == defVal) { /* if the requested kw is default, */
|
||||
/* and the default is in or in an ancestor of the current locale */
|
||||
#if defined(URES_TREE_DEBUG)
|
||||
fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal);
|
||||
fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal.data());
|
||||
#endif
|
||||
kwVal[0]=0;
|
||||
kwVal.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
uprv_strcpy(found, full);
|
||||
if(kwVal[0]) {
|
||||
if(!kwVal.isEmpty()) {
|
||||
uprv_strcat(found, "@");
|
||||
uprv_strcat(found, keyword);
|
||||
uprv_strcat(found, "=");
|
||||
uprv_strcat(found, kwVal);
|
||||
uprv_strcat(found, kwVal.data());
|
||||
} else if(!omitDefault) {
|
||||
uprv_strcat(found, "@");
|
||||
uprv_strcat(found, keyword);
|
||||
|
|
15
thirdparty/icu4c/common/ustrcase.cpp
vendored
15
thirdparty/icu4c/common/ustrcase.cpp
vendored
|
@ -1130,14 +1130,18 @@ int32_t toUpper(uint32_t options,
|
|||
// Adding one only to the final vowel in a longer sequence
|
||||
// (which does not occur in normal writing) would require lookahead.
|
||||
// Set the same flag as for preserving an existing dialytika.
|
||||
if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
|
||||
(upper == 0x399 || upper == 0x3A5)) {
|
||||
data |= HAS_DIALYTIKA;
|
||||
if ((data & HAS_VOWEL) != 0 &&
|
||||
(state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) !=
|
||||
0 &&
|
||||
(upper == 0x399 || upper == 0x3A5)) {
|
||||
data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) ? HAS_DIALYTIKA
|
||||
: HAS_COMBINING_DIALYTIKA;
|
||||
}
|
||||
int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota.
|
||||
if ((data & HAS_YPOGEGRAMMENI) != 0) {
|
||||
numYpogegrammeni = 1;
|
||||
}
|
||||
const UBool hasPrecomposedAccent = (data & HAS_ACCENT) != 0;
|
||||
// Skip combining diacritics after this Greek letter.
|
||||
while (nextIndex < srcLength) {
|
||||
uint32_t diacriticData = getDiacriticData(src[nextIndex]);
|
||||
|
@ -1152,7 +1156,8 @@ int32_t toUpper(uint32_t options,
|
|||
}
|
||||
}
|
||||
if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
|
||||
nextState |= AFTER_VOWEL_WITH_ACCENT;
|
||||
nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT
|
||||
: AFTER_VOWEL_WITH_COMBINING_ACCENT;
|
||||
}
|
||||
// Map according to Greek rules.
|
||||
UBool addTonos = false;
|
||||
|
@ -1163,7 +1168,7 @@ int32_t toUpper(uint32_t options,
|
|||
!isFollowedByCasedLetter(src, nextIndex, srcLength)) {
|
||||
// Keep disjunctive "or" with (only) a tonos.
|
||||
// We use the same "word boundary" conditions as for the Final_Sigma test.
|
||||
if (i == nextIndex) {
|
||||
if (hasPrecomposedAccent) {
|
||||
upper = 0x389; // Preserve the precomposed form.
|
||||
} else {
|
||||
addTonos = true;
|
||||
|
|
13
thirdparty/icu4c/common/uts46.cpp
vendored
13
thirdparty/icu4c/common/uts46.cpp
vendored
|
@ -669,14 +669,6 @@ UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart
|
|||
return length;
|
||||
}
|
||||
|
||||
// Some non-ASCII characters are equivalent to sequences with
|
||||
// non-LDH ASCII characters. To find them:
|
||||
// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
|
||||
static inline UBool
|
||||
isNonASCIIDisallowedSTD3Valid(UChar32 c) {
|
||||
return c==0x2260 || c==0x226E || c==0x226F;
|
||||
}
|
||||
|
||||
// Replace the label in dest with the label string, if the label was modified.
|
||||
// If &label==&dest then the label was modified in-place and labelLength
|
||||
// is the new label length, different from label.length().
|
||||
|
@ -820,10 +812,7 @@ UTS46::processLabel(UnicodeString &dest,
|
|||
}
|
||||
} else {
|
||||
oredChars|=c;
|
||||
if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) {
|
||||
info.labelErrors|=UIDNA_ERROR_DISALLOWED;
|
||||
*s=0xfffd;
|
||||
} else if(c==0xfffd) {
|
||||
if(c==0xfffd) {
|
||||
info.labelErrors|=UIDNA_ERROR_DISALLOWED;
|
||||
}
|
||||
}
|
||||
|
|
307
thirdparty/icu4c/i18n/unicode/uspoof.h
vendored
307
thirdparty/icu4c/i18n/unicode/uspoof.h
vendored
|
@ -19,6 +19,7 @@
|
|||
#ifndef USPOOF_H
|
||||
#define USPOOF_H
|
||||
|
||||
#include "unicode/ubidi.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/parseerr.h"
|
||||
|
@ -83,6 +84,25 @@
|
|||
* the instance should be created once (e.g., upon application startup), and the efficient
|
||||
* {@link uspoof_areConfusable} method can be used at runtime.
|
||||
*
|
||||
* If the paragraph direction used to display the strings is known, the bidi function should be used instead:
|
||||
*
|
||||
* \code{.c}
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* // These strings look identical when rendered in a left-to-right context.
|
||||
* // They look distinct in a right-to-left context.
|
||||
* UChar* str1 = (UChar*) u"A1\u05D0"; // A1א
|
||||
* UChar* str2 = (UChar*) u"A\u05D01"; // Aא1
|
||||
*
|
||||
* USpoofChecker* sc = uspoof_open(&status);
|
||||
* uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
|
||||
*
|
||||
* int32_t bitmask = uspoof_areBidiConfusable(sc, UBIDI_LTR, str1, -1, str2, -1, &status);
|
||||
* UBool result = bitmask != 0;
|
||||
* // areBidiConfusable: 1 (status: U_ZERO_ERROR)
|
||||
* printf("areBidiConfusable: %d (status: %s)\n", result, u_errorName(status));
|
||||
* uspoof_close(sc);
|
||||
* \endcode
|
||||
*
|
||||
* <p>
|
||||
* The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers. It will automatically call
|
||||
* {@link uspoof_close} when the object goes out of scope:
|
||||
|
@ -339,6 +359,51 @@
|
|||
* COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
|
||||
* scripts.
|
||||
*
|
||||
* <h2>Advanced bidirectional usage</h2>
|
||||
* If the paragraph direction with which the identifiers will be displayed is not known, there are
|
||||
* multiple options for confusable detection depending on the circumstances.
|
||||
*
|
||||
* <p>
|
||||
* In some circumstances, the only concern is confusion between identifiers displayed with the same
|
||||
* paragraph direction.
|
||||
*
|
||||
* <p>
|
||||
* An example is the case where identifiers are usernames prefixed with the @ symbol.
|
||||
* That symbol will appear to the left in a left-to-right context, and to the right in a
|
||||
* right-to-left context, so that an identifier displayed in a left-to-right context can never be
|
||||
* confused with an identifier displayed in a right-to-left context:
|
||||
* <ul>
|
||||
* <li>
|
||||
* The usernames "A1א" (A one aleph) and "Aא1" (A aleph 1)
|
||||
* would be considered confusable, since they both appear as \@A1א in a left-to-right context, and the
|
||||
* usernames "אA_1" (aleph A underscore one) and "א1_A" (aleph one underscore A) would be considered
|
||||
* confusable, since they both appear as A_1א@ in a right-to-left context.
|
||||
* </li>
|
||||
* <li>
|
||||
* The username "Mark_" would not be considered confusable with the username "_Mark",
|
||||
* even though the latter would appear as Mark_@ in a right-to-left context, and the
|
||||
* former as \@Mark_ in a left-to-right context.
|
||||
* </li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* In that case, the caller should check for both LTR-confusability and RTL-confusability:
|
||||
*
|
||||
* \code{.cpp}
|
||||
* bool confusableInEitherDirection =
|
||||
* uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, id1, id2, &status) ||
|
||||
* uspoof_areBidiConfusableUnicodeString(sc, UBIDI_RTL, id1, id2, &status);
|
||||
* \endcode
|
||||
*
|
||||
* If the bidiSkeleton is used, the LTR and RTL skeleta should be kept separately and compared, LTR
|
||||
* with LTR and RTL with RTL.
|
||||
*
|
||||
* <p>
|
||||
* In cases where confusability between the visual appearances of an identifier displayed in a
|
||||
* left-to-right context with another identifier displayed in a right-to-left context is a concern,
|
||||
* the LTR skeleton of one can be compared with the RTL skeleton of the other. However, this
|
||||
* very broad definition of confusability may have unexpected results; for instance, it treats the
|
||||
* ASCII identifiers "Mark_" and "_Mark" as confusable.
|
||||
*
|
||||
* <h2>Additional Information</h2>
|
||||
*
|
||||
* A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
|
||||
|
@ -519,7 +584,7 @@ typedef enum USpoofChecks {
|
|||
|
||||
|
||||
/**
|
||||
* Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and
|
||||
* Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
|
||||
* for returned identifier restriction levels in check results.
|
||||
*
|
||||
* @stable ICU 51
|
||||
|
@ -633,8 +698,8 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
|
|||
/**
|
||||
* Open a Spoof Checker from the source form of the spoof data.
|
||||
* The input corresponds to the Unicode data file confusables.txt
|
||||
* as described in Unicode UAX #39. The syntax of the source data
|
||||
* is as described in UAX #39 for this file, and the content of
|
||||
* as described in Unicode Technical Standard #39. The syntax of the source data
|
||||
* is as described in UTS #39 for this file, and the content of
|
||||
* this file is acceptable input.
|
||||
*
|
||||
* The character encoding of the (char *) input text is UTF-8.
|
||||
|
@ -1111,7 +1176,7 @@ uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *
|
|||
|
||||
|
||||
/**
|
||||
* Check the whether two specified strings are visually confusable.
|
||||
* Check whether two specified strings are visually confusable.
|
||||
*
|
||||
* If the strings are confusable, the return value will be nonzero, as long as
|
||||
* {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
|
||||
|
@ -1159,7 +1224,58 @@ uspoof_areConfusable(const USpoofChecker *sc,
|
|||
const UChar *id2, int32_t length2,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Check whether two specified strings are visually confusable when
|
||||
* displayed in a context with the given paragraph direction.
|
||||
*
|
||||
* If the strings are confusable, the return value will be nonzero, as long as
|
||||
* {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
|
||||
*
|
||||
* The bits in the return value correspond to flags for each of the classes of
|
||||
* confusables applicable to the two input strings. According to UTS 39
|
||||
* section 4, the possible flags are:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
|
||||
* <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
|
||||
* <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
|
||||
* </ul>
|
||||
*
|
||||
* If one or more of the above flags were not listed in uspoof_setChecks(), this
|
||||
* function will never report that class of confusable. The check
|
||||
* {@link USPOOF_CONFUSABLE} enables all three flags.
|
||||
*
|
||||
*
|
||||
* @param sc The USpoofChecker
|
||||
* @param direction The paragraph direction with which the identifiers are
|
||||
* displayed. Must be either UBIDI_LTR or UBIDI_RTL.
|
||||
* @param id1 The first of the two identifiers to be compared for
|
||||
* confusability. The strings are in UTF-16 format.
|
||||
* @param length1 the length of the first identifier, expressed in
|
||||
* 16 bit UTF-16 code units, or -1 if the string is
|
||||
* nul terminated.
|
||||
* @param id2 The second of the two identifiers to be compared for
|
||||
* confusability. The identifiers are in UTF-16 format.
|
||||
* @param length2 The length of the second identifier, expressed in
|
||||
* 16 bit UTF-16 code units, or -1 if the string is
|
||||
* nul terminated.
|
||||
* @param status The error code, set if an error occurred while attempting to
|
||||
* perform the check.
|
||||
* Confusability of the identifiers is not reported here,
|
||||
* but through this function's return value.
|
||||
* @return An integer value with bit(s) set corresponding to
|
||||
* the type of confusability found, as defined by
|
||||
* enum USpoofChecks. Zero is returned if the identifiers
|
||||
* are not confusable.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
|
||||
const UChar *id1, int32_t length1,
|
||||
const UChar *id2, int32_t length2,
|
||||
UErrorCode *status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
|
||||
|
@ -1192,14 +1308,45 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
|
|||
const char *id2, int32_t length2,
|
||||
UErrorCode *status);
|
||||
|
||||
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format.
|
||||
*
|
||||
* @param sc The USpoofChecker
|
||||
* @param direction The paragraph direction with which the identifiers are
|
||||
* displayed. Must be either UBIDI_LTR or UBIDI_RTL.
|
||||
* @param id1 The first of the two identifiers to be compared for
|
||||
* confusability. The strings are in UTF-8 format.
|
||||
* @param length1 The length of the first identifier, in bytes, or -1
|
||||
* if the string is nul terminated.
|
||||
* @param id2 The second of the two identifiers to be compared for
|
||||
* confusability. The strings are in UTF-8 format.
|
||||
* @param length2 The length of the second string in bytes, or -1
|
||||
* if the string is nul terminated.
|
||||
* @param status The error code, set if an error occurred while attempting to
|
||||
* perform the check.
|
||||
* Confusability of the strings is not reported here,
|
||||
* but through this function's return value.
|
||||
* @return An integer value with bit(s) set corresponding to
|
||||
* the type of confusability found, as defined by
|
||||
* enum USpoofChecks. Zero is returned if the strings
|
||||
* are not confusable.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*
|
||||
* @see uspoof_areBidiConfusable
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
|
||||
const char *id1, int32_t length1,
|
||||
const char *id2, int32_t length2,
|
||||
UErrorCode *status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Get the "skeleton" for an identifier.
|
||||
* Skeletons are a transformation of the input identifier;
|
||||
* Two identifiers are confusable if their skeletons are identical.
|
||||
* See Unicode UAX #39 for additional information.
|
||||
* See Unicode Technical Standard #39 for additional information.
|
||||
*
|
||||
* Using skeletons directly makes it possible to quickly check
|
||||
* whether an identifier is confusable with any of some large
|
||||
|
@ -1233,11 +1380,50 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
|||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Get the "bidiSkeleton" for an identifier and a direction.
|
||||
* Skeletons are a transformation of the input identifier;
|
||||
* Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
|
||||
* they are RTL-confusable if their RTL bidiSkeletons are identical.
|
||||
* See Unicode Technical Standard #39 for additional information:
|
||||
* https://www.unicode.org/reports/tr39/#Confusable_Detection.
|
||||
*
|
||||
* Using skeletons directly makes it possible to quickly check
|
||||
* whether an identifier is confusable with any of some large
|
||||
* set of existing identifiers, by creating an efficiently
|
||||
* searchable collection of the skeletons.
|
||||
*
|
||||
* @param sc The USpoofChecker.
|
||||
* @param direction The context direction with which the identifier will be
|
||||
* displayed. Must be either UBIDI_LTR or UBIDI_RTL.
|
||||
* @param id The input identifier whose skeleton will be computed.
|
||||
* @param length The length of the input identifier, expressed in 16 bit
|
||||
* UTF-16 code units, or -1 if the string is zero terminated.
|
||||
* @param dest The output buffer, to receive the skeleton string.
|
||||
* @param destCapacity The length of the output buffer, in 16 bit units.
|
||||
* The destCapacity may be zero, in which case the function will
|
||||
* return the actual length of the skeleton.
|
||||
* @param status The error code, set if an error occurred while attempting to
|
||||
* perform the check.
|
||||
* @return The length of the skeleton string. The returned length
|
||||
* is always that of the complete skeleton, even when the
|
||||
* supplied buffer is too small (or of zero length)
|
||||
*
|
||||
* @draft ICU 74
|
||||
* @see uspoof_areBidiConfusable
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,
|
||||
UBiDiDirection direction,
|
||||
const UChar *id, int32_t length,
|
||||
UChar *dest, int32_t destCapacity, UErrorCode *status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Get the "skeleton" for an identifier.
|
||||
* Skeletons are a transformation of the input identifier;
|
||||
* Two identifiers are confusable if their skeletons are identical.
|
||||
* See Unicode UAX #39 for additional information.
|
||||
* See Unicode Technical Standard #39 for additional information.
|
||||
*
|
||||
* Using skeletons directly makes it possible to quickly check
|
||||
* whether an identifier is confusable with any of some large
|
||||
|
@ -1273,6 +1459,46 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
|
|||
char *dest, int32_t destCapacity,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Get the "bidiSkeleton" for an identifier and a direction.
|
||||
* Skeletons are a transformation of the input identifier;
|
||||
* Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
|
||||
* they are RTL-confusable if their RTL bidiSkeletons are identical.
|
||||
* See Unicode Technical Standard #39 for additional information:
|
||||
* https://www.unicode.org/reports/tr39/#Confusable_Detection.
|
||||
*
|
||||
* Using skeletons directly makes it possible to quickly check
|
||||
* whether an identifier is confusable with any of some large
|
||||
* set of existing identifiers, by creating an efficiently
|
||||
* searchable collection of the skeletons.
|
||||
*
|
||||
* @param sc The USpoofChecker
|
||||
* @param direction The context direction with which the identifier will be
|
||||
* displayed. Must be either UBIDI_LTR or UBIDI_RTL.
|
||||
* @param id The UTF-8 format identifier whose skeleton will be computed.
|
||||
* @param length The length of the input string, in bytes,
|
||||
* or -1 if the string is zero terminated.
|
||||
* @param dest The output buffer, to receive the skeleton string.
|
||||
* @param destCapacity The length of the output buffer, in bytes.
|
||||
* The destCapacity may be zero, in which case the function will
|
||||
* return the actual length of the skeleton.
|
||||
* @param status The error code, set if an error occurred while attempting to
|
||||
* perform the check. Possible Errors include U_INVALID_CHAR_FOUND
|
||||
* for invalid UTF-8 sequences, and
|
||||
* U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
|
||||
* to hold the complete skeleton.
|
||||
* @return The length of the skeleton string, in bytes. The returned length
|
||||
* is always that of the complete skeleton, even when the
|
||||
* supplied buffer is too small (or of zero length)
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
|
||||
const char *id, int32_t length, char *dest,
|
||||
int32_t destCapacity, UErrorCode *status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
|
||||
* in http://unicode.org/Public/security/latest/xidmodifications.txt
|
||||
|
@ -1510,11 +1736,42 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
|
|||
const icu::UnicodeString &s2,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings.
|
||||
*
|
||||
* @param sc The USpoofChecker
|
||||
* @param direction The paragraph direction with which the identifiers are
|
||||
* displayed. Must be either UBIDI_LTR or UBIDI_RTL.
|
||||
* @param s1 The first of the two identifiers to be compared for
|
||||
* confusability. The strings are UnicodeString objects (UTF-16).
|
||||
* @param s2 The second of the two identifiers to be compared for
|
||||
* confusability. The strings are UnicodeString objects (UTF-16).
|
||||
* @param status The error code, set if an error occurred while attempting to
|
||||
* perform the check.
|
||||
* Confusability of the identifiers is not reported here,
|
||||
* but through this function's return value.
|
||||
* @return An integer value with bit(s) set corresponding to
|
||||
* the type of confusability found, as defined by
|
||||
* enum USpoofChecks. Zero is returned if the identifiers
|
||||
* are not confusable.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*
|
||||
* @see uspoof_areBidiConfusable
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
|
||||
UBiDiDirection direction,
|
||||
const icu::UnicodeString &s1,
|
||||
const icu::UnicodeString &s2,
|
||||
UErrorCode *status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Get the "skeleton" for an identifier.
|
||||
* Skeletons are a transformation of the input identifier;
|
||||
* Two identifiers are confusable if their skeletons are identical.
|
||||
* See Unicode UAX #39 for additional information.
|
||||
* See Unicode Technical Standard #39 for additional information.
|
||||
*
|
||||
* Using skeletons directly makes it possible to quickly check
|
||||
* whether an identifier is confusable with any of some large
|
||||
|
@ -1540,6 +1797,36 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
|||
icu::UnicodeString &dest,
|
||||
UErrorCode *status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Get the "bidiSkeleton" for an identifier and a direction.
|
||||
* Skeletons are a transformation of the input identifier;
|
||||
* Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
|
||||
* they are RTL-confusable if their RTL bidiSkeletons are identical.
|
||||
* See Unicode Technical Standard #39 for additional information:
|
||||
* https://www.unicode.org/reports/tr39/#Confusable_Detection.
|
||||
*
|
||||
* Using skeletons directly makes it possible to quickly check
|
||||
* whether an identifier is confusable with any of some large
|
||||
* set of existing identifiers, by creating an efficiently
|
||||
* searchable collection of the skeletons.
|
||||
*
|
||||
* @param sc The USpoofChecker.
|
||||
* @param direction The context direction with which the identifier will be
|
||||
* displayed. Must be either UBIDI_LTR or UBIDI_RTL.
|
||||
* @param id The input identifier whose bidiSkeleton will be computed.
|
||||
* @param dest The output identifier, to receive the skeleton string.
|
||||
* @param status The error code, set if an error occurred while attempting to
|
||||
* perform the check.
|
||||
* @return A reference to the destination (skeleton) string.
|
||||
*
|
||||
* @draft ICU 74
|
||||
*/
|
||||
U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(
|
||||
const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,
|
||||
icu::UnicodeString &dest, UErrorCode *status);
|
||||
#endif /* U_HIDE_DRAFT_API */
|
||||
|
||||
/**
|
||||
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
|
||||
* in http://unicode.org/Public/security/latest/xidmodifications.txt
|
||||
|
|
182
thirdparty/icu4c/i18n/uspoof.cpp
vendored
182
thirdparty/icu4c/i18n/uspoof.cpp
vendored
|
@ -15,6 +15,7 @@
|
|||
*
|
||||
* Unicode Spoof Detection
|
||||
*/
|
||||
#include "unicode/ubidi.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/normalizer2.h"
|
||||
#include "unicode/uspoof.h"
|
||||
|
@ -141,8 +142,8 @@ void U_CALLCONV initializeStatics(UErrorCode &status) {
|
|||
u"\\U0001DF00-\\U0001DF1E\\U0001DF25-\\U0001DF2A\\U0001E08F\\U0001E7E0-"
|
||||
u"\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED\\U0001E7EE\\U0001E7F0-"
|
||||
u"\\U0001E7FE\\U00020000-\\U0002A6DF\\U0002A700-\\U0002B739\\U0002B740-"
|
||||
u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U00030000-"
|
||||
u"\\U0003134A\\U00031350-\\U000323AF]";
|
||||
u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U0002EBF0-"
|
||||
u"\\U0002EE5D\\U00030000-\\U0003134A\\U00031350-\\U000323AF]";
|
||||
|
||||
gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status);
|
||||
if (gRecommendedSet == nullptr) {
|
||||
|
@ -538,6 +539,90 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
|
|||
return result;
|
||||
}
|
||||
|
||||
// UTF-16 (char16_t) entry point for the bidi-aware confusability check.
// Wraps both identifiers in UnicodeString objects and forwards them, together
// with sc, direction and status, to uspoof_areBidiConfusableUnicodeString(),
// whose result is returned unchanged.
// If either wrapper string is bogus, *status is set to
// U_ILLEGAL_ARGUMENT_ERROR and 0 is returned without performing the check.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
|
||||
const char16_t *id1, int32_t length1,
|
||||
const char16_t *id2, int32_t length2,
|
||||
UErrorCode *status) {
|
||||
// The first constructor argument is true exactly when the length is -1,
// i.e. when the caller passed a NUL-terminated string.
UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
|
||||
UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
|
||||
// Reject input the constructors could not wrap before delegating.
if (id1Str.isBogus() || id2Str.isBogus()) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
|
||||
}
|
||||
|
||||
// UTF-8 entry point for the bidi-aware confusability check.
// Converts both identifiers to UnicodeString with UnicodeString::fromUTF8()
// and forwards them, together with sc, direction and status, to
// uspoof_areBidiConfusableUnicodeString(), whose result is returned unchanged.
// A length below -1 for either identifier sets *status to
// U_ILLEGAL_ARGUMENT_ERROR and returns 0 without performing the check.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
|
||||
const char *id1, int32_t length1, const char *id2,
|
||||
int32_t length2, UErrorCode *status) {
|
||||
// -1 is the only negative length accepted (see the strlen fallback below).
if (length1 < -1 || length2 < -1) {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
// A non-negative length is used as given; otherwise the byte length is
// measured with uprv_strlen(), so the input must be NUL-terminated.
UnicodeString id1Str = UnicodeString::fromUTF8(
|
||||
StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1))));
|
||||
UnicodeString id2Str = UnicodeString::fromUTF8(
|
||||
StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2))));
|
||||
return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
|
||||
}
|
||||
|
||||
// Decides whether two identifiers are confusable once bidi reordering in the
// given paragraph direction is taken into account.
//
// Steps:
//   1. Validate the checker; bail out with 0 on failure.
//   2. Require the USPOOF_CONFUSABLE check to be enabled, otherwise report
//      U_INVALID_STATE_ERROR.
//   3. Compute the bidi skeleton of each identifier; differing skeletons (or a
//      failure while computing them) yield 0.
//   4. Classify the confusability from the resolved script sets of the two
//      identifiers and mask the result with the checks enabled on the checker.
//
// Returns a bit mask of USPOOF_*_CONFUSABLE flags, or 0 when the identifiers
// are not confusable or *status reports a failure.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
                                                               UBiDiDirection direction,
                                                               const icu::UnicodeString &id1,
                                                               const icu::UnicodeString &id2,
                                                               UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    //
    // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
    //   and for definitions of the types (single, whole, mixed-script) of confusables.

    // We only care about a few of the check flags.  Ignore the others.
    // If no tests relevant to this function have been specified, return an error.
    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    //        but logically we would just return 0 (no error).
    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
        *status = U_INVALID_STATE_ERROR;
        return 0;
    }

    // Compute the skeletons and check for confusability.
    UnicodeString id1Skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
    UnicodeString id2Skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    if (id1Skeleton != id2Skeleton) {
        return 0;
    }

    // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate
    // classes of confusables according to UTS 39 section 4.  Start by computing the resolved script sets
    // of id1 and id2.
    ScriptSet id1RSS;
    This->getResolvedScriptSet(id1, id1RSS, *status);
    ScriptSet id2RSS;
    This->getResolvedScriptSet(id2, id2RSS, *status);
    // Do not classify from script sets whose computation reported an error:
    // the flags would be meaningless while *status already signals failure.
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Turn on all applicable flags
    uint32_t result = 0;
    if (id1RSS.intersects(id2RSS)) {
        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    } else {
        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
        }
    }

    // Turn off flags that the user doesn't want
    return result & This->fChecks;
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uspoof_checkUnicodeString(const USpoofChecker *sc,
|
||||
|
@ -697,6 +782,60 @@ uspoof_getSkeleton(const USpoofChecker *sc,
|
|||
return destStr.length();
|
||||
}
|
||||
|
||||
// UTF-16 entry point for computing a bidi skeleton.
//
// Wraps `id` in a UnicodeString (aliasing constructor; a length of -1 is
// flagged through its first argument), computes the skeleton with
// uspoof_getBidiSkeletonUnicodeString(), and extracts it into `dest`.
// Returns whatever UnicodeString::extract() returns for the skeleton, or 0 with
// U_ILLEGAL_ARGUMENT_ERROR when the wrapped input is bogus.
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
                                                const UChar *id, int32_t length, UChar *dest,
                                                int32_t destCapacity, UErrorCode *status) {
    const UnicodeString input((length == -1), id, length);  // Aliasing constructor
    if (!input.isBogus()) {
        UnicodeString skeleton;
        uspoof_getBidiSkeletonUnicodeString(sc, direction, input, skeleton, status);
        return skeleton.extract(dest, destCapacity, *status);
    }

    // The caller-supplied pointer/length pair did not form a usable string.
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
}
|
||||
|
||||
|
||||
|
||||
// Computes the bidi skeleton of `id` for the given paragraph direction.
//
// The identifier is first reordered with the bidi API (ubidi_setPara followed
// by ubidi_writeReordered with UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING),
// and the reordered text is then passed to uspoof_getSkeletonUnicodeString().
//
// `dest` is always cleared first and is the object returned by reference.
// Errors reported through *status:
//   - U_ILLEGAL_ARGUMENT_ERROR when direction is neither UBIDI_LTR nor UBIDI_RTL;
//   - U_MEMORY_ALLOCATION_ERROR when the UBiDi object or the reorder buffer
//     cannot be obtained;
//   - anything set by the bidi calls or by the skeleton computation.
U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
                                                                        UBiDiDirection direction,
                                                                        const UnicodeString &id,
                                                                        UnicodeString &dest,
                                                                        UErrorCode *status) {
    dest.remove();
    if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return dest;
    }
    // With an already-failing status the function bails out right after
    // ubidi_setPara() anyway; return now instead of allocating a UBiDi first.
    if (U_FAILURE(*status)) {
        return dest;
    }
    UBiDi *bidi = ubidi_open();
    // NOTE(review): ubidi_open() is expected to return nullptr when it cannot
    // allocate - confirm against ubidi.h. Report that as an allocation failure
    // rather than passing a null object on to ubidi_setPara().
    if (bidi == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return dest;
    }
    ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
                  /*embeddingLevels*/ nullptr, status);
    if (U_FAILURE(*status)) {
        ubidi_close(bidi);
        return dest;
    }
    UnicodeString reordered;
    int32_t const size = ubidi_getProcessedLength(bidi);
    UChar* const reorderedBuffer = reordered.getBuffer(size);
    if (reorderedBuffer == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        ubidi_close(bidi);
        return dest;
    }
    ubidi_writeReordered(bidi, reorderedBuffer, size,
                         UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
    // The buffer must be released before `reordered` is used again, and the
    // UBiDi object is no longer needed whether or not the write succeeded.
    reordered.releaseBuffer(size);
    ubidi_close(bidi);

    if (U_FAILURE(*status)) {
        return dest;
    }

    // The type parameter is deprecated since ICU 58; any number may be passed.
    constexpr uint32_t deprecatedType = 58;
    return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
}
|
||||
|
||||
|
||||
|
||||
U_I18N_API UnicodeString & U_EXPORT2
|
||||
|
@ -721,19 +860,17 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
|
|||
for (inputIndex=0; inputIndex < normalizedLen; ) {
|
||||
UChar32 c = nfdId.char32At(inputIndex);
|
||||
inputIndex += U16_LENGTH(c);
|
||||
This->fSpoofData->confusableLookup(c, skelStr);
|
||||
if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
|
||||
This->fSpoofData->confusableLookup(c, skelStr);
|
||||
}
|
||||
}
|
||||
|
||||
gNfdNormalizer->normalize(skelStr, dest, *status);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uspoof_getSkeletonUTF8(const USpoofChecker *sc,
|
||||
uint32_t type,
|
||||
const char *id, int32_t length,
|
||||
char *dest, int32_t destCapacity,
|
||||
U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
|
||||
int32_t length, char *dest, int32_t destCapacity,
|
||||
UErrorCode *status) {
|
||||
SpoofImpl::validateThis(sc, *status);
|
||||
if (U_FAILURE(*status)) {
|
||||
|
@ -744,7 +881,8 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
|
|||
return 0;
|
||||
}
|
||||
|
||||
UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id))));
|
||||
UnicodeString srcStr = UnicodeString::fromUTF8(
|
||||
StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
|
||||
UnicodeString destStr;
|
||||
uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
|
@ -752,8 +890,28 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
|
|||
}
|
||||
|
||||
int32_t lengthInUTF8 = 0;
|
||||
u_strToUTF8(dest, destCapacity, &lengthInUTF8,
|
||||
destStr.getBuffer(), destStr.length(), status);
|
||||
u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
|
||||
return lengthInUTF8;
|
||||
}
|
||||
|
||||
// UTF-8 entry point for computing a bidi skeleton.
//
// Converts `id` from UTF-8 (a length of -1 means the byte length is measured
// with uprv_strlen()), computes the skeleton with
// uspoof_getBidiSkeletonUnicodeString(), and writes it back to `dest` as UTF-8
// via u_strToUTF8(). Returns the UTF-8 length reported by u_strToUTF8(), or 0
// when the length argument is below -1 (U_ILLEGAL_ARGUMENT_ERROR) or the
// skeleton computation fails.
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
                                                    const char *id, int32_t length, char *dest,
                                                    int32_t destCapacity, UErrorCode *status) {
    if (!(length >= -1)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Past the guard above, a negative length can only be -1.
    const int32_t byteCount = (length < 0) ? static_cast<int32_t>(uprv_strlen(id)) : length;
    const UnicodeString utf16Input = UnicodeString::fromUTF8(StringPiece(id, byteCount));

    UnicodeString skeleton;
    uspoof_getBidiSkeletonUnicodeString(sc, direction, utf16Input, skeleton, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    int32_t utf8Length = 0;
    u_strToUTF8(dest, destCapacity, &utf8Length, skeleton.getBuffer(), skeleton.length(), status);
    return utf8Length;
}
|
||||
|
||||
|
|
Binary file not shown.
Loading…
Reference in a new issue