virtualx-engine/thirdparty/icu4c/common/ruleiter.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2003-2011, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: September 24 2003
* Since: ICU 2.8
**********************************************************************
*/
#include "ruleiter.h"
#include "unicode/parsepos.h"
#include "unicode/symtable.h"
#include "unicode/unistr.h"
#include "unicode/utf16.h"
#include "patternprops.h"

/* \U87654321 or \ud800\udc00 */
#define MAX_U_NOTATION_LEN 12

U_NAMESPACE_BEGIN

/**
 * Builds an iterator over theText that reads from the index held in thePos.
 * No variable-expansion buffer is active initially (buf is null, bufPos is 0).
 * theSym may be null; next() checks for that before expanding variables.
 */
RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText,
                                             const SymbolTable* theSym,
                                             ParsePosition& thePos)
    : text(theText),
      pos(thePos),
      sym(theSym),
      buf(nullptr),
      bufPos(0) {
}

/**
 * Reports whether the iterator is exhausted: no variable buffer is being
 * read and the parse position sits exactly at the length of the text.
 */
UBool RuleCharacterIterator::atEnd() const {
    // While a variable value is still being consumed we are never at the end.
    if (buf != nullptr) {
        return false;
    }
    return pos.getIndex() == text.length();
}

/**
 * Fetches the next code point and advances past it, honoring the option
 * bits in 'options':
 *   PARSE_VARIABLES - a SYMBOL_REF read from the main text (not from a
 *                     variable buffer) is parsed as a variable reference and
 *                     its value becomes the current input; requires sym.
 *   SKIP_WHITESPACE - pattern white space is skipped.
 *   PARSE_ESCAPES   - a backslash starts an escape that is decoded.
 *
 * @param options   bitmask of the flags above
 * @param isEscaped set to false on entry, and to true once a backslash
 *                  escape has been processed (even if it was malformed)
 * @param ec        if already a failure, DONE is returned at once; set to
 *                  U_UNDEFINED_VARIABLE when a variable lookup yields null,
 *                  or U_MALFORMED_UNICODE_ESCAPE when unescaping yields a
 *                  negative value
 * @return the code point read, or DONE on error; whatever _current()
 *         reports (possibly DONE) is returned as-is when nothing else applies
 */
UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return DONE;
    }

    isEscaped = false;

    // None of these change while the loop runs, so evaluate them once.
    const bool expandVariables = (options & PARSE_VARIABLES) != 0 && sym != nullptr;
    const bool skipWhiteSpace = (options & SKIP_WHITESPACE) != 0;
    const bool parseEscapes = (options & PARSE_ESCAPES) != 0;

    UChar32 ch = DONE;
    while (true) {
        ch = _current();
        _advance(U16_LENGTH(ch));

        // Variable references are only recognized in the main text, never
        // while a variable's value is already being read (buf is set).
        if (expandVariables && buf == nullptr && ch == SymbolTable::SYMBOL_REF) {
            const UnicodeString varName = sym->parseReference(text, pos, text.length());
            if (varName.length() == 0) {
                // Isolated SYMBOL_REF: hand it back literally.  The caller
                // must be prepared for this.
                return ch;
            }
            bufPos = 0;
            buf = sym->lookup(varName);
            if (buf == nullptr) {
                ec = U_UNDEFINED_VARIABLE;
                return DONE;
            }
            // An empty value contributes nothing; resume reading the text.
            if (buf->length() == 0) {
                buf = nullptr;
            }
            continue;
        }

        if (skipWhiteSpace && PatternProps::isWhiteSpace(ch)) {
            continue;
        }

        if (parseEscapes && ch == 0x5C /*'\\'*/) {
            UnicodeString escapeText;
            int32_t consumed = 0;
            ch = lookahead(escapeText, MAX_U_NOTATION_LEN).unescapeAt(consumed);
            jumpahead(consumed);
            isEscaped = true;
            if (ch < 0) {
                ec = U_MALFORMED_UNICODE_ESCAPE;
                return DONE;
            }
        }

        return ch;
    }
}

/**
 * Snapshots the iterator state into p: the text index, the active variable
 * buffer pointer (possibly null), and the offset within that buffer.
 */
void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
    p.pos = pos.getIndex();
    p.buf = buf;
    p.bufPos = bufPos;
}

/**
 * Restores iterator state from p: the text index, the variable buffer
 * pointer, and the offset within that buffer are all overwritten.
 */
void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
    pos.setIndex(p.pos);
    buf = p.buf;
    bufPos = p.bufPos;
}

/**
 * Advances past any run of pattern white space at the current position.
 * Does nothing unless SKIP_WHITESPACE is set in options.
 */
void RuleCharacterIterator::skipIgnored(int32_t options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }
    UChar32 ch = _current();
    while (PatternProps::isWhiteSpace(ch)) {
        _advance(U16_LENGTH(ch));
        ch = _current();
    }
}

/**
 * Extracts upcoming input into result without moving the iterator.
 * Reads from the variable buffer when one is active, otherwise from the
 * main text at the current parse index.
 *
 * @param result       receives the extracted text
 * @param maxLookAhead length passed to extract(); a negative value is
 *                     replaced by 0x7FFFFFFF
 * @return result
 */
UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    const int32_t limit = (maxLookAhead < 0) ? 0x7FFFFFFF : maxLookAhead;
    if (buf == nullptr) {
        text.extract(pos.getIndex(), limit, result);
    } else {
        buf->extract(bufPos, limit, result);
    }
    return result;
}

/**
 * Moves the iterator forward by count units by delegating to _advance(),
 * which applies to the variable buffer if one is active and to the main
 * text otherwise.
 */
void RuleCharacterIterator::jumpahead(int32_t count) {
    this->_advance(count);
}

/*
UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    int32_t b = pos.getIndex();
    text.extract(0, b, result);
    return result.append((char16_t) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
}
*/

/**
 * Returns the code point at the current position without advancing.
 * An active variable buffer takes precedence over the main text; when the
 * text index is at or beyond the end of the text, DONE is returned.
 */
UChar32 RuleCharacterIterator::_current() const {
    if (buf != nullptr) {
        return buf->char32At(bufPos);
    }
    const int32_t index = pos.getIndex();
    if (index < text.length()) {
        return text.char32At(index);
    }
    return static_cast<UChar32>(DONE);
}

/**
 * Moves the current position forward by count units.
 *
 * When a variable buffer is active the offset within that buffer is
 * advanced; once the offset reaches or passes the end of the buffer the
 * buffer is dropped so that reading resumes from the main text.
 * Otherwise the parse index in the main text is advanced and clamped to
 * the text length.
 */
void RuleCharacterIterator::_advance(int32_t count) {
    if (buf != nullptr) {
        bufPos += count;
        // Use >= rather than ==: an overshooting count (for example from
        // jumpahead()) must still release the buffer, mirroring the clamp
        // applied to the main text below.  With == the iterator would be
        // left with an out-of-range bufPos and atEnd() could never be true.
        if (bufPos >= buf->length()) {
            buf = nullptr;
        }
    } else {
        pos.setIndex(pos.getIndex() + count);
        if (pos.getIndex() > text.length()) {
            pos.setIndex(text.length());
        }
    }
}

U_NAMESPACE_END

//eof
[Complex Text Layouts] Add third-party TextServer dependencies (ICU, HarfBuzz, Graphite). 2020-08-11 11:10:23 +02:00			`// © 2016 and later: Unicode, Inc. and others.`
			`// License & terms of use: http://www.unicode.org/copyright.html`
			`/*`
			`**********************************************************************`
			`* Copyright (c) 2003-2011, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* Author: Alan Liu`
			`* Created: September 24 2003`
			`* Since: ICU 2.8`
			`**********************************************************************`
			`*/`
			`#include "ruleiter.h"`
			`#include "unicode/parsepos.h"`
			`#include "unicode/symtable.h"`
			`#include "unicode/unistr.h"`
			`#include "unicode/utf16.h"`
			`#include "patternprops.h"`

			`/* \U87654321 or \ud800\udc00 */`
			`#define MAX_U_NOTATION_LEN 12`

			`U_NAMESPACE_BEGIN`

			`RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,`
			`ParsePosition& thePos) :`
			`text(theText),`
			`pos(thePos),`
			`sym(theSym),`
			`buf(0),`
			`bufPos(0)`
			`{}`

			`UBool RuleCharacterIterator::atEnd() const {`
			`return buf == 0 && pos.getIndex() == text.length();`
			`}`

			`UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {`
			`if (U_FAILURE(ec)) return DONE;`

			`UChar32 c = DONE;`
ICU: Update to version 72.1 2022-10-28 08:11:55 +02:00			`isEscaped = false;`
[Complex Text Layouts] Add third-party TextServer dependencies (ICU, HarfBuzz, Graphite). 2020-08-11 11:10:23 +02:00
			`for (;;) {`
			`c = _current();`
			`_advance(U16_LENGTH(c));`

			`if (c == SymbolTable::SYMBOL_REF && buf == 0 &&`
			`(options & PARSE_VARIABLES) != 0 && sym != 0) {`
			`UnicodeString name = sym->parseReference(text, pos, text.length());`
			`// If name is empty there was an isolated SYMBOL_REF;`
			`// return it. Caller must be prepared for this.`
			`if (name.length() == 0) {`
			`break;`
			`}`
			`bufPos = 0;`
			`buf = sym->lookup(name);`
			`if (buf == 0) {`
			`ec = U_UNDEFINED_VARIABLE;`
			`return DONE;`
			`}`
			`// Handle empty variable value`
			`if (buf->length() == 0) {`
			`buf = 0;`
			`}`
			`continue;`
			`}`

			`if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {`
			`continue;`
			`}`

			`if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {`
			`UnicodeString tempEscape;`
			`int32_t offset = 0;`
			`c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);`
			`jumpahead(offset);`
ICU: Update to version 72.1 2022-10-28 08:11:55 +02:00			`isEscaped = true;`
[Complex Text Layouts] Add third-party TextServer dependencies (ICU, HarfBuzz, Graphite). 2020-08-11 11:10:23 +02:00			`if (c < 0) {`
			`ec = U_MALFORMED_UNICODE_ESCAPE;`
			`return DONE;`
			`}`
			`}`

			`break;`
			`}`

			`return c;`
			`}`

			`void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {`
			`p.buf = buf;`
			`p.pos = pos.getIndex();`
			`p.bufPos = bufPos;`
			`}`

			`void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {`
			`buf = p.buf;`
			`pos.setIndex(p.pos);`
			`bufPos = p.bufPos;`
			`}`

			`void RuleCharacterIterator::skipIgnored(int32_t options) {`
			`if ((options & SKIP_WHITESPACE) != 0) {`
			`for (;;) {`
			`UChar32 a = _current();`
			`if (!PatternProps::isWhiteSpace(a)) break;`
			`_advance(U16_LENGTH(a));`
			`}`
			`}`
			`}`

			`UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {`
			`if (maxLookAhead < 0) {`
			`maxLookAhead = 0x7FFFFFFF;`
			`}`
			`if (buf != 0) {`
			`buf->extract(bufPos, maxLookAhead, result);`
			`} else {`
			`text.extract(pos.getIndex(), maxLookAhead, result);`
			`}`
			`return result;`
			`}`

			`void RuleCharacterIterator::jumpahead(int32_t count) {`
			`_advance(count);`
			`}`

			`/*`
			`UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {`
			`int32_t b = pos.getIndex();`
			`text.extract(0, b, result);`
Update HarfBuzz, ICU and FreeType HarfBuzz: Update to version 7.3.0 ICU4C: Update to version 73.1 FreeType: Update to version 2.13.0 2023-05-23 02:05:01 +02:00			`return result.append((char16_t) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index`
[Complex Text Layouts] Add third-party TextServer dependencies (ICU, HarfBuzz, Graphite). 2020-08-11 11:10:23 +02:00			`}`
			`*/`

			`UChar32 RuleCharacterIterator::_current() const {`
			`if (buf != 0) {`
			`return buf->char32At(bufPos);`
			`} else {`
			`int i = pos.getIndex();`
			`return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;`
			`}`
			`}`

			`void RuleCharacterIterator::_advance(int32_t count) {`
			`if (buf != 0) {`
			`bufPos += count;`
			`if (bufPos == buf->length()) {`
			`buf = 0;`
			`}`
			`} else {`
			`pos.setIndex(pos.getIndex() + count);`
			`if (pos.getIndex() > text.length()) {`
			`pos.setIndex(text.length());`
			`}`
			`}`
			`}`

			`U_NAMESPACE_END`

			`//eof`