virtualx-engine/drivers/nrex/nrex.cpp

//  NREX: Node RegEx
//  Version 0.1
//
//  Copyright (c) 2015, Zher Huei Lee
//  All rights reserved.
//
//  This software is provided 'as-is', without any express or implied
//  warranty.  In no event will the authors be held liable for any damages
//  arising from the use of this software.
//
//  Permission is granted to anyone to use this software for any purpose,
//  including commercial applications, and to alter it and redistribute it
//  freely, subject to the following restrictions:
//
//   1. The origin of this software must not be misrepresented; you must not
//      claim that you wrote the original software. If you use this software
//      in a product, an acknowledgment in the product documentation would
//      be appreciated but is not required.
//
//   2. Altered source versions must be plainly marked as such, and must not
//      be misrepresented as being the original software.
//
//   3. This notice may not be removed or altered from any source
//      distribution.
//

#include "nrex.hpp"

#ifdef NREX_UNICODE
#include <wctype.h>
#include <wchar.h>
#define NREX_ISALPHANUM iswalnum
#define NREX_ISSPACE iswspace
#define NREX_STRLEN wcslen
#else
#include <ctype.h>
#include <string.h>
#define NREX_ISALPHANUM isalnum
#define NREX_ISSPACE isspace
#define NREX_STRLEN strlen
#endif

#ifdef NREX_THROW_ERROR
#define NREX_COMPILE_ERROR(M) throw nrex_compile_error(M)
#else
#define NREX_COMPILE_ERROR(M) reset(); return false
#endif

#ifndef NREX_NEW
#define NREX_NEW(X) new X
#define NREX_NEW_ARRAY(X, N) new X[N]
#define NREX_DELETE(X) delete X
#define NREX_DELETE_ARRAY(X) delete[] X
#endif

template<typename T>
class nrex_array
{
    private:
        T* _data;
        unsigned int _reserved;
        unsigned int _size;
    public:
        nrex_array()
            : _data(NREX_NEW_ARRAY(T, 2))
            , _reserved(2)
            , _size(0)
        {
        }

        ~nrex_array()
        {
            NREX_DELETE_ARRAY(_data);
        }

        unsigned int size() const
        {
            return _size;
        }

        void reserve(unsigned int size)
        {
            T* old = _data;
            _data = NREX_NEW_ARRAY(T, size);
            _reserved = size;
            for (unsigned int i = 0; i < _size; ++i)
            {
                _data[i] = old[i];
            }
            NREX_DELETE_ARRAY(old);
        }

        void push(T item)
        {
            if (_size == _reserved)
            {
                reserve(_reserved * 2);
            }
            _data[_size] = item;
            _size++;
        }

        T& top()
        {
            return _data[_size - 1];
        }

        const T& operator[] (unsigned int i) const
        {
            return _data[i];
        }

        void pop()
        {
            if (_size > 0)
            {
                --_size;
            }
        }
};

static int nrex_parse_hex(nrex_char c)
{
    if ('0' <= c && c <= '9')
    {
        return int(c - '0');
    }
    else if ('a' <= c && c <= 'f')
    {
        return int(c - 'a') + 10;
    }
    else if ('A' <= c && c <= 'F')
    {
        return int(c - 'A') + 10;
    }
    return -1;
}

static nrex_char nrex_unescape(const nrex_char*& c)
{
    switch (c[1])
    {
        case '0': ++c; return '\0';
        case 'a': ++c; return '\a';
        case 'e': ++c; return '\e';
        case 'f': ++c; return '\f';
        case 'n': ++c; return '\n';
        case 'r': ++c; return '\r';
        case 't': ++c; return '\t';
        case 'v': ++c; return '\v';
        case 'b': ++c; return '\b';
        case 'x':
        {
            int point = 0;
            for (int i = 2; i <= 3; ++i)
            {
                int res = nrex_parse_hex(c[i]);
                if (res == -1)
                {
                    return '\0';
                }
                point = (point << 4) + res;
            }
            c = &c[3];
            return nrex_char(point);
        }
        case 'u':
        {
            int point = 0;
            for (int i = 2; i <= 5; ++i)
            {
                int res = nrex_parse_hex(c[i]);
                if (res == -1)
                {
                    return '\0';
                }
                point = (point << 4) + res;
            }
            c = &c[5];
            return nrex_char(point);
        }
    }
    return (++c)[0];
}

struct nrex_search
{
        const nrex_char* str;
        nrex_result* captures;
        int end;
        bool complete;

        nrex_char at(int pos)
        {
            return str[pos];
        }

        nrex_search(const nrex_char* str, nrex_result* captures)
            : str(str)
            , captures(captures)
            , end(0)
        {
        }
};

struct nrex_node
{
        nrex_node* next;
        nrex_node* previous;
        nrex_node* parent;
        bool quantifiable;
        int length;

        nrex_node(bool quantify = false)
            : next(NULL)
            , previous(NULL)
            , parent(NULL)
            , quantifiable(quantify)
            , length(-1)
        {
        }

        virtual ~nrex_node()
        {
            if (next)
            {
                NREX_DELETE(next);
            }
        }

        virtual int test(nrex_search* s, int pos) const
        {
            return next ? next->test(s, pos) : -1;
        }

        virtual int test_parent(nrex_search* s, int pos) const
        {
            if (next)
            {
                pos = next->test(s, pos);
            }
            if (parent && pos >= 0)
            {
                pos = parent->test_parent(s, pos);
            }
            if (pos >= 0)
            {
                s->complete = true;
            }
            return pos;
        }

        void increment_length(int amount, bool subtract = false)
        {
            if (amount >= 0 && length >= 0)
            {
                if (!subtract)
                {
                    length += amount;
                }
                else
                {
                    length -= amount;
                }
            }
            else
            {
                length = -1;
            }
            if (parent)
            {
                parent->increment_length(amount, subtract);
            }
        }
};

struct nrex_node_group : public nrex_node
{
        static const int NonCapture = -1;
        static const int Bracket = -2;
        static const int LookAhead = -3;
        static const int LookBehind = -4;

        int mode;
        bool negate;
        nrex_array<nrex_node*> childset;
        nrex_node* back;

        nrex_node_group(int mode)
            : nrex_node(true)
            , mode(mode)
            , negate(false)
            , back(NULL)
        {
            if (mode != Bracket)
            {
                length = 0;
            }
            else
            {
                length = 1;
            }
            if (mode == LookAhead || mode == LookBehind)
            {
                quantifiable = false;
            }
        }

        virtual ~nrex_node_group()
        {
            for (unsigned int i = 0; i < childset.size(); ++i)
            {
                NREX_DELETE(childset[i]);
            }

        }

        int test(nrex_search* s, int pos) const
        {
            if (mode >= 0)
            {
                s->captures[mode].start = pos;
            }
            for (unsigned int i = 0; i < childset.size(); ++i)
            {
                s->complete = false;
                int offset = 0;
                if (mode == LookBehind)
                {
                    if (pos < length)
                    {
                        return -1;
                    }
                    offset = length;
                }
                int res = childset[i]->test(s, pos - offset);
                if (s->complete)
                {
                    return res;
                }
                if (negate)
                {
                    if (res < 0)
                    {
                        res = pos + 1;
                    }
                    else
                    {
                        return -1;
                    }
                    if (i + 1 < childset.size())
                    {
                        continue;
                    }
                }
                if (res >= 0)
                {
                    if (mode >= 0)
                    {
                        s->captures[mode].length = res - pos;
                    }
                    else if (mode == LookAhead || mode == LookBehind)
                    {
                        res = pos;
                    }
                    return next ? next->test(s, res) : res;
                }
            }
            return -1;
        }

        virtual int test_parent(nrex_search* s, int pos) const
        {
            if (mode >= 0)
            {
                s->captures[mode].length = pos - s->captures[mode].start;
            }
            return nrex_node::test_parent(s, pos);
        }

        void add_childset()
        {
            if (childset.size() > 0 && mode != Bracket)
            {
                length = -1;
            }
            back = NULL;
        }

        void add_child(nrex_node* node)
        {
            node->parent = this;
            node->previous = back;
            if (back && mode != Bracket)
            {
                back->next = node;
            }
            else
            {
                childset.push(node);
            }
            if (mode != Bracket)
            {
                increment_length(node->length);
            }
            back = node;
        }

        nrex_node* swap_back(nrex_node* node)
        {
            if (!back)
            {
                add_child(node);
                return NULL;
            }
            nrex_node* old = back;
            if (!old->previous)
            {
                childset.pop();
            }
            if (mode != Bracket)
            {
                increment_length(old->length, true);
            }
            back = old->previous;
            add_child(node);
            return old;
        }

        void pop_back()
        {
            if (back)
            {
                nrex_node* old = back;
                if (!old->previous)
                {
                    childset.pop();
                }
                if (mode != Bracket)
                {
                    increment_length(old->length, true);
                }
                back = old->previous;
                NREX_DELETE(old);
            }
        }
};

struct nrex_node_char : public nrex_node
{
        nrex_char ch;

        nrex_node_char(nrex_char c)
            : nrex_node(true)
            , ch(c)
        {
            length = 1;
        }

        int test(nrex_search* s, int pos) const
        {
            if (s->end <= pos || 0 > pos || s->at(pos) != ch)
            {
                return -1;
            }
            return next ? next->test(s, pos + 1) : pos + 1;
        }
};

struct nrex_node_range : public nrex_node
{
        nrex_char start;
        nrex_char end;

        nrex_node_range(nrex_char s, nrex_char e)
            : nrex_node(true)
            , start(s)
            , end(e)
        {
            length = 1;
        }

        int test(nrex_search* s, int pos) const
        {
            if (s->end <= pos || 0 > pos)
            {
                return -1;
            }
            nrex_char c = s->at(pos);
            if (c < start || end < c)
            {
                return -1;
            }
            return next ? next->test(s, pos + 1) : pos + 1;
        }
};

enum nrex_class_type
{
    nrex_class_none,
    nrex_class_alnum,
    nrex_class_alpha,
    nrex_class_blank,
    nrex_class_cntrl,
    nrex_class_digit,
    nrex_class_graph,
    nrex_class_lower,
    nrex_class_print,
    nrex_class_punct,
    nrex_class_space,
    nrex_class_upper,
    nrex_class_xdigit,
    nrex_class_word
};

static bool nrex_compare_class(const nrex_char** pos, const char* text)
{
    unsigned int i = 0;
    for (i = 0; text[i] != '\0'; ++i)
    {
        if ((*pos)[i] != text[i])
        {
            return false;
        }
    }
    if ((*pos)[i++] != ':' || (*pos)[i] != ']')
    {
        return false;
    }
    *pos = &(*pos)[i];
    return true;
}

#define NREX_COMPARE_CLASS(POS, NAME) if (nrex_compare_class(POS, #NAME)) return nrex_class_ ## NAME

static nrex_class_type nrex_parse_class(const nrex_char** pos)
{
    NREX_COMPARE_CLASS(pos, alnum);
    NREX_COMPARE_CLASS(pos, alpha);
    NREX_COMPARE_CLASS(pos, blank);
    NREX_COMPARE_CLASS(pos, cntrl);
    NREX_COMPARE_CLASS(pos, digit);
    NREX_COMPARE_CLASS(pos, graph);
    NREX_COMPARE_CLASS(pos, lower);
    NREX_COMPARE_CLASS(pos, print);
    NREX_COMPARE_CLASS(pos, punct);
    NREX_COMPARE_CLASS(pos, space);
    NREX_COMPARE_CLASS(pos, upper);
    NREX_COMPARE_CLASS(pos, xdigit);
    NREX_COMPARE_CLASS(pos, word);
    return nrex_class_none;
}

struct nrex_node_class : public nrex_node
{
        nrex_class_type type;

        nrex_node_class(nrex_class_type t)
            : nrex_node(true)
            , type(t)
        {
            length = 1;
        }

        int test(nrex_search* s, int pos) const
        {
            if (s->end <= pos || 0 > pos)
            {
                return -1;
            }
            if (!test_class(s->at(pos)))
            {
                return -1;
            }
            return next ? next->test(s, pos + 1) : pos + 1;
        }

        bool test_class(nrex_char c) const
        {
            if ((0 <= c && c <= 0x1F) || c == 0x7F)
            {
                if (type == nrex_class_cntrl)
                {
                    return true;
                }
            }
            else if (c < 0x7F)
            {
                if (type == nrex_class_print)
                {
                    return true;
                }
                else if (type == nrex_class_graph && c != ' ')
                {
                    return true;
                }
                else if ('0' <= c && c <= '9')
                {
                    switch (type)
                    {
                        case nrex_class_alnum:
                        case nrex_class_digit:
                        case nrex_class_xdigit:
                        case nrex_class_word:
                            return true;
                        default:
                            break;
                    }
                }
                else if ('A' <= c && c <= 'Z')
                {
                    switch (type)
                    {
                        case nrex_class_alnum:
                        case nrex_class_alpha:
                        case nrex_class_upper:
                        case nrex_class_word:
                            return true;
                        case nrex_class_xdigit:
                            if (c <= 'F')
                            {
                                return true;
                            }
                        default:
                            break;
                    }
                }
                else if ('a' <= c && c <= 'z')
                {
                    switch (type)
                    {
                        case nrex_class_alnum:
                        case nrex_class_alpha:
                        case nrex_class_lower:
                        case nrex_class_word:
                            return true;
                        case nrex_class_xdigit:
                            if (c <= 'f')
                            {
                                return true;
                            }
                        default:
                            break;
                    }
                }
            }
            switch (c)
            {
                case ' ':
                case '\t':
                    if (type == nrex_class_blank)
                    {
                        return true;
                    }
                case '\r':
                case '\n':
                case '\f':
                    if (type == nrex_class_space)
                    {
                        return true;
                    }
                    break;
                case '_':
                    if (type == nrex_class_word)
                    {
                        return true;
                    }
                case ']':
                case '[':
                case '!':
                case '"':
                case '#':
                case '$':
                case '%':
                case '&':
                case '\'':
                case '(':
                case ')':
                case '*':
                case '+':
                case ',':
                case '.':
                case '/':
                case ':':
                case ';':
                case '<':
                case '=':
                case '>':
                case '?':
                case '@':
                case '\\':
                case '^':
                case '`':
                case '{':
                case '|':
                case '}':
                case '~':
                case '-':
                    if (type == nrex_class_punct)
                    {
                        return true;
                    }
                    break;
                default:
                    break;
            }
            return false;
        }
};

static bool nrex_is_shorthand(nrex_char repr)
{
    switch (repr)
    {
        case 'W':
        case 'w':
        case 'D':
        case 'd':
        case 'S':
        case 's':
            return true;
    }
    return false;
}

struct nrex_node_shorthand : public nrex_node
{
        nrex_char repr;

        nrex_node_shorthand(nrex_char c)
            : nrex_node(true)
            , repr(c)
        {
            length = 1;
        }

        int test(nrex_search* s, int pos) const
        {
            if (s->end <= pos || 0 > pos)
            {
                return -1;
            }
            bool found = false;
            bool invert = false;
            nrex_char c = s->at(pos);
            switch (repr)
            {
                case '.':
                    found = true;
                    break;
                case 'W':
                    invert = true;
                case 'w':
                    if (c == '_' || NREX_ISALPHANUM(c))
                    {
                        found = true;
                    }
                    break;
                case 'D':
                    invert = true;
                case 'd':
                    if ('0' <= c && c <= '9')
                    {
                        found = true;
                    }
                    break;
                case 'S':
                    invert = true;
                case 's':
                    if (NREX_ISSPACE(c))
                    {
                        found = true;
                    }
                    break;
            }
            if (found == invert)
            {
                return -1;
            }
            return next ? next->test(s, pos + 1) : pos + 1;
        }
};

static bool nrex_is_quantifier(nrex_char repr)
{
    switch (repr)
    {
        case '?':
        case '*':
        case '+':
        case '{':
            return true;
    }
    return false;
}

struct nrex_node_quantifier : public nrex_node
{
        int min;
        int max;
        bool greedy;
        nrex_node* child;

        nrex_node_quantifier(int min, int max)
            : nrex_node()
            , min(min)
            , max(max)
            , greedy(true)
            , child(NULL)
        {
        }

        virtual ~nrex_node_quantifier()
        {
            if (child)
            {
                NREX_DELETE(child);
            }
        }

        int test(nrex_search* s, int pos) const
        {
            return test_step(s, pos, 0, pos);
        }

        int test_step(nrex_search* s, int pos, int level, int start) const
        {
            if (pos > s->end)
            {
                return -1;
            }
            if (!greedy && level > min)
            {
                int res = pos;
                if (next)
                {
                    res = next->test(s, res);
                }
                if (s->complete)
                {
                    return res;
                }
                if (res >= 0 && parent->test_parent(s, res) >= 0)
                {
                    return res;
                }
            }
            if (max >= 0 && level > max)
            {
                return -1;
            }
            if (level > 1 && level > min + 1 && pos == start)
            {
                return -1;
            }
            int res = pos;
            if (level >= 1)
            {
                res = child->test(s, pos);
                if (s->complete)
                {
                    return res;
                }
            }
            if (res >= 0)
            {
                int res_step = test_step(s, res, level + 1, start);
                if (res_step >= 0)
                {
                    return res_step;
                }
                else if (greedy && level >= min)
                {
                    if (next)
                    {
                        res = next->test(s, res);
                    }
                    if (s->complete)
                    {
                        return res;
                    }
                    if (res >= 0 && parent->test_parent(s, res) >= 0)
                    {
                        return res;
                    }
                }
            }
            return -1;
        }
};

struct nrex_node_anchor : public nrex_node
{
        bool end;

        nrex_node_anchor(bool end)
            : nrex_node()
            , end(end)
        {
            length = 0;
        }

        int test(nrex_search* s, int pos) const
        {
            if (!end && pos != 0)
            {
                return -1;
            }
            else if (end && pos != s->end)
            {
                return -1;
            }
            return next ? next->test(s, pos) : pos;
        }
};

struct nrex_node_word_boundary : public nrex_node
{
        bool inverse;

        nrex_node_word_boundary(bool inverse)
            : nrex_node()
            , inverse(inverse)
        {
            length = 0;
        }

        int test(nrex_search* s, int pos) const
        {
            bool left = false;
            bool right = false;
            if (pos != 0)
            {
                nrex_char c = s->at(pos - 1);
                if (c == '_' || NREX_ISALPHANUM(c))
                {
                    left = true;
                }
            }
            if (pos != s->end)
            {
                nrex_char c = s->at(pos);
                if (c == '_' || NREX_ISALPHANUM(c))
                {
                    right = true;
                }
            }
            if ((left != right) == inverse)
            {
                return -1;
            }
            return next ? next->test(s, pos) : pos;
        }
};

struct nrex_node_backreference : public nrex_node
{
        int ref;

        nrex_node_backreference(int ref)
            : nrex_node(true)
            , ref(ref)
        {
            length = -1;
        }

        int test(nrex_search* s, int pos) const
        {
            nrex_result& r = s->captures[ref];
            for (int i = 0; i < r.length; ++i)
            {
                if (pos + i >= s->end)
                {
                    return -1;
                }
                if (s->at(r.start + i) != s->at(pos + i))
                {
                    return -1;
                }
            }
            return next ? next->test(s, pos + r.length) : pos + r.length;
        }
};

bool nrex_has_lookbehind(nrex_array<nrex_node_group*>& stack)
{
    for (unsigned int i = 0; i < stack.size(); i++)
    {
        if (stack[i]->mode == nrex_node_group::LookBehind)
        {
            return true;
        }
    }
    return false;
}

nrex::nrex()
    : _capturing(0)
    , _root(NULL)
{
}

nrex::nrex(const nrex_char* pattern, int captures)
    : _capturing(0)
    , _root(NULL)
{
    compile(pattern, captures);
}

nrex::~nrex()
{
    if (_root)
    {
        NREX_DELETE(_root);
    }
}

bool nrex::valid() const
{
    return (_root != NULL);
}

void nrex::reset()
{
    _capturing = 0;
    if (_root)
    {
        NREX_DELETE(_root);
    }
    _root = NULL;
}

int nrex::capture_size() const
{
    if (_root)
    {
        return _capturing + 1;
    }
    return 0;
}

bool nrex::compile(const nrex_char* pattern, int captures)
{
    reset();
    nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing));
    nrex_array<nrex_node_group*> stack;
    stack.push(root);
    _root = root;

    for (const nrex_char* c = pattern; c[0] != '\0'; ++c)
    {
        if (c[0] == '(')
        {
            if (c[1] == '?')
            {
                if (c[2] == ':')
                {
                    c = &c[2];
                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::NonCapture));
                    stack.top()->add_child(group);
                    stack.push(group);
                }
                else if (c[2] == '!' || c[2] == '=')
                {
                    c = &c[2];
                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::LookAhead));
                    group->negate = (c[0] == '!');
                    stack.top()->add_child(group);
                    stack.push(group);
                }
                else if (c[2] == '<' && (c[3] == '!' || c[3] == '='))
                {
                    c = &c[3];
                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::LookBehind));
                    group->negate = (c[0] == '!');
                    stack.top()->add_child(group);
                    stack.push(group);
                }
                else
                {
                    NREX_COMPILE_ERROR("unrecognised qualifier for group");
                }
            }
            else if (captures >= 0 && _capturing < captures)
            {
                nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing));
                stack.top()->add_child(group);
                stack.push(group);
            }
            else
            {
                nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::NonCapture));
                stack.top()->add_child(group);
                stack.push(group);
            }
        }
        else if (c[0] == ')')
        {
            if (stack.size() > 1)
            {
                stack.pop();
            }
            else
            {
                NREX_COMPILE_ERROR("unexpected ')'");
            }
        }
        else if (c[0] == '[')
        {
            nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::Bracket));
            stack.top()->add_child(group);
            if (c[1] == '^')
            {
                group->negate = true;
                ++c;
            }
            bool first_child = true;
            nrex_char previous_child;
            bool previous_child_single = false;
            while (true)
            {
                group->add_childset();
                ++c;
                if (c[0] == '\0')
                {
                    NREX_COMPILE_ERROR("unclosed bracket expression '['");
                }
                if (c[0] == '[' && c[1] == ':')
                {
                    const nrex_char* d = &c[2];
                    nrex_class_type cls = nrex_parse_class(&d);
                    if (cls != nrex_class_none)
                    {
                        c = d;
                        group->add_child(NREX_NEW(nrex_node_class(cls)));
                        previous_child_single = false;
                    }
                    else
                    {
                        group->add_child(NREX_NEW(nrex_node_char('[')));
                        previous_child = '[';
                        previous_child_single = true;
                    }
                }
                else if (c[0] == ']' && !first_child)
                {
                    break;
                }
                else if (c[0] == '\\')
                {
                    if (nrex_is_shorthand(c[1]))
                    {
                        group->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
                        ++c;
                        previous_child_single = false;
                    }
                    else
                    {
                        const nrex_char* d = c;
                        nrex_char unescaped = nrex_unescape(d);
                        if (c == d)
                        {
                            NREX_COMPILE_ERROR("invalid escape token");
                        }
                        group->add_child(NREX_NEW(nrex_node_char(unescaped)));
                        c = d;
                        previous_child = unescaped;
                        previous_child_single = true;
                    }
                }
                else if (previous_child_single && c[0] == '-')
                {
                    bool is_range = false;
                    nrex_char next;
                    if (c[1] != '\0' && c[1] != ']')
                    {
                        if (c[1] == '\\')
                        {
                            const nrex_char* d = ++c;
                            next = nrex_unescape(d);
                            if (c == d)
                            {
                                NREX_COMPILE_ERROR("invalid escape token in range");
                            }
                        }
                        else
                        {
                            next = c[1];
                            ++c;
                        }
                        is_range = true;
                    }
                    if (is_range)
                    {
                        if (next < previous_child)
                        {
                            NREX_COMPILE_ERROR("text range out of order");
                        }
                        group->pop_back();
                        group->add_child(NREX_NEW(nrex_node_range(previous_child, next)));
                        previous_child_single = false;
                    }
                    else
                    {
                        group->add_child(NREX_NEW(nrex_node_char(c[0])));
                        previous_child = c[0];
                        previous_child_single = true;
                    }
                }
                else
                {
                    group->add_child(NREX_NEW(nrex_node_char(c[0])));
                    previous_child = c[0];
                    previous_child_single = true;
                }
                first_child = false;
            }
        }
        else if (nrex_is_quantifier(c[0]))
        {
            int min = 0;
            int max = -1;
            bool valid_quantifier = true;
            if (c[0] == '?')
            {
                min = 0;
                max = 1;
            }
            else if (c[0] == '+')
            {
                min = 1;
                max = -1;
            }
            else if (c[0] == '*')
            {
                min = 0;
                max = -1;
            }
            else if (c[0] == '{')
            {
                bool max_set = false;
                const nrex_char* d = c;
                while (true)
                {
                    ++d;
                    if (d[0] == '\0')
                    {
                        valid_quantifier = false;
                        break;
                    }
                    else if (d[0] == '}')
                    {
                        break;
                    }
                    else if (d[0] == ',')
                    {
                        max_set = true;
                        continue;
                    }
                    else if (d[0] < '0' || '9' < d[0])
                    {
                        valid_quantifier = false;
                        break;
                    }
                    if (max_set)
                    {
                        if (max < 0)
                        {
                            max = int(d[0] - '0');
                        }
                        else
                        {
                            max = max * 10 + int(d[0] - '0');
                        }
                    }
                    else
                    {
                        min = min * 10 + int(d[0] - '0');
                    }
                }
                if (!max_set)
                {
                    max = min;
                }
                if (valid_quantifier)
                {
                    c = d;
                }
            }
            if (valid_quantifier)
            {
                if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
                {
                    NREX_COMPILE_ERROR("element not quantifiable");
                }
                nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
                if (min == max)
                {
                    if (stack.top()->back->length >= 0)
                    {
                        quant->length = max * stack.top()->back->length;
                    }
                }
                else
                {
                    if (nrex_has_lookbehind(stack))
                    {
                        NREX_COMPILE_ERROR("variable length quantifiers inside lookbehind not supported");
                    }
                }
                quant->child = stack.top()->swap_back(quant);
                quant->child->previous = NULL;
                quant->child->next = NULL;
                quant->child->parent = quant;
                if (c[1] == '?')
                {
                    quant->greedy = false;
                    ++c;
                }
            }
            else
            {
                stack.top()->add_child(NREX_NEW(nrex_node_char(c[0])));
            }
        }
        else if (c[0] == '|')
        {
            if (nrex_has_lookbehind(stack))
            {
                NREX_COMPILE_ERROR("alternations inside lookbehind not supported");
            }
            stack.top()->add_childset();
        }
        else if (c[0] == '^' || c[0] == '$')
        {
            stack.top()->add_child(NREX_NEW(nrex_node_anchor((c[0] == '$'))));
        }
        else if (c[0] == '.')
        {
            stack.top()->add_child(NREX_NEW(nrex_node_shorthand('.')));
        }
        else if (c[0] == '\\')
        {
            if (nrex_is_shorthand(c[1]))
            {
                stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
                ++c;
            }
            else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
            {
                int ref = 0;
                bool unclosed = false;
                if (c[1] == 'g')
                {
                    unclosed = true;
                    c = &c[2];
                }
                while ('0' <= c[1] && c[1] <= '9')
                {
                    ref = ref * 10 + int(c[1] - '0');
                    ++c;
                }
                if (c[1] == '}')
                {
                    unclosed = false;
                    ++c;
                }
                if (ref > _capturing || ref <= 0 || unclosed)
                {
                    NREX_COMPILE_ERROR("backreference to non-existent capture");
                }
                if (nrex_has_lookbehind(stack))
                {
                    NREX_COMPILE_ERROR("backreferences inside lookbehind not supported");
                }
                stack.top()->add_child(NREX_NEW(nrex_node_backreference(ref)));
            }
            else if (c[1] == 'b' || c[1] == 'B')
            {
                stack.top()->add_child(NREX_NEW(nrex_node_word_boundary(c[1] == 'B')));
                ++c;
            }
            else
            {
                const nrex_char* d = c;
                nrex_char unescaped = nrex_unescape(d);
                if (c == d)
                {
                    NREX_COMPILE_ERROR("invalid escape token");
                }
                stack.top()->add_child(NREX_NEW(nrex_node_char(unescaped)));
                c = d;
            }
        }
        else
        {
            stack.top()->add_child(NREX_NEW(nrex_node_char(c[0])));
        }
    }
    if (stack.size() > 1)
    {
        NREX_COMPILE_ERROR("unclosed group '('");
    }
    return true;
}

bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
{
    if (!_root)
    {
        return false;
    }
    nrex_search s(str, captures);
    if (end >= offset)
    {
        s.end = end;
    }
    else
    {
        s.end = NREX_STRLEN(str);
    }
    for (int i = offset; i <= s.end; ++i)
    {
        for (int c = 0; c <= _capturing; ++c)
        {
            captures[c].start = 0;
            captures[c].length = 0;
        }
        if (_root->test(&s, i) >= 0)
        {
            return true;
        }
    }
    return false;
}