/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
            This module by Zoltan Herczeg and Sebastian Pop
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2019 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* Choose the identifier that FF_FUN expands to, i.e. the name given to the
function defined with FF_FUN further down in this file.

Exactly one selector is honoured, tested in this order: FFCS, FFCS_2,
FFCS_MASK, FFCPS_0, FFCPS_1, FFCPS_DEFAULT. When FF_UTF is also defined the
chosen name gets a "_utf" suffix. If no selector is defined, FF_FUN is left
undefined.

NOTE(review): the function body also tests FFCPS, FFCPS_DIFF1 and
FFCPS_CHAR1A2A, none of which are defined here - presumably the including
file defines them together with the FFCPS_* selector; confirm at the include
site. */

# if defined(FFCS)
#  if defined(FF_UTF)
#   define FF_FUN ffcs_utf
#  else
#   define FF_FUN ffcs
#  endif

# elif defined(FFCS_2)
#  if defined(FF_UTF)
#   define FF_FUN ffcs_2_utf
#  else
#   define FF_FUN ffcs_2
#  endif

# elif defined(FFCS_MASK)
#  if defined(FF_UTF)
#   define FF_FUN ffcs_mask_utf
#  else
#   define FF_FUN ffcs_mask
#  endif

# elif defined(FFCPS_0)
#  if defined (FF_UTF)
#   define FF_FUN ffcps_0_utf
#  else
#   define FF_FUN ffcps_0
#  endif

# elif defined (FFCPS_1)
#  if defined (FF_UTF)
#   define FF_FUN ffcps_1_utf
#  else
#   define FF_FUN ffcps_1
#  endif

# elif defined (FFCPS_DEFAULT)
#  if defined (FF_UTF)
#   define FF_FUN ffcps_default_utf
#  else
#   define FF_FUN ffcps_default
#  endif
# endif

#if (defined(__GNUC__) && __SANITIZE_ADDRESS__) \
|
|
|| (defined(__clang__) \
|
|
&& ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
|
|
__attribute__((no_sanitize_address))
|
|
#endif
|
|
static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 **str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
|
|
#undef FF_FUN
|
|
{
|
|
quad_word qw;
|
|
int_char ic;
|
|
|
|
SLJIT_UNUSED_ARG(offs1);
|
|
SLJIT_UNUSED_ARG(offs2);
|
|
|
|
ic.x = chars;
|
|
|
|
#if defined(FFCS)
|
|
sljit_u8 c1 = ic.c.c1;
|
|
vect_t vc1 = VDUPQ(c1);
|
|
|
|
#elif defined(FFCS_2)
|
|
sljit_u8 c1 = ic.c.c1;
|
|
vect_t vc1 = VDUPQ(c1);
|
|
sljit_u8 c2 = ic.c.c2;
|
|
vect_t vc2 = VDUPQ(c2);
|
|
|
|
#elif defined(FFCS_MASK)
|
|
sljit_u8 c1 = ic.c.c1;
|
|
vect_t vc1 = VDUPQ(c1);
|
|
sljit_u8 mask = ic.c.c2;
|
|
vect_t vmask = VDUPQ(mask);
|
|
#endif
|
|
|
|
#if defined(FFCPS)
|
|
compare_type compare1_type = compare_match1;
|
|
compare_type compare2_type = compare_match1;
|
|
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
|
|
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
|
PCRE2_UCHAR char1a = ic.c.c1;
|
|
PCRE2_UCHAR char2a = ic.c.c3;
|
|
|
|
# ifdef FFCPS_CHAR1A2A
|
|
cmp1a = VDUPQ(char1a);
|
|
cmp2a = VDUPQ(char2a);
|
|
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
|
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
|
# else
|
|
PCRE2_UCHAR char1b = ic.c.c2;
|
|
PCRE2_UCHAR char2b = ic.c.c4;
|
|
if (char1a == char1b)
|
|
{
|
|
cmp1a = VDUPQ(char1a);
|
|
cmp1b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
|
}
|
|
else
|
|
{
|
|
sljit_u32 bit1 = char1a ^ char1b;
|
|
if (is_powerof2(bit1))
|
|
{
|
|
compare1_type = compare_match1i;
|
|
cmp1a = VDUPQ(char1a | bit1);
|
|
cmp1b = VDUPQ(bit1);
|
|
}
|
|
else
|
|
{
|
|
compare1_type = compare_match2;
|
|
cmp1a = VDUPQ(char1a);
|
|
cmp1b = VDUPQ(char1b);
|
|
}
|
|
}
|
|
|
|
if (char2a == char2b)
|
|
{
|
|
cmp2a = VDUPQ(char2a);
|
|
cmp2b = VDUPQ(0); /* to avoid errors on older compilers -Werror=maybe-uninitialized */
|
|
}
|
|
else
|
|
{
|
|
sljit_u32 bit2 = char2a ^ char2b;
|
|
if (is_powerof2(bit2))
|
|
{
|
|
compare2_type = compare_match1i;
|
|
cmp2a = VDUPQ(char2a | bit2);
|
|
cmp2b = VDUPQ(bit2);
|
|
}
|
|
else
|
|
{
|
|
compare2_type = compare_match2;
|
|
cmp2a = VDUPQ(char2a);
|
|
cmp2b = VDUPQ(char2b);
|
|
}
|
|
}
|
|
# endif
|
|
|
|
*str_ptr += IN_UCHARS(offs1);
|
|
#endif
|
|
|
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
|
vect_t char_mask = VDUPQ(0xff);
|
|
#endif
|
|
|
|
#if defined(FF_UTF)
|
|
restart:;
|
|
#endif
|
|
|
|
#if defined(FFCPS)
|
|
if (*str_ptr >= str_end)
|
|
return NULL;
|
|
sljit_u8 *p1 = *str_ptr - diff;
|
|
#endif
|
|
sljit_s32 align_offset = ((uint64_t)*str_ptr & 0xf);
|
|
*str_ptr = (sljit_u8 *) ((uint64_t)*str_ptr & ~0xf);
|
|
vect_t data = VLD1Q(*str_ptr);
|
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
|
data = VANDQ(data, char_mask);
|
|
#endif
|
|
|
|
#if defined(FFCS)
|
|
vect_t eq = VCEQQ(data, vc1);
|
|
|
|
#elif defined(FFCS_2)
|
|
vect_t eq1 = VCEQQ(data, vc1);
|
|
vect_t eq2 = VCEQQ(data, vc2);
|
|
vect_t eq = VORRQ(eq1, eq2);
|
|
|
|
#elif defined(FFCS_MASK)
|
|
vect_t eq = VORRQ(data, vmask);
|
|
eq = VCEQQ(eq, vc1);
|
|
|
|
#elif defined(FFCPS)
|
|
# if defined(FFCPS_DIFF1)
|
|
vect_t prev_data = data;
|
|
# endif
|
|
|
|
vect_t data2;
|
|
if (p1 < *str_ptr)
|
|
{
|
|
data2 = VLD1Q(*str_ptr - diff);
|
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
|
data2 = VANDQ(data2, char_mask);
|
|
#endif
|
|
}
|
|
else
|
|
data2 = shift_left_n_lanes(data, offs1 - offs2);
|
|
|
|
if (compare1_type == compare_match1)
|
|
data = VCEQQ(data, cmp1a);
|
|
else
|
|
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
|
|
|
if (compare2_type == compare_match1)
|
|
data2 = VCEQQ(data2, cmp2a);
|
|
else
|
|
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
|
|
|
vect_t eq = VANDQ(data, data2);
|
|
#endif
|
|
|
|
VST1Q(qw.mem, eq);
|
|
/* Ignore matches before the first STR_PTR. */
|
|
if (align_offset < 8)
|
|
{
|
|
qw.dw[0] >>= align_offset * 8;
|
|
if (qw.dw[0])
|
|
{
|
|
*str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
|
|
goto match;
|
|
}
|
|
if (qw.dw[1])
|
|
{
|
|
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
|
goto match;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
qw.dw[1] >>= (align_offset - 8) * 8;
|
|
if (qw.dw[1])
|
|
{
|
|
*str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
|
|
goto match;
|
|
}
|
|
}
|
|
*str_ptr += 16;
|
|
|
|
while (*str_ptr < str_end)
|
|
{
|
|
vect_t orig_data = VLD1Q(*str_ptr);
|
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
|
orig_data = VANDQ(orig_data, char_mask);
|
|
#endif
|
|
data = orig_data;
|
|
|
|
#if defined(FFCS)
|
|
eq = VCEQQ(data, vc1);
|
|
|
|
#elif defined(FFCS_2)
|
|
eq1 = VCEQQ(data, vc1);
|
|
eq2 = VCEQQ(data, vc2);
|
|
eq = VORRQ(eq1, eq2);
|
|
|
|
#elif defined(FFCS_MASK)
|
|
eq = VORRQ(data, vmask);
|
|
eq = VCEQQ(eq, vc1);
|
|
#endif
|
|
|
|
#if defined(FFCPS)
|
|
# if defined (FFCPS_DIFF1)
|
|
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
|
|
# else
|
|
data2 = VLD1Q(*str_ptr - diff);
|
|
# if PCRE2_CODE_UNIT_WIDTH != 8
|
|
data2 = VANDQ(data2, char_mask);
|
|
# endif
|
|
# endif
|
|
|
|
# ifdef FFCPS_CHAR1A2A
|
|
data = VCEQQ(data, cmp1a);
|
|
data2 = VCEQQ(data2, cmp2a);
|
|
# else
|
|
if (compare1_type == compare_match1)
|
|
data = VCEQQ(data, cmp1a);
|
|
else
|
|
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
|
if (compare2_type == compare_match1)
|
|
data2 = VCEQQ(data2, cmp2a);
|
|
else
|
|
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
|
# endif
|
|
|
|
eq = VANDQ(data, data2);
|
|
#endif
|
|
|
|
VST1Q(qw.mem, eq);
|
|
if (qw.dw[0])
|
|
*str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
|
|
else if (qw.dw[1])
|
|
*str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
|
|
else {
|
|
*str_ptr += 16;
|
|
#if defined (FFCPS_DIFF1)
|
|
prev_data = orig_data;
|
|
#endif
|
|
continue;
|
|
}
|
|
|
|
match:;
|
|
if (*str_ptr >= str_end)
|
|
/* Failed match. */
|
|
return NULL;
|
|
|
|
#if defined(FF_UTF)
|
|
if (utf_continue((PCRE2_SPTR)*str_ptr - offs1))
|
|
{
|
|
/* Not a match. */
|
|
*str_ptr += IN_UCHARS(1);
|
|
goto restart;
|
|
}
|
|
#endif
|
|
|
|
/* Match. */
|
|
#if defined (FFCPS)
|
|
*str_ptr -= IN_UCHARS(offs1);
|
|
#endif
|
|
return *str_ptr;
|
|
}
|
|
|
|
/* Failed match. */
|
|
return NULL;
|
|
}
|