diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 59c4dc40203..d4050885400 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -365,8 +365,8 @@ License: Apache-2.0 Files: ./thirdparty/pcre2/ Comment: PCRE2 -Copyright: 1997-2021, University of Cambridge - 2009-2021, Zoltan Herczeg +Copyright: 1997-2022, University of Cambridge + 2009-2022, Zoltan Herczeg License: BSD-3-clause Files: ./thirdparty/recastnavigation/ diff --git a/thirdparty/README.md b/thirdparty/README.md index b147f9754ff..400d376d4dc 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -531,7 +531,7 @@ Exclude: ## pcre2 - Upstream: http://www.pcre.org -- Version: 10.39 (35fee4193b852cb504892352bd0155de10809889, 2021) +- Version: 10.40 (3103b8f20a3b9944b177e812fde29fbfb8b90558, 2022) - License: BSD-3-Clause Files extracted from upstream source: diff --git a/thirdparty/pcre2/AUTHORS b/thirdparty/pcre2/AUTHORS index bec8a1e5adc..11ef898b250 100644 --- a/thirdparty/pcre2/AUTHORS +++ b/thirdparty/pcre2/AUTHORS @@ -8,7 +8,7 @@ Email domain: gmail.com Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2021 University of Cambridge +Copyright (c) 1997-2022 University of Cambridge All rights reserved @@ -19,7 +19,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2021 Zoltan Herczeg +Copyright(c) 2010-2022 Zoltan Herczeg All rights reserved. @@ -30,7 +30,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2021 Zoltan Herczeg +Copyright(c) 2009-2022 Zoltan Herczeg All rights reserved. #### diff --git a/thirdparty/pcre2/LICENCE b/thirdparty/pcre2/LICENCE index b1ec61be440..2f3cd5cac53 100644 --- a/thirdparty/pcre2/LICENCE +++ b/thirdparty/pcre2/LICENCE @@ -26,7 +26,7 @@ Email domain: gmail.com Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2021 University of Cambridge +Copyright (c) 1997-2022 University of Cambridge All rights reserved. @@ -37,7 +37,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2010-2021 Zoltan Herczeg +Copyright(c) 2010-2022 Zoltan Herczeg All rights reserved. @@ -48,7 +48,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2009-2021 Zoltan Herczeg +Copyright(c) 2009-2022 Zoltan Herczeg All rights reserved. diff --git a/thirdparty/pcre2/src/config.h b/thirdparty/pcre2/src/config.h index a13593715e4..76dc5868b13 100644 --- a/thirdparty/pcre2/src/config.h +++ b/thirdparty/pcre2/src/config.h @@ -97,6 +97,9 @@ sure both macros are undefined; an emulation function will then be used. */ /* Have PTHREAD_PRIO_INHERIT. */ /* #undef HAVE_PTHREAD_PRIO_INHERIT */ +/* Define to 1 if you have the header file. */ +/* #undef HAVE_READLINE_H */ + /* Define to 1 if you have the header file. */ /* #undef HAVE_READLINE_HISTORY_H */ @@ -233,7 +236,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE2 10.39" +#define PACKAGE_STRING "PCRE2 10.40" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -242,7 +245,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.39" +#define PACKAGE_VERSION "10.40" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -435,7 +438,7 @@ sure both macros are undefined; an emulation function will then be used. */ #endif /* Version number of package */ -#define VERSION "10.39" +#define VERSION "10.40" /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ diff --git a/thirdparty/pcre2/src/pcre2.h b/thirdparty/pcre2/src/pcre2.h index 90a97d9cb7a..8adcede57cf 100644 --- a/thirdparty/pcre2/src/pcre2.h +++ b/thirdparty/pcre2/src/pcre2.h @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 39 +#define PCRE2_MINOR 40 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2021-10-29 +#define PCRE2_DATE 2022-04-14 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate diff --git a/thirdparty/pcre2/src/pcre2_auto_possess.c b/thirdparty/pcre2/src/pcre2_auto_possess.c index e5e08956827..419fd490018 100644 --- a/thirdparty/pcre2/src/pcre2_auto_possess.c +++ b/thirdparty/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -123,18 +123,21 @@ opcode is used to select the column. The values are as follows: */ static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { -/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ - { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */ - { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */ - { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */ - { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ - { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */ - { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */ +/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ + { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ + { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ + { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ + { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ }; /* This table is used to check whether auto-possessification is possible @@ -196,6 +199,7 @@ static BOOL check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, BOOL negated) { +BOOL ok; const uint32_t *p; const ucd_record *prop = GET_UCD(c); @@ -215,6 +219,11 @@ switch(ptype) case PT_SC: return (pdata == prop->script) == negated; + case PT_SCX: + ok = (pdata == prop->script + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0); + return ok == negated; + /* These are specials */ case PT_ALNUM: @@ -251,6 +260,14 @@ switch(ptype) if (c == *p++) return negated; } break; /* Control never reaches here */ + + /* Haven't yet thought these through. */ + + case PT_BIDICL: + return FALSE; + + case PT_BOOL: + return FALSE; } return FALSE; diff --git a/thirdparty/pcre2/src/pcre2_compile.c b/thirdparty/pcre2/src/pcre2_compile.c index 383159be763..de259c9c405 100644 --- a/thirdparty/pcre2/src/pcre2_compile.c +++ b/thirdparty/pcre2/src/pcre2_compile.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -124,7 +124,7 @@ static unsigned int static int compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t, - uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *, + uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *, compile_block *, PCRE2_SIZE *); static int @@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */ #define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7))) -/* Private flags added to firstcu and reqcu. */ +/* Values and flags for the unsigned xxcuflags variables that accompany xxcu +variables, which are concerned with first and required code units. A value +greater than or equal to REQ_NONE means "no code unit set"; otherwise the +matching xxcu variable is set, and the low valued bits are relevant. */ -#define REQ_CASELESS (1u << 0) /* Indicates caselessness */ -#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */ -/* Negative values for the firstcu and reqcu flags */ -#define REQ_UNSET (-2) /* Not yet found anything */ -#define REQ_NONE (-1) /* Found not fixed char */ +#define REQ_UNSET 0xffffffffu /* Not yet found anything */ +#define REQ_NONE 0xfffffffeu /* Found not fixed character */ +#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */ +#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */ /* These flags are used in the groupinfo vector. */ @@ -2088,7 +2090,9 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr, PCRE2_UCHAR c; PCRE2_SIZE i, bot, top; PCRE2_SPTR ptr = *ptrptr; -PCRE2_UCHAR name[32]; +PCRE2_UCHAR name[50]; +PCRE2_UCHAR *vptr = NULL; +uint16_t ptscript = PT_NOTSCRIPT; if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; @@ -2100,36 +2104,95 @@ negation. */ if (c == CHAR_LEFT_CURLY_BRACKET) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; + if (*ptr == CHAR_CIRCUMFLEX_ACCENT) { *negptr = TRUE; ptr++; } + for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; + while (c == '_' || c == '-' || isspace(c)) + { + if (ptr >= cb->end_pattern) goto ERROR_RETURN; + c = *ptr++; + } if (c == CHAR_NUL) goto ERROR_RETURN; if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = c; + name[i] = tolower(c); + if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i; } + if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; name[i] = 0; } -/* Otherwise there is just one following character, which must be an ASCII -letter. */ +/* If { doesn't follow \p or \P there is just one following character, which +must be an ASCII letter. */ else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) { - name[0] = c; + name[0] = tolower(c); name[1] = 0; } else goto ERROR_RETURN; *ptrptr = ptr; -/* Search for a recognized property name using binary chop. */ +/* If the property contains ':' or '=' we have class name and value separately +specified. The following are supported: + + . Bidi_Class (synonym bc), for which the property names are "bidi". + . Script (synonym sc) for which the property name is the script name + . Script_Extensions (synonym scx), ditto + +As this is a small number, we currently just check the names directly. If this +grows, a sorted table and a switch will be neater. + +For both the script properties, set a PT_xxx value so that (1) they can be +distinguished and (2) invalid script names that happen to be the name of +another property can be diagnosed. */ + +if (vptr != NULL) + { + int offset = 0; + PCRE2_UCHAR sname[8]; + + *vptr = 0; /* Terminate property name */ + if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 || + PRIV(strcmp_c8)(name, STRING_bc) == 0) + { + offset = 4; + sname[0] = CHAR_b; + sname[1] = CHAR_i; /* There is no strcpy_c8 function */ + sname[2] = CHAR_d; + sname[3] = CHAR_i; + } + + else if (PRIV(strcmp_c8)(name, STRING_script) == 0 || + PRIV(strcmp_c8)(name, STRING_sc) == 0) + ptscript = PT_SC; + + else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 || + PRIV(strcmp_c8)(name, STRING_scx) == 0) + ptscript = PT_SCX; + + else + { + *errorcodeptr = ERR47; + return FALSE; + } + + /* Adjust the string in name[] as needed */ + + memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR)); + if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR)); + } + +/* Search for a recognized property using binary chop. */ bot = 0; top = PRIV(utt_size); @@ -2139,15 +2202,37 @@ while (bot < top) int r; i = (bot + top) >> 1; r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); + + /* When a matching property is found, some extra checking is needed when the + \p{xx:yy} syntax is used and xx is either sc or scx. */ + if (r == 0) { - *ptypeptr = PRIV(utt)[i].type; *pdataptr = PRIV(utt)[i].value; - return TRUE; + if (vptr == NULL || ptscript == PT_NOTSCRIPT) + { + *ptypeptr = PRIV(utt)[i].type; + return TRUE; + } + + switch (PRIV(utt)[i].type) + { + case PT_SC: + *ptypeptr = PT_SC; + return TRUE; + + case PT_SCX: + *ptypeptr = ptscript; + return TRUE; + } + + break; /* Non-script found */ } + if (r > 0) bot = i + 1; else top = i; } -*errorcodeptr = ERR47; /* Unrecognized name */ + +*errorcodeptr = ERR47; /* Unrecognized property */ return FALSE; ERROR_RETURN: /* Malformed \P or \p */ @@ -5285,9 +5370,9 @@ Arguments: pptrptr points to the current parsed pattern pointer errorcodeptr points to error code variable firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number + firstcuflagsptr place to put the first code unit flags reqcuptr place to put the last required code unit - reqcuflagsptr place to put the last required code unit flags, or a negative number + reqcuflagsptr place to put the last required code unit flags bcptr points to current branch chain cb contains pointers to tables etc. lengthptr NULL during the real compile phase @@ -5300,8 +5385,8 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero static int compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, - int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr, - uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, + int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr, + uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) { int bravalue = 0; @@ -5316,9 +5401,9 @@ uint32_t zeroreqcu, zerofirstcu; uint32_t escape; uint32_t *pptr = *pptrptr; uint32_t meta, meta_arg; -int32_t firstcuflags, reqcuflags; -int32_t zeroreqcuflags, zerofirstcuflags; -int32_t req_caseopt, reqvary, tempreqvary; +uint32_t firstcuflags, reqcuflags; +uint32_t zeroreqcuflags, zerofirstcuflags; +uint32_t req_caseopt, reqvary, tempreqvary; PCRE2_SIZE offset = 0; PCRE2_SIZE length_prevgroup = 0; PCRE2_UCHAR *code = *codeptr; @@ -5374,13 +5459,13 @@ item types that can be repeated set these backoff variables appropriately. */ firstcu = reqcu = zerofirstcu = zeroreqcu = 0; firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET; -/* The variable req_caseopt contains either the REQ_CASELESS value or zero, +/* The variable req_caseopt contains either the REQ_CASELESS bit or zero, according to the current setting of the caseless flag. The REQ_CASELESS value leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables to record the case status of the value. This is used only for ASCII characters. */ -req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; +req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0; /* Switch on next META item until the end of the branch */ @@ -5395,13 +5480,12 @@ for (;; pptr++) BOOL possessive_quantifier; BOOL note_group_empty; int class_has_8bitchar; - int i; uint32_t mclength; uint32_t skipunits; uint32_t subreqcu, subfirstcu; uint32_t groupnumber; uint32_t verbarglen, verbculen; - int32_t subreqcuflags, subfirstcuflags; /* Must be signed */ + uint32_t subreqcuflags, subfirstcuflags; open_capitem *oc; PCRE2_UCHAR mcbuffer[8]; @@ -5770,9 +5854,9 @@ for (;; pptr++) if (taboffset >= 0) { if (tabopt >= 0) - for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; + for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset]; else - for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; + for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset]; } /* Now see if we need to remove any special characters. An option @@ -5786,9 +5870,9 @@ for (;; pptr++) being built and we are done. */ if (local_negate) - for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i]; + for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]); else - for (i = 0; i < 32; i++) classbits[i] |= pbits[i]; + for (int i = 0; i < 32; i++) classbits[i] |= pbits[i]; /* Every class contains at least one < 256 character. */ @@ -5827,21 +5911,23 @@ for (;; pptr++) switch(escape) { case ESC_d: - for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit]; break; case ESC_D: should_flip_negation = TRUE; - for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit]; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]); break; case ESC_w: - for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word]; break; case ESC_W: should_flip_negation = TRUE; - for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word]; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_word]); break; /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl @@ -5852,12 +5938,13 @@ for (;; pptr++) longer treat \s and \S specially. */ case ESC_s: - for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; + for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space]; break; case ESC_S: should_flip_negation = TRUE; - for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space]; + for (int i = 0; i < 32; i++) + classbits[i] |= (uint8_t)(~cbits[i+cbit_space]); break; /* When adding the horizontal or vertical space lists to a class, or @@ -6098,7 +6185,7 @@ for (;; pptr++) if (negate_class && !xclass_has_prop) { /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; } memcpy(code, classbits, 32); code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); @@ -6124,7 +6211,7 @@ for (;; pptr++) if (negate_class) { /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ - for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; + for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; } memcpy(code, classbits, 32); } @@ -6198,7 +6285,7 @@ for (;; pptr++) verbarglen = *(++pptr); verbculen = 0; tempcode = code++; - for (i = 0; i < (int)verbarglen; i++) + for (int i = 0; i < (int)verbarglen; i++) { meta = *(++pptr); #ifdef SUPPORT_UNICODE @@ -6247,6 +6334,7 @@ for (;; pptr++) bravalue = OP_COND; { int count, index; + unsigned int i; PCRE2_SPTR name; named_group *ng = cb->named_groups; uint32_t length = *(++pptr); @@ -6286,7 +6374,7 @@ for (;; pptr++) groupnumber = 0; if (meta == META_COND_RNUMBER) { - for (i = 1; i < (int)length; i++) + for (i = 1; i < length; i++) { groupnumber = groupnumber * 10 + name[i] - CHAR_0; if (groupnumber > MAX_GROUP_NUMBER) @@ -6608,7 +6696,7 @@ for (;; pptr++) if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) { - if (subfirstcuflags >= 0) + if (subfirstcuflags < REQ_NONE) { firstcu = subfirstcu; firstcuflags = subfirstcuflags; @@ -6622,7 +6710,7 @@ for (;; pptr++) into reqcu if there wasn't one, using the vary flag that was in existence beforehand. */ - else if (subfirstcuflags >= 0 && subreqcuflags < 0) + else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE) { subreqcu = subfirstcu; subreqcuflags = subfirstcuflags | tempreqvary; @@ -6631,7 +6719,7 @@ for (;; pptr++) /* If the subpattern set a required code unit (or set a first code unit that isn't really the first code unit - see above), set it. */ - if (subreqcuflags >= 0) + if (subreqcuflags < REQ_NONE) { reqcu = subreqcu; reqcuflags = subreqcuflags; @@ -6650,7 +6738,7 @@ for (;; pptr++) in that example, 'X' ends up set for both. */ else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) && - subreqcuflags >= 0 && subfirstcuflags >= 0) + subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE) { reqcu = subreqcu; reqcuflags = subreqcuflags; @@ -6680,7 +6768,7 @@ for (;; pptr++) this name is duplicated. */ groupnumber = 0; - for (i = 0; i < cb->names_found; i++, ng++) + for (unsigned int i = 0; i < cb->names_found; i++, ng++) { if (length == ng->length && PRIV(strncmp)(name, ng->name, length) == 0) @@ -6935,14 +7023,19 @@ for (;; pptr++) #endif /* MAYBE_UTF_MULTI */ /* Handle the case of a single code unit - either with no UTF support, or - with UTF disabled, or for a single-code-unit UTF character. */ + with UTF disabled, or for a single-code-unit UTF character. In the latter + case, for a repeated positive match, get the caseless flag for the + required code unit from the previous character, because a class like [Aa] + sets a caseless A but by now the req_caseopt flag has been reset. */ + { mcbuffer[0] = code[-1]; mclength = 1; if (op_previous <= OP_CHARI && repeat_min > 1) { reqcu = mcbuffer[0]; - reqcuflags = req_caseopt | cb->req_varyopt; + reqcuflags = cb->req_varyopt; + if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS; } } goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ @@ -7034,7 +7127,7 @@ for (;; pptr++) *lengthptr += delta; } - else for (i = 0; i < replicate; i++) + else for (int i = 0; i < replicate; i++) { memcpy(code, previous, CU2BYTES(1 + LINK_SIZE)); previous = code; @@ -7210,12 +7303,12 @@ for (;; pptr++) else { - if (groupsetfirstcu && reqcuflags < 0) + if (groupsetfirstcu && reqcuflags >= REQ_NONE) { reqcu = firstcu; reqcuflags = firstcuflags; } - for (i = 1; (uint32_t)i < repeat_min; i++) + for (uint32_t i = 1; i < repeat_min; i++) { memcpy(code, previous, CU2BYTES(len)); code += len; @@ -7259,14 +7352,14 @@ for (;; pptr++) /* This is compiling for real */ - else for (i = repeat_max - 1; i >= 0; i--) + else for (uint32_t i = repeat_max; i >= 1; i--) { *code++ = OP_BRAZERO + repeat_type; /* All but the final copy start a new nesting, maintaining the chain of brackets outstanding. */ - if (i != 0) + if (i != 1) { int linkoffset; *code++ = OP_BRA; @@ -7985,9 +8078,9 @@ Arguments: errorcodeptr -> pointer to error code variable skipunits skip this many code units at start (for brackets and OP_COND) firstcuptr place to put the first required code unit - firstcuflagsptr place to put the first code unit flags, or a negative number + firstcuflagsptr place to put the first code unit flags reqcuptr place to put the last required code unit - reqcuflagsptr place to put the last required code unit flags, or a negative number + reqcuflagsptr place to put the last required code unit flags bcptr pointer to the chain of currently open branches cb points to the data block with tables pointers etc. lengthptr NULL during the real compile phase @@ -8001,7 +8094,7 @@ Returns: 0 There has been an error static int compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr, - int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr, + uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr) { PCRE2_UCHAR *code = *codeptr; @@ -8014,9 +8107,9 @@ int okreturn = 1; uint32_t *pptr = *pptrptr; uint32_t firstcu, reqcu; uint32_t lookbehindlength; -int32_t firstcuflags, reqcuflags; +uint32_t firstcuflags, reqcuflags; uint32_t branchfirstcu, branchreqcu; -int32_t branchfirstcuflags, branchreqcuflags; +uint32_t branchfirstcuflags, branchreqcuflags; PCRE2_SIZE length; branch_chain bc; @@ -8135,9 +8228,9 @@ for (;;) if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu) { - if (firstcuflags >= 0) + if (firstcuflags < REQ_NONE) { - if (reqcuflags < 0) + if (reqcuflags >= REQ_NONE) { reqcu = firstcu; reqcuflags = firstcuflags; @@ -8149,8 +8242,8 @@ for (;;) /* If we (now or from before) have no firstcu, a firstcu from the branch becomes a reqcu if there isn't a branch reqcu. */ - if (firstcuflags < 0 && branchfirstcuflags >= 0 && - branchreqcuflags < 0) + if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE && + branchreqcuflags >= REQ_NONE) { branchreqcu = branchfirstcu; branchreqcuflags = branchfirstcuflags; @@ -8298,7 +8391,7 @@ Returns: TRUE or FALSE */ static BOOL -is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, +is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb, int atomcount, BOOL inassert) { do { @@ -8321,7 +8414,7 @@ do { op == OP_SCBRA || op == OP_SCBRAPOS) { int n = GET2(scode, 1+LINK_SIZE); - int new_map = bracket_map | ((n < 32)? (1u << n) : 1); + uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1); if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE; } @@ -8681,15 +8774,15 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags */ static uint32_t -find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert) +find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert) { uint32_t c = 0; -int cflags = REQ_NONE; +uint32_t cflags = REQ_NONE; *flags = REQ_NONE; do { uint32_t d; - int dflags; + uint32_t dflags; int xl = (*code == OP_CBRA || *code == OP_SCBRA || *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); @@ -8712,9 +8805,8 @@ do { case OP_SCRIPT_RUN: d = find_firstassertedcu(scode, &dflags, inassert + ((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0)); - if (dflags < 0) - return 0; - if (cflags < 0) { c = d; cflags = dflags; } + if (dflags >= REQ_NONE) return 0; + if (cflags >= REQ_NONE) { c = d; cflags = dflags; } else if (c != d || cflags != dflags) return 0; break; @@ -8727,7 +8819,7 @@ do { case OP_MINPLUS: case OP_POSPLUS: if (inassert == 0) return 0; - if (cflags < 0) { c = scode[1]; cflags = 0; } + if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; } else if (c != scode[1]) return 0; break; @@ -8753,7 +8845,7 @@ do { #endif #endif - if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } + if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; } else if (c != scode[1]) return 0; break; } @@ -9689,7 +9781,7 @@ PCRE2_SIZE re_blocksize; /* Size of memory block */ PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */ PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */ -int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ +uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ uint32_t firstcu, reqcu; /* Value of first/req code unit */ uint32_t setflags = 0; /* NL and BSR set flags */ @@ -10369,13 +10461,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) (these are not saved during the compile because they can cause conflicts with actual literals that follow). */ - if (firstcuflags < 0) + if (firstcuflags >= REQ_NONE) firstcu = find_firstassertedcu(codestart, &firstcuflags, 0); /* Save the data for a first code unit. The existence of one means the minimum length must be at least 1. */ - if (firstcuflags >= 0) + if (firstcuflags < REQ_NONE) { re->first_codeunit = firstcu; re->flags |= PCRE2_FIRSTSET; @@ -10422,16 +10514,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) different character and not a non-starting code unit of the first character, because the minimum length count is in characters, not code units. */ - if (reqcuflags >= 0) + if (reqcuflags < REQ_NONE) { #if PCRE2_CODE_UNIT_WIDTH == 16 if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */ - firstcuflags < 0 || /* First not set */ + firstcuflags >= REQ_NONE || /* First not set */ (firstcu & 0xf800) != 0xd800 || /* First not surrogate */ (reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */ #elif PCRE2_CODE_UNIT_WIDTH == 8 if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */ - firstcuflags < 0 || /* First not set */ + firstcuflags >= REQ_NONE || /* First not set */ (firstcu & 0x80) == 0 || /* First is ASCII */ (reqcu & 0x80) == 0) /* Req is ASCII */ #endif diff --git a/thirdparty/pcre2/src/pcre2_dfa_match.c b/thirdparty/pcre2/src/pcre2_dfa_match.c index 060dc7669a1..d29130f2d0c 100644 --- a/thirdparty/pcre2/src/pcre2_dfa_match.c +++ b/thirdparty/pcre2/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -1193,6 +1193,11 @@ for (;;) OK = prop->script == code[2]; break; + case PT_SCX: + OK = (prop->script == code[2] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1240,6 +1245,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[2]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[2]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1451,6 +1465,11 @@ for (;;) OK = prop->script == code[3]; break; + case PT_SCX: + OK = (prop->script == code[3] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1498,6 +1517,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[3]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1692,6 +1720,11 @@ for (;;) OK = prop->script == code[3]; break; + case PT_SCX: + OK = (prop->script == code[3] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -1739,6 +1772,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[3]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1958,6 +2000,12 @@ for (;;) OK = prop->script == code[1 + IMM2_SIZE + 2]; break; + case PT_SCX: + OK = (prop->script == code[1 + IMM2_SIZE + 2] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), + code[1 + IMM2_SIZE + 2]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: @@ -2005,6 +2053,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -3285,8 +3342,15 @@ rws->next = NULL; rws->size = RWS_BASE_SIZE; rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE; -/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated -subject string. */ +/* Recognize NULL, length 0 as an empty string. */ + +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; if (length == PCRE2_ZERO_TERMINATED) { @@ -3294,11 +3358,6 @@ if (length == PCRE2_ZERO_TERMINATED) was_zero_terminated = 1; } -/* Plausibility checks */ - -if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) - return PCRE2_ERROR_NULL; if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (start_offset > length) return PCRE2_ERROR_BADOFFSET; diff --git a/thirdparty/pcre2/src/pcre2_error.c b/thirdparty/pcre2/src/pcre2_error.c index 3dee63d0dbe..09904c03e3b 100644 --- a/thirdparty/pcre2/src/pcre2_error.c +++ b/thirdparty/pcre2/src/pcre2_error.c @@ -119,7 +119,7 @@ static const unsigned char compile_error_texts[] = /* 45 */ "this version of PCRE2 does not have support for \\P, \\p, or \\X\0" "malformed \\P or \\p sequence\0" - "unknown property name after \\P or \\p\0" + "unknown property after \\P or \\p\0" "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0" "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" /* 50 */ @@ -253,7 +253,7 @@ static const unsigned char match_error_texts[] = "unknown substring\0" /* 50 */ "non-unique substring name\0" - "NULL argument passed\0" + "NULL argument passed with non-zero length\0" "nested recursion at the same subject position\0" "matching depth limit exceeded\0" "requested value is not available\0" diff --git a/thirdparty/pcre2/src/pcre2_extuni.c b/thirdparty/pcre2/src/pcre2_extuni.c index 5a719e9cb4f..b23946b0d16 100644 --- a/thirdparty/pcre2/src/pcre2_extuni.c +++ b/thirdparty/pcre2/src/pcre2_extuni.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -105,7 +105,7 @@ while (eptr < end_subject) /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { int ricount = 0; PCRE2_SPTR bptr = eptr - 1; @@ -123,7 +123,7 @@ while (eptr < end_subject) } else c = *bptr; - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break; + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; } if ((ricount & 1) != 0) break; /* Grapheme break required */ diff --git a/thirdparty/pcre2/src/pcre2_internal.h b/thirdparty/pcre2/src/pcre2_internal.h index d8fad1e93ba..fe7a0e005a0 100644 --- a/thirdparty/pcre2/src/pcre2_internal.h +++ b/thirdparty/pcre2/src/pcre2_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -954,6 +954,13 @@ a positive value. */ #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_MARK "MARK" +#define STRING_bc "bc" +#define STRING_bidiclass "bidiclass" +#define STRING_sc "sc" +#define STRING_script "script" +#define STRING_scriptextensions "scriptextensions" +#define STRING_scx "scx" + #else /* SUPPORT_UNICODE */ /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This @@ -1248,26 +1255,39 @@ only. */ #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_MARK STR_M STR_A STR_R STR_K +#define STRING_bc STR_b STR_c +#define STRING_bidiclass STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s +#define STRING_sc STR_s STR_c +#define STRING_script STR_s STR_c STR_r STR_i STR_p STR_t +#define STRING_scriptextensions STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s +#define STRING_scx STR_s STR_c STR_x + + #endif /* SUPPORT_UNICODE */ /* -------------------- End of character and string names -------------------*/ /* -------------------- Definitions for compiled patterns -------------------*/ -/* Codes for different types of Unicode property */ +/* Codes for different types of Unicode property. If these definitions are +changed, the autopossessifying table in pcre2_auto_possess.c must be updated to +match. */ #define PT_ANY 0 /* Any property - matches all chars */ #define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ #define PT_GC 2 /* Specified general characteristic (e.g. L) */ #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ -#define PT_SC 4 /* Script (e.g. Han) */ -#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ -#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */ -#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ -#define PT_WORD 8 /* Word - L plus N plus underscore */ -#define PT_CLIST 9 /* Pseudo-property: match character list */ -#define PT_UCNC 10 /* Universal Character nameable character */ -#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ +#define PT_SC 4 /* Script only (e.g. Han) */ +#define PT_SCX 5 /* Script extensions (includes SC) */ +#define PT_ALNUM 6 /* Alphanumeric - the union of L and N */ +#define PT_SPACE 7 /* Perl space - general category Z plus 9,10,12,13 */ +#define PT_PXSPACE 8 /* POSIX space - Z plus 9,10,11,12,13 */ +#define PT_WORD 9 /* Word - L plus N plus underscore */ +#define PT_CLIST 10 /* Pseudo-property: match character list */ +#define PT_UCNC 11 /* Universal Character nameable character */ +#define PT_BIDICL 12 /* Specified bidi class */ +#define PT_BOOL 13 /* Boolean property */ +#define PT_TABSIZE 14 /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE2_UCP is set - in other words, for Unicode @@ -1275,22 +1295,27 @@ handling of these classes. They are not available via the \p or \P escapes like those in the above list, and so they do not take part in the autopossessifying table. */ -#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */ -#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ -#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ +#define PT_PXGRAPH 14 /* [:graph:] - characters that mark the paper */ +#define PT_PXPRINT 15 /* [:print:] - [:graph:] plus non-control spaces */ +#define PT_PXPUNCT 16 /* [:punct:] - punctuation characters */ + +/* This value is used when parsing \p and \P escapes to indicate that neither +\p{script:...} nor \p{scx:...} has been encountered. */ + +#define PT_NOTSCRIPT 255 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ -#define XCL_HASPROP 0x04 /* Flag: property checks are present. */ +#define XCL_NOT 0x01 /* Flag: this is a negative class */ +#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ +#define XCL_HASPROP 0x04 /* Flag: property checks are present. */ -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ -#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ -#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ +#define XCL_END 0 /* Marks end of individual items */ +#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ +#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ +#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 @@ -1797,8 +1822,8 @@ typedef struct { uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */ uint8_t caseset; /* offset to multichar other cases or zero */ int32_t other_case; /* offset to other case, or zero if none */ - int16_t scriptx; /* script extension value */ - int16_t dummy; /* spare - to round to multiple of 4 bytes */ + uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */ + uint16_t bprops; /* binary properties offset */ } ucd_record; /* UCD access macros */ @@ -1815,13 +1840,30 @@ typedef struct { #define GET_UCD(ch) REAL_GET_UCD(ch) #endif +#define UCD_SCRIPTX_MASK 0x3ff +#define UCD_BIDICLASS_SHIFT 11 +#define UCD_BPROPS_MASK 0xfff + +#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK) +#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT) +#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK) + #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] #define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop #define UCD_CASESET(ch) GET_UCD(ch)->caseset #define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) -#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx +#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) +#define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch)) +#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) + +/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words +that form a bitmap representing a list of scripts or boolean properties. These +macros test or set a bit in the map by number. */ + +#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32))) +#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32))) /* Header for serialized pcre2 codes. */ @@ -1878,6 +1920,7 @@ extern const uint8_t PRIV(utf8_table4)[]; #endif #define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_) #define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_) +#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_) #define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_) #define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_) #define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_) @@ -1901,9 +1944,10 @@ extern const pcre2_match_context PRIV(default_match_context); extern const uint8_t PRIV(default_tables)[]; extern const uint32_t PRIV(hspace_list)[]; extern const uint32_t PRIV(vspace_list)[]; +extern const uint32_t PRIV(ucd_boolprop_sets)[]; extern const uint32_t PRIV(ucd_caseless_sets)[]; extern const uint32_t PRIV(ucd_digit_sets)[]; -extern const uint8_t PRIV(ucd_script_sets)[]; +extern const uint32_t PRIV(ucd_script_sets)[]; extern const ucd_record PRIV(ucd_records)[]; #if PCRE2_CODE_UNIT_WIDTH == 32 extern const ucd_record PRIV(dummy_ucd_record)[]; diff --git a/thirdparty/pcre2/src/pcre2_intmodedep.h b/thirdparty/pcre2/src/pcre2_intmodedep.h index ea3b3ec6987..f8a3d25de67 100644 --- a/thirdparty/pcre2/src/pcre2_intmodedep.h +++ b/thirdparty/pcre2/src/pcre2_intmodedep.h @@ -519,7 +519,7 @@ it is. This is called only in UTF-32 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-32 only code. -These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */ +These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */ #define BACKCHAR(eptr) do { } while (0) @@ -747,8 +747,8 @@ typedef struct compile_block { uint32_t class_range_start; /* Overall class range start */ uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ + uint32_t req_varyopt; /* "After variable item" flag for reqbyte */ int max_lookbehind; /* Maximum lookbehind (characters) */ - int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_recurse; /* Had a recursion or subroutine call */ @@ -764,7 +764,7 @@ typedef struct pcre2_real_jit_stack { } pcre2_real_jit_stack; /* Structure for items in a linked list that represents an explicit recursive -call within the pattern when running pcre_dfa_match(). */ +call within the pattern when running pcre2_dfa_match(). */ typedef struct dfa_recursion_info { struct dfa_recursion_info *prevrec; @@ -838,6 +838,17 @@ multiple of PCRE2_SIZE. See various comments above. */ typedef char check_heapframe_size[ ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; +/* Structure for computing the alignment of heapframe. */ + +typedef struct heapframe_align { + char unalign; /* Completely unalign the current offset */ + heapframe frame; /* Offset is its alignment */ +} heapframe_align; + +/* This define is the minimum alignment required for a heapframe, in bytes. */ + +#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame) + /* Structure for passing "static" information around between the functions doing traditional NFA matching (pcre2_match() and friends). */ diff --git a/thirdparty/pcre2/src/pcre2_jit_compile.c b/thirdparty/pcre2/src/pcre2_jit_compile.c index db2ce65598d..d726c3ca049 100644 --- a/thirdparty/pcre2/src/pcre2_jit_compile.c +++ b/thirdparty/pcre2/src/pcre2_jit_compile.c @@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -413,6 +413,9 @@ typedef struct compiler_common { /* Locals used by fast fail optimization. */ sljit_s32 early_fail_start_ptr; sljit_s32 early_fail_end_ptr; + /* Variables used by recursive call generator. */ + sljit_s32 recurse_bitset_size; + uint8_t *recurse_bitset; /* Flipped and lower case tables. */ const sljit_u8 *fcc; @@ -613,6 +616,8 @@ the start pointers when the end of the capturing group has not yet reached. */ sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) +#define OP2U(op, src1, src1w, src2, src2w) \ + sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w)) #define OP_SRC(op, src, srcw) \ sljit_emit_op_src(compiler, (op), (src), (srcw)) #define LABEL() \ @@ -1621,7 +1626,7 @@ if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ * Skip the check of the second part. */ -if (PRIVATE_DATA(end - LINK_SIZE) == 0) +if (PRIVATE_DATA(end - LINK_SIZE) != 0) return TRUE; next = end; @@ -2315,22 +2320,47 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) #undef RECURSE_TMP_REG_COUNT -static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept) +static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index) +{ +uint8_t *byte; +uint8_t mask; + +SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0); + +bit_index >>= SLJIT_WORD_SHIFT; + +SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size); + +mask = 1 << (bit_index & 0x7); +byte = common->recurse_bitset + (bit_index >> 3); + +if (*byte & mask) + return FALSE; + +*byte |= mask; +return TRUE; +} + +enum get_recurse_flags { + recurse_flag_quit_found = (1 << 0), + recurse_flag_accept_found = (1 << 1), + recurse_flag_setsom_found = (1 << 2), + recurse_flag_setmark_found = (1 << 3), + recurse_flag_control_head_found = (1 << 4), +}; + +static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags) { int length = 1; -int size; +int size, offset; PCRE2_SPTR alternative; -BOOL quit_found = FALSE; -BOOL accept_found = FALSE; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; +uint32_t recurse_flags = 0; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_flags |= recurse_flag_control_head_found; #endif /* Calculate the sum of the private machine words. */ @@ -2341,24 +2371,26 @@ while (cc < ccend) { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; cc += 1; break; case OP_RECURSE: if (common->has_set_som) - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; if (common->mark_ptr != 0) - setmark_found = TRUE; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0) { - length++; + if (recurse_check_bit(common, offset)) + length++; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2377,39 +2409,55 @@ while (cc < ccend) case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - length++; SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - length += 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - length += 2 + 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: /* Might be a hidden SCOND. */ alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc))) length++; cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 2; #ifdef SUPPORT_UNICODE @@ -2418,8 +2466,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2427,8 +2479,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2436,20 +2492,29 @@ while (cc < ccend) break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 1 + IMM2_SIZE; break; @@ -2461,7 +2526,9 @@ while (cc < ccend) #else size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif - if (PRIVATE_DATA(cc) != 0) + + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length += get_class_iterator_size(cc + size); cc += size; break; @@ -2471,12 +2538,11 @@ while (cc < ccend) case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); - if (!setmark_found) - setmark_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; if (common->control_head_ptr != 0) - control_head_found = TRUE; + recurse_flags |= recurse_flag_control_head_found; if (*cc != OP_MARK) - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc += 1 + 2 + cc[1]; break; @@ -2484,26 +2550,24 @@ while (cc < ccend) case OP_PRUNE: case OP_SKIP: case OP_COMMIT: - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc++; break; case OP_SKIP_ARG: - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc += 1 + 2 + cc[1]; break; case OP_THEN: SLJIT_ASSERT(common->control_head_ptr != 0); - quit_found = TRUE; - if (!control_head_found) - control_head_found = TRUE; + recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found; cc++; break; case OP_ACCEPT: case OP_ASSERT_ACCEPT: - accept_found = TRUE; + recurse_flags |= recurse_flag_accept_found; cc++; break; @@ -2515,21 +2579,17 @@ while (cc < ccend) } SLJIT_ASSERT(cc == ccend); -if (control_head_found) +if (recurse_flags & recurse_flag_control_head_found) length++; -if (capture_last_found) - length++; -if (quit_found) +if (recurse_flags & recurse_flag_quit_found) { - if (setsom_found) + if (recurse_flags & recurse_flag_setsom_found) length++; - if (setmark_found) + if (recurse_flags & recurse_flag_setmark_found) length++; } -*needs_control_head = control_head_found; -*has_quit = quit_found; -*has_accept = accept_found; +*result_flags = recurse_flags; return length; } @@ -2542,7 +2602,7 @@ enum copy_recurse_data_types { }; static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - int type, int stackptr, int stacktop, BOOL has_quit) + int type, int stackptr, int stacktop, uint32_t recurse_flags) { delayed_mem_copy_status status; PCRE2_SPTR alternative; @@ -2551,14 +2611,12 @@ sljit_sw shared_srcw[3]; sljit_sw kept_shared_srcw[2]; int private_count, shared_count, kept_shared_count; int from_sp, base_reg, offset, i; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_check_bit(common, common->control_head_ptr); #endif switch (type) @@ -2646,45 +2704,42 @@ while (cc < ccend) { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - if (has_quit && !setsom_found) + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0))) { kept_shared_srcw[0] = OVECTOR(0); kept_shared_count = 1; - setsom_found = TRUE; } cc += 1; break; case OP_RECURSE: - if (has_quit) + if (recurse_flags & recurse_flag_quit_found) { - if (common->has_set_som && !setsom_found) + if (common->has_set_som && recurse_check_bit(common, OVECTOR(0))) { kept_shared_srcw[0] = OVECTOR(0); kept_shared_count = 1; - setsom_found = TRUE; } - if (common->mark_ptr != 0 && !setmark_found) + if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr)) { kept_shared_srcw[kept_shared_count] = common->mark_ptr; kept_shared_count++; - setmark_found = TRUE; } } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { shared_srcw[0] = common->capture_last_ptr; shared_count = 1; - capture_last_found = TRUE; } cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0) { - private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2703,50 +2758,66 @@ while (cc < ccend) case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - private_count = 1; private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - shared_srcw[0] = OVECTOR(offset); - shared_srcw[1] = OVECTOR(offset + 1); - shared_count = 2; - - if (common->capture_last_ptr != 0 && !capture_last_found) + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) { - shared_srcw[2] = common->capture_last_ptr; - shared_count = 3; - capture_last_found = TRUE; + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; } - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { - private_count = 1; - private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; } + + if (common->optimized_cbracket[offset] == 0) + { + private_srcw[0] = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + } + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - shared_srcw[0] = OVECTOR(offset); - shared_srcw[1] = OVECTOR(offset + 1); - shared_count = 2; - - if (common->capture_last_ptr != 0 && !capture_last_found) + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) { - shared_srcw[2] = common->capture_last_ptr; - shared_count = 3; - capture_last_found = TRUE; + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } + + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + { + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; } - private_count = 2; private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + + offset = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, offset)) + { + private_srcw[private_count] = offset; + private_count++; + } cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -2755,18 +2826,17 @@ while (cc < ccend) alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { - private_count = 1; private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; } cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); - } cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2774,11 +2844,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 2; #ifdef SUPPORT_UNICODE @@ -2787,11 +2858,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UNICODE @@ -2800,30 +2872,30 @@ while (cc < ccend) break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); - } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 1 + IMM2_SIZE; break; @@ -2837,23 +2909,28 @@ while (cc < ccend) i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif if (PRIVATE_DATA(cc) != 0) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); switch(get_class_iterator_size(cc + i)) { case 1: - private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); break; case 2: - private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + if (recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } break; default: SLJIT_UNREACHABLE(); break; } + } cc += i; break; @@ -2862,28 +2939,25 @@ while (cc < ccend) case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); - if (has_quit && !setmark_found) + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr)) { kept_shared_srcw[0] = common->mark_ptr; kept_shared_count = 1; - setmark_found = TRUE; } - if (common->control_head_ptr != 0 && !control_head_found) + if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr)) { private_srcw[0] = common->control_head_ptr; private_count = 1; - control_head_found = TRUE; } cc += 1 + 2 + cc[1]; break; case OP_THEN: SLJIT_ASSERT(common->control_head_ptr != 0); - if (!control_head_found) + if (recurse_check_bit(common, common->control_head_ptr)) { private_srcw[0] = common->control_head_ptr; private_count = 1; - control_head_found = TRUE; } cc++; break; @@ -2891,7 +2965,7 @@ while (cc < ccend) default: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); - break; + continue; } if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) @@ -3743,9 +3817,9 @@ if (common->invalid_utf) else { OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); } } @@ -3982,7 +4056,7 @@ if (common->utf) { if (options & READ_CHAR_UPDATE_STR_PTR) OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); if (options & READ_CHAR_UPDATE_STR_PTR) CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); if (max >= 0xd800) @@ -4010,9 +4084,9 @@ if (common->invalid_utf) else { OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); } } @@ -4167,7 +4241,7 @@ if (common->utf && negated) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) { OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); } else @@ -4250,7 +4324,7 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -4267,7 +4341,7 @@ if (common->invalid_utf && !must_be_valid) return; } - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -4332,7 +4406,7 @@ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); /* Searching for the first zero. */ -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000); @@ -4345,7 +4419,7 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); jump = JUMP(SLJIT_NOT_ZERO); /* Three byte sequence. */ OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000); @@ -4373,7 +4447,7 @@ struct sljit_jump *compare; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); @@ -4432,7 +4506,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); @@ -4447,14 +4521,14 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000); exit_invalid[2] = NULL; } else exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); jump = JUMP(SLJIT_NOT_ZERO); three_byte_entry = LABEL(); @@ -4462,7 +4536,7 @@ three_byte_entry = LABEL(); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800); exit_invalid[3] = NULL; } @@ -4473,7 +4547,7 @@ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); exit_invalid[4] = NULL; } @@ -4490,7 +4564,7 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0); exit_invalid[5] = NULL; } @@ -4500,7 +4574,7 @@ else OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); exit_invalid[6] = NULL; } @@ -4522,7 +4596,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); @@ -4537,7 +4611,7 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); exit_invalid[10] = NULL; } @@ -4830,7 +4904,7 @@ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800); exit_invalid[2] = NULL; } @@ -4840,7 +4914,7 @@ else OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); exit_invalid[3] = NULL; } @@ -4865,7 +4939,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); exit_invalid[5] = NULL; } @@ -4968,7 +5042,7 @@ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); -OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); +OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -5239,7 +5313,7 @@ if (newlinecheck) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -5304,12 +5378,12 @@ else if (common->utf) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); CMOV(SLJIT_LESS, STR_PTR, TMP2, 0); } else { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -5860,7 +5934,7 @@ if (has_match_end) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); CMOV(SLJIT_GREATER, STR_END, TMP1, 0); } @@ -6063,7 +6137,7 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); CMOV(SLJIT_GREATER, STR_END, TMP1, 0); } else @@ -6200,7 +6274,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6228,7 +6302,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6293,7 +6367,7 @@ if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (common->mode != PCRE2_JIT_COMPLETE) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } } @@ -6319,7 +6393,7 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6355,7 +6429,7 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); CMOV(SLJIT_GREATER, STR_END, TMP1, 0); } @@ -6385,12 +6459,12 @@ if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &ma if (!HAS_VIRTUAL_REGISTERS) { OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0); } else { OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); } JUMPTO(SLJIT_ZERO, start); } @@ -6525,7 +6599,7 @@ jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); -OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); if (HAS_VIRTUAL_REGISTERS) { @@ -6600,10 +6674,10 @@ if (common->ucp) jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); JUMPHERE(jump); OP1(SLJIT_MOV, TMP3, 0, TMP2, 0); @@ -6646,10 +6720,10 @@ if (common->ucp) jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); JUMPHERE(jump); } @@ -6916,7 +6990,7 @@ j = 0; if (char_list[0] == 0) { i++; - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO); } else @@ -6928,7 +7002,7 @@ while (i < len) j++; else { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]); CMOV(SLJIT_ZERO, TMP2, TMP1, 0); } i++; @@ -6942,7 +7016,7 @@ if (j != 0) if ((char_list[i] & 0x100) != 0) { j--; - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); CMOV(SLJIT_ZERO, TMP2, TMP1, 0); } } @@ -6971,9 +7045,9 @@ DEFINE_COMPILER; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) @@ -6981,7 +7055,7 @@ if (common->utf) #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif @@ -6997,29 +7071,29 @@ DEFINE_COMPILER; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) { #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif @@ -7037,9 +7111,9 @@ DEFINE_COMPILER; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) @@ -7047,7 +7121,7 @@ if (common->utf) #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif @@ -7412,6 +7486,21 @@ return cc; static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); +#ifdef SUPPORT_UNICODE +#define XCLASS_SAVE_CHAR 0x001 +#define XCLASS_CHAR_SAVED 0x002 +#define XCLASS_HAS_TYPE 0x004 +#define XCLASS_HAS_SCRIPT 0x008 +#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 +#define XCLASS_HAS_BOOL 0x020 +#define XCLASS_HAS_BIDICL 0x040 +#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) +#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 + +#endif /* SUPPORT_UNICODE */ + static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; @@ -7426,8 +7515,7 @@ BOOL utf = common->utf; #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ #ifdef SUPPORT_UNICODE -BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; -BOOL charsaved = FALSE; +sljit_u32 unicode_status = 0; int typereg = TMP1; const sljit_u32 *other_cases; sljit_uw typeoffset; @@ -7454,7 +7542,7 @@ while (*cc != XCL_END) if (c > max) max = c; if (c < min) min = c; #ifdef SUPPORT_UNICODE - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; #endif /* SUPPORT_UNICODE */ } else if (*cc == XCL_RANGE) @@ -7465,7 +7553,7 @@ while (*cc != XCL_END) GETCHARINCTEST(c, cc); if (c > max) max = c; #ifdef SUPPORT_UNICODE - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; #endif /* SUPPORT_UNICODE */ } #ifdef SUPPORT_UNICODE @@ -7473,7 +7561,7 @@ while (*cc != XCL_END) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; - if (*cc == PT_CLIST) + if (*cc == PT_CLIST && cc[-1] == XCL_PROP) { other_cases = PRIV(ucd_caseless_sets) + cc[1]; while (*other_cases != NOTACHAR) @@ -7506,11 +7594,21 @@ while (*cc != XCL_END) case PT_GC: case PT_PC: case PT_ALNUM: - needstype = TRUE; + unicode_status |= XCLASS_HAS_TYPE; break; + case PT_SCX: + unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION; + if (cc[-1] == XCL_NOTPROP) + { + unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; + break; + } + compares++; + /* Fall through */ + case PT_SC: - needsscript = TRUE; + unicode_status |= XCLASS_HAS_SCRIPT; break; case PT_SPACE: @@ -7519,13 +7617,20 @@ while (*cc != XCL_END) case PT_PXGRAPH: case PT_PXPRINT: case PT_PXPUNCT: - needstype = TRUE; - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; break; case PT_CLIST: case PT_UCNC: - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; + break; + + case PT_BOOL: + unicode_status |= XCLASS_HAS_BOOL; + break; + + case PT_BIDICL: + unicode_status |= XCLASS_HAS_BIDICL; break; default: @@ -7545,7 +7650,7 @@ if ((cc[-1] & XCL_NOT) != 0) else { #ifdef SUPPORT_UNICODE - read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0); + read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0); #else /* !SUPPORT_UNICODE */ read_char(common, min, max, NULL, 0); #endif /* SUPPORT_UNICODE */ @@ -7562,7 +7667,7 @@ if ((cc[-1] & XCL_HASPROP) == 0) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); } @@ -7581,7 +7686,7 @@ else if ((cc[-1] & XCL_MAP) != 0) { OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); #ifdef SUPPORT_UNICODE - charsaved = TRUE; + unicode_status |= XCLASS_CHAR_SAVED; #endif /* SUPPORT_UNICODE */ if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) { @@ -7595,7 +7700,7 @@ else if ((cc[-1] & XCL_MAP) != 0) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -7609,9 +7714,9 @@ else if ((cc[-1] & XCL_MAP) != 0) } #ifdef SUPPORT_UNICODE -if (needstype || needsscript) +if (unicode_status & XCLASS_NEEDS_UCD) { - if (needschar && !charsaved) + if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR) OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 @@ -7631,17 +7736,16 @@ if (needstype || needsscript) OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - /* Before anything else, we deal with scripts. */ - if (needsscript) + ccbegin = cc; + + if (unicode_status & XCLASS_HAS_BIDICL) { - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - - ccbegin = cc; + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); while (*cc != XCL_END) { @@ -7660,7 +7764,7 @@ if (needstype || needsscript) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; - if (*cc == PT_SC) + if (*cc == PT_BIDICL) { compares--; invertcmp = (compares == 0 && list != backtracks); @@ -7674,52 +7778,176 @@ if (needstype || needsscript) } cc = ccbegin; + } - if (needstype) + if (unicode_status & XCLASS_HAS_BOOL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + while (*cc != XCL_END) { - /* TMP2 has already been shifted by 2 */ - if (!needschar) + if (*cc == XCL_SINGLE) { - OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); } else { - OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BOOL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - typereg = RETURN_ADDR; + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + cc += 2; } } - else if (needschar) - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + cc = ccbegin; } - else if (needstype) + + if (unicode_status & XCLASS_HAS_SCRIPT) { - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - if (!needschar) + while (*cc != XCL_END) { - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + switch (*cc) + { + case PT_SCX: + if (cc[-1] == XCL_NOTPROP) + break; + /* Fall through */ - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - } - else - { - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + case PT_SC: + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - typereg = RETURN_ADDR; + add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); + } + cc += 2; + } } + + cc = ccbegin; } - else if (needschar) + + if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) + { + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0; + } + else + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; + } + } + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + } + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_SCX) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + + jump = NULL; + if (cc[-1] == XCL_NOTPROP) + { + jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); + if (invertcmp) + { + add_jump(compiler, backtracks, jump); + jump = NULL; + } + invertcmp ^= 0x1; + } + + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + + if (jump != NULL) + JUMPHERE(jump); + } + cc += 2; + } + } + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + cc = ccbegin; + } + + if (unicode_status & XCLASS_SAVE_CHAR) OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + typereg = RETURN_ADDR; + + OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + } } #endif /* SUPPORT_UNICODE */ @@ -7743,13 +7971,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; @@ -7769,13 +7997,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; @@ -7801,11 +8029,11 @@ while (*cc != XCL_END) break; case PT_LAMP: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); + OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); + OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); + OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -7821,6 +8049,9 @@ while (*cc != XCL_END) break; case PT_SC: + case PT_SCX: + case PT_BOOL: + case PT_BIDICL: compares++; /* Do nothing. */ break; @@ -7828,32 +8059,32 @@ while (*cc != XCL_END) case PT_SPACE: case PT_PXSPACE: SET_CHAR_OFFSET(9); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); SET_TYPE_OFFSET(ucp_Zl); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_WORD: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); /* Fall through. */ case PT_ALNUM: SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); SET_TYPE_OFFSET(ucp_Nd); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -7876,7 +8107,7 @@ while (*cc != XCL_END) OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); other_cases += 2; } @@ -7889,41 +8120,41 @@ while (*cc != XCL_END) OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); other_cases += 3; } else { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); } while (*other_cases != NOTACHAR) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); } jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_UCNC: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); SET_CHAR_OFFSET(0xa0); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -7931,20 +8162,20 @@ while (*cc != XCL_END) case PT_PXGRAPH: /* C and Z groups are the farthest two groups. */ SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); /* In case of ucp_Cf, we overwrite the result. */ SET_CHAR_OFFSET(0x2066); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); JUMPHERE(jump); @@ -7954,20 +8185,20 @@ while (*cc != XCL_END) case PT_PXPRINT: /* C and Z groups are the farthest two groups. */ SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); + OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL); jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); /* In case of ucp_Cf, we overwrite the result. */ SET_CHAR_OFFSET(0x2066); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); JUMPHERE(jump); @@ -7976,15 +8207,15 @@ while (*cc != XCL_END) case PT_PXPUNCT: SET_TYPE_OFFSET(ucp_Sc); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); SET_TYPE_OFFSET(ucp_Pc); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -8069,9 +8300,9 @@ switch(type) else { jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); check_partial(common, TRUE); @@ -8094,7 +8325,7 @@ switch(type) OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0); jump[2] = JUMP(SLJIT_GREATER); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); /* Equal. */ @@ -8137,10 +8368,10 @@ switch(type) if (HAS_VIRTUAL_REGISTERS) { OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); } else - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); if (!common->endonly) @@ -8157,10 +8388,10 @@ switch(type) if (HAS_VIRTUAL_REGISTERS) { OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); } else - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); @@ -8200,14 +8431,14 @@ switch(type) OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } else { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } return cc; @@ -8219,13 +8450,13 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } else { OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); jump[0] = JUMP(SLJIT_JUMP); @@ -8319,7 +8550,7 @@ do /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = prevcc; @@ -8331,7 +8562,7 @@ do BACKCHAR(bptr); GETCHAR(c, bptr); - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; @@ -8387,7 +8618,7 @@ do /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = prevcc; @@ -8397,7 +8628,7 @@ do { GETCHARBACK_INVALID(c, bptr, start_subject, break); - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; @@ -8455,7 +8686,7 @@ while (cc < end_subject) /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = cc - 1; @@ -8470,7 +8701,7 @@ while (cc < end_subject) break; #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break; + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; } @@ -8520,7 +8751,7 @@ switch(type) #endif read_char8_type(common, backtracks, type == OP_NOT_DIGIT); /* Flip the starting bit in the negative case. */ - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8534,7 +8765,7 @@ switch(type) else #endif read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8548,7 +8779,7 @@ switch(type) else #endif read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8596,7 +8827,7 @@ switch(type) #elif PCRE2_CODE_UNIT_WIDTH == 16 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -8690,13 +8921,13 @@ switch(type) OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); #if PCRE2_CODE_UNIT_WIDTH != 32 - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); if (common->invalid_utf) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); if (!common->utf || common->invalid_utf) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); #endif @@ -8758,7 +8989,7 @@ switch(type) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); } @@ -8878,7 +9109,7 @@ switch(type) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 @@ -9116,7 +9347,7 @@ if (common->utf && *cc == OP_REFI) caseless_loop = LABEL(); OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0); JUMPTO(SLJIT_EQUAL, loop); JUMPTO(SLJIT_LESS, caseless_loop); @@ -9575,12 +9806,12 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, callout_arg_size); /* Check return value. */ -OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER)); if (common->abort_label == NULL) add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */); @@ -10148,10 +10379,10 @@ SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); #ifdef SUPPORT_UNICODE -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run)); #else -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run)); #endif OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); @@ -11374,7 +11605,7 @@ switch(opcode) if (common->mode == PCRE2_JIT_COMPLETE) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else @@ -11667,7 +11898,7 @@ switch(opcode) if (common->mode == PCRE2_JIT_COMPLETE) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else @@ -11751,9 +11982,9 @@ if (HAS_VIRTUAL_REGISTERS) else OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options)); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO)); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); if (common->accept_label == NULL) add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO)); else @@ -13004,7 +13235,7 @@ if (opcode == OP_SKIP_ARG) SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); @@ -13248,10 +13479,8 @@ DEFINE_COMPILER; PCRE2_SPTR cc = common->start + common->currententry->start; PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE); -BOOL needs_control_head; -BOOL has_quit; -BOOL has_accept; -int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept); +uint32_t recurse_flags = 0; +int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags); int alt_count, alt_max, local_size; backtrack_common altbacktrack; jump_list *match = NULL; @@ -13285,12 +13514,12 @@ allocate_stack(common, private_data_size + local_size); /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0); -copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags); /* This variable is saved and restored all time when we enter or exit from a recursive context. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); -if (needs_control_head) +if (recurse_flags & recurse_flag_control_head_found) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); if (alt_max > 1) @@ -13315,10 +13544,10 @@ while (1) if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1); + allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); - if (alt_max > 1 || has_accept) + if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) { if (alt_max > 3) put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); @@ -13337,14 +13566,14 @@ while (1) sljit_emit_fast_enter(compiler, TMP1, 0); - if (has_accept) + if (recurse_flags & recurse_flag_accept_found) accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0); - copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); if (alt_max > 1) { @@ -13361,7 +13590,7 @@ while (1) next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } else - free_stack(common, has_accept ? 2 : 1); + free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1); } else if (alt_max > 3) { @@ -13396,7 +13625,7 @@ while (1) quit = LABEL(); -copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); free_stack(common, private_data_size + local_size); @@ -13405,15 +13634,15 @@ OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); if (common->quit != NULL) { - SLJIT_ASSERT(has_quit); + SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found); set_jumps(common->quit, LABEL()); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); - copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags); JUMPTO(SLJIT_JUMP, quit); } -if (has_accept) +if (recurse_flags & recurse_flag_accept_found) { JUMPHERE(accept_exit); free_stack(common, 2); @@ -13421,7 +13650,7 @@ if (has_accept) /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0); - copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); free_stack(common, private_data_size + local_size); @@ -13431,7 +13660,7 @@ if (has_accept) if (common->accept != NULL) { - SLJIT_ASSERT(has_accept); + SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found); set_jumps(common->accept, LABEL()); @@ -13446,7 +13675,7 @@ set_jumps(match, LABEL()); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); -copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); @@ -13652,7 +13881,7 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); total_length = ccend - common->start; -common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); +common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); if (!common->private_data_ptrs) { SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13692,8 +13921,9 @@ if (!compiler) } common->compiler = compiler; -/* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size); +/* Main pcre2_jit_exec entry. */ +SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0); +sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); @@ -13900,9 +14130,9 @@ if (common->might_be_empty) JUMPHERE(empty_match); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); JUMPTO(SLJIT_ZERO, empty_match_found_label); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); @@ -13915,20 +14145,40 @@ common->early_fail_end_ptr = 0; common->currententry = common->entries; common->local_quit_available = TRUE; quit_label = common->quit_label; -while (common->currententry != NULL) +if (common->currententry != NULL) { - /* Might add new entries. */ - compile_recurse(common); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + /* A free bit for each private data. */ + common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3; + SLJIT_ASSERT(common->recurse_bitset_size > 0); + common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);; + + if (common->recurse_bitset != NULL) { + do + { + /* Might add new entries. */ + compile_recurse(common); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + break; + flush_stubs(common); + common->currententry = common->currententry->next; + } + while (common->currententry != NULL); + + SLJIT_FREE(common->recurse_bitset, allocator_data); + } + + if (common->currententry != NULL) + { + /* The common->recurse_bitset has been freed. */ + SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL); + sljit_free_compiler(compiler); SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } - flush_stubs(common); - common->currententry = common->currententry->next; } common->local_quit_available = FALSE; common->quit_label = quit_label; @@ -13947,7 +14197,7 @@ OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize)); jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); diff --git a/thirdparty/pcre2/src/pcre2_jit_match.c b/thirdparty/pcre2/src/pcre2_jit_match.c index 7e13b8cfeed..1ab3af073e6 100644 --- a/thirdparty/pcre2/src/pcre2_jit_match.c +++ b/thirdparty/pcre2/src/pcre2_jit_match.c @@ -120,7 +120,7 @@ else if ((options & PCRE2_PARTIAL_SOFT) != 0) if (functions == NULL || functions->executable_funcs[index] == NULL) return PCRE2_ERROR_JIT_BADOPTION; -/* Sanity checks should be handled by pcre_exec. */ +/* Sanity checks should be handled by pcre2_match. */ arguments.str = subject + start_offset; arguments.begin = subject; arguments.end = subject + length; diff --git a/thirdparty/pcre2/src/pcre2_jit_misc.c b/thirdparty/pcre2/src/pcre2_jit_misc.c index ec924e0f9b5..e57afad065b 100644 --- a/thirdparty/pcre2/src/pcre2_jit_misc.c +++ b/thirdparty/pcre2/src/pcre2_jit_misc.c @@ -135,7 +135,7 @@ return NULL; pcre2_jit_stack *jit_stack; -if (startsize < 1 || maxsize < 1) +if (startsize == 0 || maxsize == 0 || maxsize > SIZE_MAX - STACK_GROWTH_RATE) return NULL; if (startsize > maxsize) startsize = maxsize; diff --git a/thirdparty/pcre2/src/pcre2_jit_simd_inc.h b/thirdparty/pcre2/src/pcre2_jit_simd_inc.h index aa029cce38f..d99cfc5ce46 100644 --- a/thirdparty/pcre2/src/pcre2_jit_simd_inc.h +++ b/thirdparty/pcre2/src/pcre2_jit_simd_inc.h @@ -339,7 +339,7 @@ if (common->mode != PCRE2_JIT_COMPLETE) { JUMPHERE(partial_quit[0]); JUMPHERE(partial_quit[1]); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else @@ -537,7 +537,7 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); CMOV(SLJIT_LESS, STR_END, TMP1, 0); } @@ -883,14 +883,14 @@ if (char1 == char2) #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf)); else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); #endif } else @@ -904,14 +904,14 @@ else #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf)); else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); #endif } else @@ -922,14 +922,14 @@ else #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf && offset > 0) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf)); else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); #endif } } @@ -1067,7 +1067,7 @@ else OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0); CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0); } @@ -1084,31 +1084,31 @@ if (diff == 1) { if (char1a == char1b && char2a == char2b) { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf)); else #endif - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0)); } else { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf)); else #endif - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1)); } } else { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf)); else #endif - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), - SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), + SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default)); } /* Restore STR_PTR register. */ @@ -1418,7 +1418,7 @@ if (common->mode != PCRE2_JIT_COMPLETE) { JUMPHERE(partial_quit[0]); JUMPHERE(partial_quit[1]); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); } else @@ -1673,7 +1673,7 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); CMOV(SLJIT_LESS, STR_END, TMP1, 0); } diff --git a/thirdparty/pcre2/src/pcre2_match.c b/thirdparty/pcre2/src/pcre2_match.c index f28cdbb47a5..6354e1bb9e2 100644 --- a/thirdparty/pcre2/src/pcre2_match.c +++ b/thirdparty/pcre2/src/pcre2_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015-2021 University of Cambridge + New API code Copyright (c) 2015-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ POSSIBILITY OF SUCH DAMAGE. /* #define DEBUG_SHOW_OPS */ /* #define DEBUG_SHOW_RMATCH */ -#ifdef DEBUG_FRAME_DISPLAY +#ifdef DEBUG_FRAMES_DISPLAY #include #endif @@ -159,7 +159,8 @@ enum { RM100=100, RM101 }; #ifdef SUPPORT_UNICODE enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, - RM216, RM217, RM218, RM219, RM220, RM221, RM222 }; + RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, + RM224, RM225 }; #endif /* Define short names for general fields in the current backtrack frame, which @@ -2421,40 +2422,49 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { const uint32_t *cp; const ucd_record *prop = GET_UCD(fc); + BOOL notmatch = Fop == OP_NOTPROP; switch(Fecode[1]) { case PT_ANY: - if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; case PT_LAMP: if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP)) + prop->chartype == ucp_Lt) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_GC: - if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP)) + if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_PC: - if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP)) + if ((Fecode[2] == prop->chartype) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_SC: - if ((Fecode[2] != prop->script) == (Fop == OP_PROP)) + if ((Fecode[2] == prop->script) == notmatch) RRETURN(MATCH_NOMATCH); break; + case PT_SCX: + { + BOOL ok = (Fecode[2] == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0); + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + /* These are specials */ case PT_ALNUM: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP)) + PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch) RRETURN(MATCH_NOMATCH); break; @@ -2468,12 +2478,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { HSPACE_CASES: VSPACE_CASES: - if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; default: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == - (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); break; } break; @@ -2481,7 +2491,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); case PT_WORD: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || PRIV(ucp_gentype)[prop->chartype] == ucp_N || - fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP)) + fc == CHAR_UNDERSCORE) == notmatch) RRETURN(MATCH_NOMATCH); break; @@ -2490,19 +2500,32 @@ fprintf(stderr, "++ op=%d\n", *Fecode); for (;;) { if (fc < *cp) - { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; } + { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } } if (fc == *cp++) - { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } } + { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; } } break; case PT_UCNC: if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Fop == OP_NOTPROP)) + fc >= 0xe000) == notmatch) RRETURN(MATCH_NOMATCH); break; + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BOOL: + { + BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Fecode[2]) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + /* This should never occur */ default: @@ -2616,18 +2639,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* First, ensure the minimum number of matches are present. Use inline code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). The code for UTF mode is separated out for - tidiness, except for Unicode property tests. */ + (i.e. keep it out of the loops). As there are no calls to RMATCH in the + loops, we can use an ordinary variable for "notmatch". The code for UTF + mode is separated out for tidiness, except for Unicode property tests. */ if (Lmin > 0) { #ifdef SUPPORT_UNICODE if (proptype >= 0) /* Property tests in all modes */ { + BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { case PT_ANY: - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); for (i = 1; i <= Lmin; i++) { if (Feptr >= mb->end_subject) @@ -2652,7 +2677,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); chartype = UCD_CHARTYPE(fc); if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + chartype == ucp_Lt) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2666,7 +2691,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2680,7 +2705,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2694,7 +2719,26 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SCX: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2710,7 +2754,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } GETCHARINCTEST(fc, Feptr); category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) + if ((category == ucp_L || category == ucp_N) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2733,11 +2777,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { HSPACE_CASES: VSPACE_CASES: - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; default: - if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) RRETURN(MATCH_NOMATCH); break; } @@ -2756,7 +2800,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARINCTEST(fc, Feptr); category = UCD_CATEGORY(fc); if ((category == ucp_L || category == ucp_N || - fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP)) + fc == CHAR_UNDERSCORE) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2776,12 +2820,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { if (fc < *cp) { - if (Lctype == OP_NOTPROP) break; + if (notmatch) break; RRETURN(MATCH_NOMATCH); } if (fc == *cp++) { - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; } } @@ -2799,7 +2843,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARINCTEST(fc, Feptr); if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Lctype == OP_NOTPROP)) + fc >= 0xe000) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BIDICL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BOOL: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -3343,7 +3420,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (Lmin == Lmax) continue; /* If minimizing, we have to test the rest of the pattern before each - subsequent match. */ + subsequent match. This means we cannot use a local "notmatch" variable as + in the other cases. As all 4 temporary 32-bit values in the frame are + already in use, just test the type each time. */ if (reptype == REPTYPE_MIN) { @@ -3440,6 +3519,28 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* Control never gets here */ + case PT_SCX: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM225); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + case PT_ALNUM: for (;;) { @@ -3454,8 +3555,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } GETCHARINCTEST(fc, Feptr); category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N) == - (Lctype == OP_NOTPROP)) + if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ @@ -3562,6 +3662,45 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* Control never gets here */ + case PT_BIDICL: + for (;;) + { + RMATCH(Fecode, RM224); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_BOOL: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM223); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + /* This should never occur */ default: return PCRE2_ERROR_INTERNAL; @@ -3870,7 +4009,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* If maximizing, it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). */ + test once at the start (i.e. keep it out of the loops). Once again, + "notmatch" can be an ordinary local variable because the loops do not call + RMATCH. */ else { @@ -3879,6 +4020,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); #ifdef SUPPORT_UNICODE if (proptype >= 0) { + BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { case PT_ANY: @@ -3891,7 +4033,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if (Lctype == OP_NOTPROP) break; + if (notmatch) break; Feptr+= len; } break; @@ -3910,7 +4052,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); chartype = UCD_CHARTYPE(fc); if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + chartype == ucp_Lt) == notmatch) break; Feptr+= len; } @@ -3926,8 +4068,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) - break; + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break; Feptr+= len; } break; @@ -3942,8 +4083,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) - break; + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break; Feptr+= len; } break; @@ -3958,8 +4098,27 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_SCX: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) break; Feptr+= len; } break; @@ -3976,8 +4135,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } GETCHARLENTEST(fc, Feptr, len); category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N) == - (Lctype == OP_NOTPROP)) + if ((category == ucp_L || category == ucp_N) == notmatch) break; Feptr+= len; } @@ -4002,11 +4160,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { HSPACE_CASES: VSPACE_CASES: - if (Lctype == OP_NOTPROP) goto ENDLOOP99; /* Break the loop */ + if (notmatch) goto ENDLOOP99; /* Break the loop */ break; default: - if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) goto ENDLOOP99; /* Break the loop */ break; } @@ -4028,7 +4186,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARLENTEST(fc, Feptr, len); category = UCD_CATEGORY(fc); if ((category == ucp_L || category == ucp_N || - fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP)) + fc == CHAR_UNDERSCORE) == notmatch) break; Feptr+= len; } @@ -4049,9 +4207,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode); for (;;) { if (fc < *cp) - { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; } + { if (notmatch) break; else goto GOT_MAX; } if (fc == *cp++) - { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; } + { if (notmatch) goto GOT_MAX; else break; } } Feptr += len; } @@ -4070,12 +4228,47 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARLENTEST(fc, Feptr, len); if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Lctype == OP_NOTPROP)) + fc >= 0xe000) == notmatch) break; Feptr += len; } break; + case PT_BIDICL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_BOOL: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) break; + Feptr+= len; + } + break; + default: return PCRE2_ERROR_INTERNAL; } @@ -6066,7 +6259,7 @@ switch (Freturn_id) LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) - LBL(221) LBL(222) + LBL(221) LBL(222) LBL(223) LBL(224) LBL(225) #endif default: @@ -6129,8 +6322,8 @@ PCRE2_UCHAR req_cu2 = 0; PCRE2_SPTR bumpalong_limit; PCRE2_SPTR end_subject; PCRE2_SPTR true_end_subject; -PCRE2_SPTR start_match = subject + start_offset; -PCRE2_SPTR req_cu_ptr = start_match - 1; +PCRE2_SPTR start_match; +PCRE2_SPTR req_cu_ptr; PCRE2_SPTR start_partial; PCRE2_SPTR match_partial; @@ -6170,9 +6363,18 @@ PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)] PCRE2_KEEP_UNINITIALIZED; mb->stack_frames = (heapframe *)stack_frames_vector; -/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated -subject string. */ +/* Recognize NULL, length 0 as an empty string. */ +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (code == NULL || subject == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; + +start_match = subject + start_offset; +req_cu_ptr = start_match - 1; if (length == PCRE2_ZERO_TERMINATED) { length = PRIV(strlen)(subject); @@ -6180,11 +6382,6 @@ if (length == PCRE2_ZERO_TERMINATED) } true_end_subject = end_subject = subject + length; -/* Plausibility checks */ - -if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (code == NULL || subject == NULL || match_data == NULL) - return PCRE2_ERROR_NULL; if (start_offset > length) return PCRE2_ERROR_BADOFFSET; /* Check that the first field in the block is the magic number. */ @@ -6482,7 +6679,7 @@ if (utf && /* If the end precedes start_match, it means there is invalid UTF in the extra code units we reversed over because of a lookbehind. Advance past the first bad code unit, and then skip invalid character starting code units in - 8-bit and 16-bit modes, and try again. */ + 8-bit and 16-bit modes, and try again with the original end point. */ if (end_subject < start_match) { @@ -6491,6 +6688,7 @@ if (utf && while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject)) mb->check_subject++; #endif + end_subject = true_end_subject; } /* Otherwise, set the not end of line option, and do the match. */ @@ -6601,10 +6799,16 @@ the pattern. It is not used at all if there are no capturing parentheses. The last of these is changed within the match() function if the frame vector has to be expanded. We therefore put it into the match block so that it is -correct when calling match() more than once for non-anchored patterns. */ +correct when calling match() more than once for non-anchored patterns. -frame_size = offsetof(heapframe, ovector) + - re->top_bracket * 2 * sizeof(PCRE2_SIZE); +We must also pad frame_size for alignment to ensure subsequent frames are as +aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE +array, that does not guarantee it is suitably aligned for pointers, as some +architectures have pointers that are larger than a size_t. */ + +frame_size = (offsetof(heapframe, ovector) + + re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) & + ~(HEAPFRAME_ALIGNMENT - 1); /* Limits set in the pattern override the match context only if they are smaller. */ @@ -6648,7 +6852,7 @@ mb->match_frames_top = to avoid uninitialized memory read errors when it is copied to a new frame. */ memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff, - re->top_bracket * 2 * sizeof(PCRE2_SIZE)); + frame_size - offsetof(heapframe, ovector)); /* Pointers to the individual character tables */ diff --git a/thirdparty/pcre2/src/pcre2_script_run.c b/thirdparty/pcre2/src/pcre2_script_run.c index 91a48330283..4926fa63bbf 100644 --- a/thirdparty/pcre2/src/pcre2_script_run.c +++ b/thirdparty/pcre2/src/pcre2_script_run.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -68,26 +68,26 @@ Arguments: Returns: TRUE if this is a valid script run */ -/* These dummy values must be less than the negation of the largest offset in -the PRIV(ucd_script_sets) vector, which is held in a 16-bit field in UCD -records (and is only likely to be a few hundred). */ +/* These are states in the checking process. */ -#define SCRIPT_UNSET (-99999) -#define SCRIPT_HANPENDING (-99998) -#define SCRIPT_HANHIRAKATA (-99997) -#define SCRIPT_HANBOPOMOFO (-99996) -#define SCRIPT_HANHANGUL (-99995) -#define SCRIPT_LIST (-99994) +enum { SCRIPT_UNSET, /* Requirement as yet unknown */ + SCRIPT_MAP, /* Bitmap contains acceptable scripts */ + SCRIPT_HANPENDING, /* Have had only Han characters */ + SCRIPT_HANHIRAKATA, /* Expect Han or Hirikata */ + SCRIPT_HANBOPOMOFO, /* Expect Han or Bopomofo */ + SCRIPT_HANHANGUL /* Expect Han or Hangul */ + }; -#define INTERSECTION_LIST_SIZE 50 +#define UCD_MAPSIZE (ucp_Unknown/32 + 1) +#define FULL_MAPSIZE (ucp_Script_Count/32 + 1) BOOL PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf) { #ifdef SUPPORT_UNICODE -int require_script = SCRIPT_UNSET; -uint8_t intersection_list[INTERSECTION_LIST_SIZE]; -const uint8_t *require_list = NULL; +uint32_t require_state = SCRIPT_UNSET; +uint32_t require_map[FULL_MAPSIZE]; +uint32_t map[FULL_MAPSIZE]; uint32_t require_digitset = 0; uint32_t c; @@ -101,11 +101,17 @@ if (ptr >= endptr) return TRUE; GETCHARINCTEST(c, ptr); if (ptr >= endptr) return TRUE; +/* Initialize the require map. This is a full-size bitmap that has a bit for +every script, as opposed to the maps in ucd_script_sets, which only have bits +for scripts less than ucp_Unknown - those that appear in script extension +lists. */ + +for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0; + /* Scan strings of two or more characters, checking the Unicode characteristics -of each code point. We make use of the Script Extensions property. There is -special code for scripts that can be combined with characters from the Han -Chinese script. This may be used in conjunction with four other scripts in -these combinations: +of each code point. There is special code for scripts that can be combined with +characters from the Han Chinese script. This may be used in conjunction with +four other scripts in these combinations: . Han with Hiragana and Katakana is allowed (for Japanese). . Han with Bopomofo is allowed (for Taiwanese Mandarin). @@ -119,310 +125,207 @@ Hence the SCRIPT_HANPENDING state. */ for (;;) { const ucd_record *ucd = GET_UCD(c); - int32_t scriptx = ucd->scriptx; + uint32_t script = ucd->script; - /* If the script extension is Unknown, the string is not a valid script run. - Such characters can only form script runs of length one. */ + /* If the script is Unknown, the string is not a valid script run. Such + characters can only form script runs of length one (see test above). */ - if (scriptx == ucp_Unknown) return FALSE; + if (script == ucp_Unknown) return FALSE; - /* A character whose script extension is Inherited is always accepted with - any script, and plays no further part in this testing. A character whose - script is Common is always accepted, but must still be tested for a digit - below. The scriptx value at this point is non-zero, because zero is - ucp_Unknown, tested for above. */ + /* A character without any script extensions whose script is Inherited or + Common is always accepted with any script. If there are extensions, the + following processing happens for all scripts. */ - if (scriptx != ucp_Inherited) + if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common)) { - if (scriptx != ucp_Common) + BOOL OK; + + /* Set up a full-sized map for this character that can include bits for all + scripts. Copy the scriptx map for this character (which covers those + scripts that appear in script extension lists), set the remaining values to + zero, and then, except for Common or Inherited, add this script's bit to + the map. */ + + memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t)); + memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t)); + if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script); + + /* Handle the different checking states */ + + switch(require_state) { - /* If the script extension value is positive, the character is not a mark - that can be used with many scripts. In the simple case we either set or - compare with the required script. However, handling the scripts that can - combine with Han are more complicated, as is the case when the previous - characters have been man-script marks. */ + /* First significant character - it might follow Common or Inherited + characters that do not have any script extensions. */ - if (scriptx > 0) + case SCRIPT_UNSET: + switch(script) { - switch(require_script) - { - /* Either the first significant character (require_script unset) or - after only Han characters. */ + case ucp_Han: + require_state = SCRIPT_HANPENDING; + break; - case SCRIPT_UNSET: - case SCRIPT_HANPENDING: - switch(scriptx) - { - case ucp_Han: - require_script = SCRIPT_HANPENDING; - break; + case ucp_Hiragana: + case ucp_Katakana: + require_state = SCRIPT_HANHIRAKATA; + break; - case ucp_Hiragana: - case ucp_Katakana: - require_script = SCRIPT_HANHIRAKATA; - break; + case ucp_Bopomofo: + require_state = SCRIPT_HANBOPOMOFO; + break; - case ucp_Bopomofo: - require_script = SCRIPT_HANBOPOMOFO; - break; + case ucp_Hangul: + require_state = SCRIPT_HANHANGUL; + break; - case ucp_Hangul: - require_script = SCRIPT_HANHANGUL; - break; + default: + memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t)); + require_state = SCRIPT_MAP; + break; + } + break; - /* Not a Han-related script. If expecting one, fail. Otherise set - the requirement to this script. */ + /* The first significant character was Han. An inspection of the Unicode + 11.0.0 files shows that there are the following types of Script Extension + list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul + scripts: - default: - if (require_script == SCRIPT_HANPENDING) return FALSE; - require_script = scriptx; - break; - } - break; + . Bopomofo + Han + . Han + Hiragana + Katakana + . Hiragana + Katakana + . Bopopmofo + Hangul + Han + Hiragana + Katakana - /* Previously encountered one of the "with Han" scripts. Check that - this character is appropriate. */ - - case SCRIPT_HANHIRAKATA: - if (scriptx != ucp_Han && scriptx != ucp_Hiragana && - scriptx != ucp_Katakana) - return FALSE; - break; - - case SCRIPT_HANBOPOMOFO: - if (scriptx != ucp_Han && scriptx != ucp_Bopomofo) return FALSE; - break; - - case SCRIPT_HANHANGUL: - if (scriptx != ucp_Han && scriptx != ucp_Hangul) return FALSE; - break; - - /* We have a list of scripts to check that is derived from one or - more previous characters. This is either one of the lists in - ucd_script_sets[] (for one previous character) or the intersection of - several lists for multiple characters. */ - - case SCRIPT_LIST: - { - const uint8_t *list; - for (list = require_list; *list != 0; list++) - { - if (*list == scriptx) break; - } - if (*list == 0) return FALSE; - } - - /* The rest of the string must be in this script, but we have to - allow for the Han complications. */ - - switch(scriptx) - { - case ucp_Han: - require_script = SCRIPT_HANPENDING; - break; - - case ucp_Hiragana: - case ucp_Katakana: - require_script = SCRIPT_HANHIRAKATA; - break; - - case ucp_Bopomofo: - require_script = SCRIPT_HANBOPOMOFO; - break; - - case ucp_Hangul: - require_script = SCRIPT_HANHANGUL; - break; - - default: - require_script = scriptx; - break; - } - break; - - /* This is the easy case when a single script is required. */ - - default: - if (scriptx != require_script) return FALSE; - break; - } - } /* End of handing positive scriptx */ - - /* If scriptx is negative, this character is a mark-type character that - has a list of permitted scripts. */ - - else - { - uint32_t chspecial; - const uint8_t *clist, *rlist; - const uint8_t *list = PRIV(ucd_script_sets) - scriptx; - - switch(require_script) - { - case SCRIPT_UNSET: - require_list = PRIV(ucd_script_sets) - scriptx; - require_script = SCRIPT_LIST; - break; - - /* An inspection of the Unicode 11.0.0 files shows that there are the - following types of Script Extension list that involve the Han, - Bopomofo, Hiragana, Katakana, and Hangul scripts: - - . Bopomofo + Han - . Han + Hiragana + Katakana - . Hiragana + Katakana - . Bopopmofo + Hangul + Han + Hiragana + Katakana - - The following code tries to make sense of this. */ + The following code tries to make sense of this. */ #define FOUND_BOPOMOFO 1 #define FOUND_HIRAGANA 2 #define FOUND_KATAKANA 4 #define FOUND_HANGUL 8 - case SCRIPT_HANPENDING: - chspecial = 0; - for (; *list != 0; list++) - { - switch (*list) - { - case ucp_Bopomofo: chspecial |= FOUND_BOPOMOFO; break; - case ucp_Hiragana: chspecial |= FOUND_HIRAGANA; break; - case ucp_Katakana: chspecial |= FOUND_KATAKANA; break; - case ucp_Hangul: chspecial |= FOUND_HANGUL; break; - default: break; - } - } - - if (chspecial == 0) return FALSE; - - if (chspecial == FOUND_BOPOMOFO) - { - require_script = SCRIPT_HANBOPOMOFO; - } - else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA)) - { - require_script = SCRIPT_HANHIRAKATA; - } - - /* Otherwise it must be allowed with all of them, so remain in - the pending state. */ - - break; - - case SCRIPT_HANHIRAKATA: - for (; *list != 0; list++) - { - if (*list == ucp_Hiragana || *list == ucp_Katakana) break; - } - if (*list == 0) return FALSE; - break; - - case SCRIPT_HANBOPOMOFO: - for (; *list != 0; list++) - { - if (*list == ucp_Bopomofo) break; - } - if (*list == 0) return FALSE; - break; - - case SCRIPT_HANHANGUL: - for (; *list != 0; list++) - { - if (*list == ucp_Hangul) break; - } - if (*list == 0) return FALSE; - break; - - /* Previously encountered one or more characters that are allowed - with a list of scripts. Build the intersection of the required list - with this character's list in intersection_list[]. This code is - written so that it still works OK if the required list is already in - that vector. */ - - case SCRIPT_LIST: - { - int i = 0; - for (rlist = require_list; *rlist != 0; rlist++) - { - for (clist = list; *clist != 0; clist++) - { - if (*rlist == *clist) - { - intersection_list[i++] = *rlist; - break; - } - } - } - if (i == 0) return FALSE; /* No scripts in common */ - - /* If there's just one script in common, we can set it as the - unique required script. Otherwise, terminate the intersection list - and make it the required list. */ - - if (i == 1) - { - require_script = intersection_list[0]; - } - else - { - intersection_list[i] = 0; - require_list = intersection_list; - } - } - break; - - /* The previously set required script is a single script, not - Han-related. Check that it is in this character's list. */ - - default: - for (; *list != 0; list++) - { - if (*list == require_script) break; - } - if (*list == 0) return FALSE; - break; - } - } /* End of handling negative scriptx */ - } /* End of checking non-Common character */ - - /* The character is in an acceptable script. We must now ensure that all - decimal digits in the string come from the same set. Some scripts (e.g. - Common, Arabic) have more than one set of decimal digits. This code does - not allow mixing sets, even within the same script. The vector called - PRIV(ucd_digit_sets)[] contains, in its first element, the number of - following elements, and then, in ascending order, the code points of the - '9' characters in every set of 10 digits. Each set is identified by the - offset in the vector of its '9' character. An initial check of the first - value picks up ASCII digits quickly. Otherwise, a binary chop is used. */ - - if (ucd->chartype == ucp_Nd) - { - uint32_t digitset; - - if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else + case SCRIPT_HANPENDING: + if (script != ucp_Han) /* Another Han does nothing */ { - int mid; - int bot = 1; - int top = PRIV(ucd_digit_sets)[0]; - for (;;) + uint32_t chspecial = 0; + + if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO; + if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA; + if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA; + if (MAPBIT(map, ucp_Hangul) != 0) chspecial |= FOUND_HANGUL; + + if (chspecial == 0) return FALSE; /* Not allowed with Han */ + + if (chspecial == FOUND_BOPOMOFO) + require_state = SCRIPT_HANBOPOMOFO; + else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA)) + require_state = SCRIPT_HANHIRAKATA; + + /* Otherwise this character must be allowed with all of them, so remain + in the pending state. */ + } + break; + + /* Previously encountered one of the "with Han" scripts. Check that + this character is appropriate. */ + + case SCRIPT_HANHIRAKATA: + if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) + + MAPBIT(map, ucp_Katakana) == 0) return FALSE; + break; + + case SCRIPT_HANBOPOMOFO: + if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE; + break; + + case SCRIPT_HANHANGUL: + if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE; + break; + + /* Previously encountered one or more characters that are allowed with a + list of scripts. */ + + case SCRIPT_MAP: + OK = FALSE; + + for (int i = 0; i < FULL_MAPSIZE; i++) + { + if ((require_map[i] & map[i]) != 0) { - if (top <= bot + 1) /* <= rather than == is paranoia */ - { - digitset = top; - break; - } - mid = (top + bot) / 2; - if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid; + OK = TRUE; + break; } } - /* A required value of 0 means "unset". */ + if (!OK) return FALSE; - if (require_digitset == 0) require_digitset = digitset; - else if (digitset != require_digitset) return FALSE; - } /* End digit handling */ - } /* End checking non-Inherited character */ + /* The rest of the string must be in this script, but we have to + allow for the Han complications. */ + + switch(script) + { + case ucp_Han: + require_state = SCRIPT_HANPENDING; + break; + + case ucp_Hiragana: + case ucp_Katakana: + require_state = SCRIPT_HANHIRAKATA; + break; + + case ucp_Bopomofo: + require_state = SCRIPT_HANBOPOMOFO; + break; + + case ucp_Hangul: + require_state = SCRIPT_HANHANGUL; + break; + + /* Compute the intersection of the required list of scripts and the + allowed scripts for this character. */ + + default: + for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i]; + break; + } + + break; + } + } /* End checking character's script and extensions. */ + + /* The character is in an acceptable script. We must now ensure that all + decimal digits in the string come from the same set. Some scripts (e.g. + Common, Arabic) have more than one set of decimal digits. This code does + not allow mixing sets, even within the same script. The vector called + PRIV(ucd_digit_sets)[] contains, in its first element, the number of + following elements, and then, in ascending order, the code points of the + '9' characters in every set of 10 digits. Each set is identified by the + offset in the vector of its '9' character. An initial check of the first + value picks up ASCII digits quickly. Otherwise, a binary chop is used. */ + + if (ucd->chartype == ucp_Nd) + { + uint32_t digitset; + + if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else + { + int mid; + int bot = 1; + int top = PRIV(ucd_digit_sets)[0]; + for (;;) + { + if (top <= bot + 1) /* <= rather than == is paranoia */ + { + digitset = top; + break; + } + mid = (top + bot) / 2; + if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid; + } + } + + /* A required value of 0 means "unset". */ + + if (require_digitset == 0) require_digitset = digitset; + else if (digitset != require_digitset) return FALSE; + } /* End digit handling */ /* If we haven't yet got to the end, pick up the next character. */ diff --git a/thirdparty/pcre2/src/pcre2_string_utils.c b/thirdparty/pcre2/src/pcre2_string_utils.c index d6be01acf55..ebfa9434e39 100644 --- a/thirdparty/pcre2/src/pcre2_string_utils.c +++ b/thirdparty/pcre2/src/pcre2_string_utils.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2018 University of Cambridge + New API code Copyright (c) 2018-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without diff --git a/thirdparty/pcre2/src/pcre2_study.c b/thirdparty/pcre2/src/pcre2_study.c index 9bbb37570f8..4db3ad11842 100644 --- a/thirdparty/pcre2/src/pcre2_study.c +++ b/thirdparty/pcre2/src/pcre2_study.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -908,7 +908,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) { uint32_t c; for (c = 0; c < table_limit; c++) - re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]); + re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type])); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; #endif diff --git a/thirdparty/pcre2/src/pcre2_substitute.c b/thirdparty/pcre2/src/pcre2_substitute.c index 981a106a9f7..8b2c369cccd 100644 --- a/thirdparty/pcre2/src/pcre2_substitute.c +++ b/thirdparty/pcre2/src/pcre2_substitute.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -259,6 +259,18 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0) return PCRE2_ERROR_BADOPTION; + +/* Validate length and find the end of the replacement. A NULL replacement of +zero length is interpreted as an empty string. */ + +if (replacement == NULL) + { + if (rlength != 0) return PCRE2_ERROR_NULL; + replacement = (PCRE2_SPTR)""; + } + +if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); +repend = replacement + rlength; /* Check for using a match that has already happened. Note that the subject pointer in the match data may be NULL after a no-match. */ @@ -312,11 +324,18 @@ scb.input = subject; scb.output = (PCRE2_SPTR)buffer; scb.ovector = ovector; -/* Find lengths of zero-terminated strings and the end of the replacement. */ +/* A NULL subject of zero length is treated as an empty string. */ -if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); -if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement); -repend = replacement + rlength; +if (subject == NULL) + { + if (length != 0) return PCRE2_ERROR_NULL; + subject = (PCRE2_SPTR)""; + } + +/* Find length of zero-terminated subject */ + +if (length == PCRE2_ZERO_TERMINATED) + length = subject? PRIV(strlen)(subject) : 0; /* Check UTF replacement string if necessary. */ diff --git a/thirdparty/pcre2/src/pcre2_tables.c b/thirdparty/pcre2/src/pcre2_tables.c index c164e976e0f..e00252f1ebc 100644 --- a/thirdparty/pcre2/src/pcre2_tables.c +++ b/thirdparty/pcre2/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -51,10 +51,10 @@ defined. */ #include "pcre2_internal.h" #endif /* PCRE2_PCRE2TEST */ - /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that the definition is next to the definition of the opcodes in pcre2_internal.h. -This is mode-dependent, so is skipped when this file is included by pcre2test. */ +This is mode-dependent, so it is skipped when this file is included by +pcre2test. */ #ifndef PCRE2_PCRE2TEST const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS }; @@ -119,6 +119,9 @@ const uint8_t PRIV(utf8_table4)[] = { #endif /* UTF-8 support needed */ +/* Tables concerned with Unicode properties are relevant only when Unicode +support is enabled. See also the pcre2_ucptables.c file, which is generated by +a Python script from Unicode data files. */ #ifdef SUPPORT_UNICODE @@ -190,7 +193,7 @@ const uint32_t PRIV(ucp_gbtable)[] = { ESZ|(1u<script) == isprop) return !negated; break; + case PT_SCX: + ok = (data[1] == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0); + if (ok == isprop) return !negated; + break; + case PT_ALNUM: if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop) @@ -207,6 +214,17 @@ while ((t = *data++) != XCL_END) } break; + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop) + return !negated; + break; + + case PT_BOOL: + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), data[1]) != 0; + if (ok == isprop) return !negated; + break; + /* The following three properties can occur only in an XCLASS, as there is no \p or \P coding for them. */ diff --git a/thirdparty/pcre2/src/sljit/sljitConfigInternal.h b/thirdparty/pcre2/src/sljit/sljitConfigInternal.h index 7bb9990a596..55e4e39f137 100644 --- a/thirdparty/pcre2/src/sljit/sljitConfigInternal.h +++ b/thirdparty/pcre2/src/sljit/sljitConfigInternal.h @@ -60,7 +60,7 @@ extern "C" { SLJIT_LITTLE_ENDIAN : little endian architecture SLJIT_BIG_ENDIAN : big endian architecture SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information Constants: SLJIT_NUMBER_OF_REGISTERS : number of available registers @@ -148,7 +148,7 @@ extern "C" { #endif #elif defined (__aarch64__) #define SLJIT_CONFIG_ARM_64 1 -#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__)) +#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__)) #define SLJIT_CONFIG_PPC_64 1 #elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) #define SLJIT_CONFIG_PPC_32 1 @@ -156,7 +156,7 @@ extern "C" { #define SLJIT_CONFIG_MIPS_32 1 #elif defined(__mips64) #define SLJIT_CONFIG_MIPS_64 1 -#elif defined(__sparc__) || defined(__sparc) +#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64) #define SLJIT_CONFIG_SPARC_32 1 #elif defined(__s390x__) #define SLJIT_CONFIG_S390X 1 @@ -274,9 +274,13 @@ extern "C" { #ifndef SLJIT_INLINE /* Inline functions. Some old compilers do not support them. */ -#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 +#ifdef __SUNPRO_C +#if __SUNPRO_C < 0x560 #define SLJIT_INLINE #else +#define SLJIT_INLINE inline +#endif /* __SUNPRO_C */ +#else #define SLJIT_INLINE __inline #endif #endif /* !SLJIT_INLINE */ @@ -319,18 +323,36 @@ extern "C" { /* Instruction cache flush. */ /****************************/ +/* + * TODO: + * + * clang >= 15 could be safe to enable below + * older versions are known to abort in some targets + * https://github.com/PhilipHazel/pcre2/issues/92 + * + * beware APPLE is known to have removed the code in iOS so + * it will need to be excempted or result in broken builds + */ #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) -#if __has_builtin(__builtin___clear_cache) +#if __has_builtin(__builtin___clear_cache) && !defined(__clang__) +/* + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 + * gcc's clear_cache builtin for power and sparc are broken + */ +#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32) #define SLJIT_CACHE_FLUSH(from, to) \ __builtin___clear_cache((char*)(from), (char*)(to)) +#endif -#endif /* __has_builtin(__builtin___clear_cache) */ +#endif /* gcc >= 10 */ #endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ #ifndef SLJIT_CACHE_FLUSH -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Not required to implement on archs with unified caches. */ #define SLJIT_CACHE_FLUSH(from, to) @@ -340,9 +362,9 @@ extern "C" { /* Supported by all macs since Mac OS 10.5. However, it does not work on non-jailbroken iOS devices, although the compilation is successful. */ - +#include #define SLJIT_CACHE_FLUSH(from, to) \ - sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) + sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from))) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -351,18 +373,6 @@ extern "C" { ppc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) - -#define SLJIT_CACHE_FLUSH(from, to) \ - __builtin___clear_cache((char*)(from), (char*)(to)) - -#elif defined __ANDROID__ - -/* Android lacks __clear_cache; instead, cacheflush should be used. */ - -#define SLJIT_CACHE_FLUSH(from, to) \ - cacheflush((long)(from), (long)(to), 0) - #elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) /* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ @@ -370,14 +380,26 @@ extern "C" { sparc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) + +#elif defined __ANDROID__ + +/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */ +#include +#define SLJIT_CACHE_FLUSH(from, to) \ + cacheflush((long)(from), (long)(to), 0) + #elif defined _WIN32 #define SLJIT_CACHE_FLUSH(from, to) \ - FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) #else -/* Calls __ARM_NR_cacheflush on ARM-Linux. */ +/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */ #define SLJIT_CACHE_FLUSH(from, to) \ __clear_cache((char*)(from), (char*)(to)) @@ -645,18 +667,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 9 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) #define SLJIT_PREF_SHIFT_REG SLJIT_R2 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_NUMBER_OF_REGISTERS 13 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 #ifndef _WIN64 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #else /* _WIN64 */ #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 -#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) #endif /* !_WIN64 */ #define SLJIT_PREF_SHIFT_REG SLJIT_R3 @@ -664,31 +691,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) #define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 -#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #define SLJIT_NUMBER_OF_REGISTERS 23 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) -#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw)) #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) /* Add +1 for double alignment. */ -#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * (sljit_s32)sizeof(sljit_sw)) #else -#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw)) #endif /* SLJIT_CONFIG_PPC_64 || _AIX */ #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -696,19 +731,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 21 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw)) +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 13 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 6 #else #define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #endif #elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) #define SLJIT_NUMBER_OF_REGISTERS 18 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) /* saved registers (16), return struct pointer (1), space for 6 argument words (1), 4th double arg (2), double alignment (1). */ -#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw)) #endif #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -736,12 +777,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #define SLJIT_NUMBER_OF_REGISTERS 0 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #define SLJIT_LOCALS_OFFSET_BASE 0 #endif @@ -751,13 +796,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) -#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) -#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 -#else -#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 -#endif - #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) @@ -765,8 +803,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); /* CPU status flags management. */ /********************************/ -#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ - || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) diff --git a/thirdparty/pcre2/src/sljit/sljitExecAllocator.c b/thirdparty/pcre2/src/sljit/sljitExecAllocator.c index 6e5bf78e45f..92d940ddc24 100644 --- a/thirdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/thirdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -66,7 +66,7 @@ /* --------------------------------------------------------------------- */ /* 64 KByte. */ -#define CHUNK_SIZE 0x10000 +#define CHUNK_SIZE (sljit_uw)0x10000u /* alloc_chunk / free_chunk : @@ -112,7 +112,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) static SLJIT_INLINE int get_map_jit_flag() { - sljit_sw page_size; + size_t page_size; void *ptr; struct utsname name; static int map_jit_flag = -1; @@ -139,8 +139,9 @@ static SLJIT_INLINE int get_map_jit_flag() #endif /* MAP_ANON */ #else /* !SLJIT_CONFIG_X86 */ #if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) -#error Unsupported architecture +#error "Unsupported architecture" #endif /* SLJIT_CONFIG_ARM */ +#include #include #define SLJIT_MAP_JIT (MAP_JIT) @@ -149,7 +150,11 @@ static SLJIT_INLINE int get_map_jit_flag() static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) { +#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000 pthread_jit_write_protect_np(enable_exec); +#else +#error "Must target Big Sur or newer" +#endif /* BigSur */ } #endif /* SLJIT_CONFIG_X86 */ #else /* !TARGET_OS_OSX */ @@ -187,10 +192,13 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) if (retval == MAP_FAILED) return NULL; +#ifdef __FreeBSD__ + /* HardenedBSD's mmap lies, so check permissions again */ if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { munmap(retval, size); return NULL; } +#endif /* FreeBSD */ SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); @@ -227,7 +235,7 @@ struct free_block { #define AS_FREE_BLOCK(base, offset) \ ((struct free_block*)(((sljit_u8*)base) + offset)) #define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header))) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) static struct free_block* free_blocks; static sljit_uw allocated_size; diff --git a/thirdparty/pcre2/src/sljit/sljitLir.c b/thirdparty/pcre2/src/sljit/sljitLir.c index a24a99ab87e..313a061dd3f 100644 --- a/thirdparty/pcre2/src/sljit/sljitLir.c +++ b/thirdparty/pcre2/src/sljit/sljitLir.c @@ -90,26 +90,28 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SSIZE_OF(type) ((sljit_s32)sizeof(sljit_ ## type)) + #define VARIABLE_FLAG_SHIFT (10) #define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT) #define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) #define GET_OPCODE(op) \ - ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & ~(SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define HAS_FLAGS(op) \ ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define GET_ALL_FLAGS(op) \ - ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) + ((op) & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #define TYPE_CAST_NEEDED(op) \ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) -#else +#else /* !SLJIT_64BIT_ARCHITECTURE */ #define TYPE_CAST_NEEDED(op) \ ((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) -#endif +#endif /* SLJIT_64BIT_ARCHITECTURE */ #define BUF_SIZE 4096 @@ -126,11 +128,10 @@ #define TO_OFFS_REG(reg) ((reg) << 8) /* When reg cannot be unused. */ #define FAST_IS_REG(reg) ((reg) <= REG_MASK) -/* When reg can be unused. */ -#define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK) /* Mask for argument types. */ -#define SLJIT_DEF_MASK ((1 << SLJIT_DEF_SHIFT) - 1) +#define SLJIT_ARG_MASK 0x7 +#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) /* Jump flags. */ #define JUMP_LABEL 0x1 @@ -247,8 +248,11 @@ #define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ - (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ - extra) * sizeof(sljit_sw)) + (saveds) + (sljit_s32)(extra)) * (sljit_s32)sizeof(sljit_sw)) + +#define GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, size) \ + (((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \ + (fsaveds)) * (sljit_s32)(size)) #define ADJUST_LOCAL_OFFSET(p, i) \ if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ @@ -379,9 +383,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), invalid_integer_types); - SLJIT_COMPILE_ASSERT(SLJIT_I32_OP == SLJIT_F32_OP, - int_op_and_single_op_must_be_the_same); - SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP, + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, rewritable_jump_and_single_op_must_not_be_the_same); SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1), conditional_flags_must_be_even_numbers); @@ -415,7 +417,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->local_size = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - compiler->args = -1; + compiler->args_size = -1; #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) @@ -439,6 +441,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->delay_slot = UNMOVABLE_INS; #endif +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->last_flags = 0; + compiler->last_return = -1; + compiler->logical_local_size = 0; +#endif + #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) if (!compiler_initialized) { init_compiler(); @@ -488,7 +497,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_d SLJIT_UNUSED_ARG(exec_allocator_data); /* Remove thumb mode flag. */ - SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1), exec_allocator_data); + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~(sljit_uw)0x1), exec_allocator_data); } #elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) @@ -511,7 +520,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_d SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) { if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { - jump->flags &= ~JUMP_ADDR; + jump->flags &= (sljit_uw)~JUMP_ADDR; jump->flags |= JUMP_LABEL; jump->u.label = label; } @@ -520,7 +529,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) { if (SLJIT_LIKELY(!!jump)) { - jump->flags &= ~JUMP_LABEL; + jump->flags &= (sljit_uw)~JUMP_LABEL; jump->flags |= JUMP_ADDR; jump->u.target = target; } @@ -533,7 +542,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_la } #define SLJIT_CURRENT_FLAGS_ALL \ - (SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { @@ -547,7 +556,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *com #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_flags = 0; if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { - compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_32 | SLJIT_SET_Z)); } #endif } @@ -607,7 +616,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compile return NULL; size = (size + 3) & ~3; #endif - return ensure_abuf(compiler, size); + return ensure_abuf(compiler, (sljit_uw)size); } static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) @@ -626,20 +635,6 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) compiler->buf = prev; } -static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types) -{ - sljit_s32 arg_count = 0; - - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - arg_count++; - arg_types >>= SLJIT_DEF_SHIFT; - } - - return arg_count; -} - - /* Only used in RISC architectures where the instruction size is constant */ #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -679,6 +674,7 @@ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, compiler->fscratches = fscratches; compiler->fsaveds = fsaveds; #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; #endif } @@ -696,6 +692,7 @@ static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, compiler->fscratches = fscratches; compiler->fsaveds = fsaveds; #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; #endif } @@ -711,7 +708,7 @@ static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compi compiler->last_label = label; } -static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_s32 flags) +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_u32 flags) { jump->next = NULL; jump->flags = flags; @@ -751,6 +748,58 @@ static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches) +{ + sljit_s32 word_arg_count, scratch_arg_end, saved_arg_count, float_arg_count, curr_type; + + curr_type = (arg_types & SLJIT_ARG_FULL_MASK); + + if (curr_type >= SLJIT_ARG_TYPE_F64) { + if (curr_type > SLJIT_ARG_TYPE_F32 || fscratches == 0) + return 0; + } else if (curr_type >= SLJIT_ARG_TYPE_W) { + if (scratches == 0) + return 0; + } + + arg_types >>= SLJIT_ARG_SHIFT; + + word_arg_count = 0; + scratch_arg_end = 0; + saved_arg_count = 0; + float_arg_count = 0; + while (arg_types != 0) { + if (word_arg_count + float_arg_count >= 4) + return 0; + + curr_type = (arg_types & SLJIT_ARG_MASK); + + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (saveds == -1 || curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_P) + return 0; + + word_arg_count++; + scratch_arg_end = word_arg_count; + } else { + if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32) + return 0; + + if (curr_type < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + saved_arg_count++; + } else + float_arg_count++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (saveds == -1) + return (word_arg_count <= scratches && float_arg_count <= fscratches); + + return (saved_arg_count <= saveds && scratch_arg_end <= scratches && float_arg_count <= fscratches); +} + #define FUNCTION_CHECK_IS_REG(r) \ (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) \ || ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) @@ -773,14 +822,14 @@ static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s if (!(p & SLJIT_MEM)) return 0; - if (!((p & REG_MASK) == SLJIT_UNUSED || FUNCTION_CHECK_IS_REG(p & REG_MASK))) + if (!(!(p & REG_MASK) || FUNCTION_CHECK_IS_REG(p & REG_MASK))) return 0; if (CHECK_IF_VIRTUAL_REGISTER(p & REG_MASK)) return 0; if (p & OFFS_REG_MASK) { - if ((p & REG_MASK) == SLJIT_UNUSED) + if (!(p & REG_MASK)) return 0; if (!(FUNCTION_CHECK_IS_REG(OFFS_REG(p)))) @@ -819,12 +868,12 @@ static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p #define FUNCTION_CHECK_SRC(p, i) \ CHECK_ARGUMENT(function_check_src(compiler, p, i)); -static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 unused) +static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { if (compiler->scratches == -1 || compiler->saveds == -1) return 0; - if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) + if (FUNCTION_CHECK_IS_REG(p)) return (i == 0); if (p == SLJIT_MEM1(SLJIT_SP)) @@ -833,8 +882,8 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p return function_check_src_mem(compiler, p, i); } -#define FUNCTION_CHECK_DST(p, i, unused) \ - CHECK_ARGUMENT(function_check_dst(compiler, p, i, unused)); +#define FUNCTION_CHECK_DST(p, i) \ + CHECK_ARGUMENT(function_check_dst(compiler, p, i)); static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) { @@ -910,10 +959,8 @@ static void sljit_verbose_param(struct sljit_compiler *compiler, sljit_s32 p, sl } else fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); - } else if (p) + } else sljit_verbose_reg(compiler, p); - else - fprintf(compiler->verbose, "unused"); } static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) @@ -940,63 +987,61 @@ static void sljit_verbose_fparam(struct sljit_compiler *compiler, sljit_s32 p, s } static const char* op0_names[] = { - (char*)"breakpoint", (char*)"nop", (char*)"lmul.uw", (char*)"lmul.sw", - (char*)"divmod.u", (char*)"divmod.s", (char*)"div.u", (char*)"div.s", - (char*)"endbr", (char*)"skip_frames_before_return" + "breakpoint", "nop", "lmul.uw", "lmul.sw", + "divmod.u", "divmod.s", "div.u", "div.s", + "endbr", "skip_frames_before_return" }; static const char* op1_names[] = { - (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", - (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", - (char*)"", (char*)".u8", (char*)".s8", (char*)".u16", - (char*)".s16", (char*)".u32", (char*)".s32", (char*)".p", - (char*)"not", (char*)"neg", (char*)"clz", + "", ".u8", ".s8", ".u16", + ".s16", ".u32", ".s32", "32", + ".p", "not", "clz", }; static const char* op2_names[] = { - (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", - (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", - (char*)"shl", (char*)"lshr", (char*)"ashr", + "add", "addc", "sub", "subc", + "mul", "and", "or", "xor", + "shl", "lshr", "ashr", }; static const char* op_src_names[] = { - (char*)"fast_return", (char*)"skip_frames_before_fast_return", - (char*)"prefetch_l1", (char*)"prefetch_l2", - (char*)"prefetch_l3", (char*)"prefetch_once", + "fast_return", "skip_frames_before_fast_return", + "prefetch_l1", "prefetch_l2", + "prefetch_l3", "prefetch_once", }; static const char* fop1_names[] = { - (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", - (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", - (char*)"abs", + "mov", "conv", "conv", "conv", + "conv", "conv", "cmp", "neg", + "abs", }; static const char* fop2_names[] = { - (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" + "add", "sub", "mul", "div" }; #define JUMP_POSTFIX(type) \ - ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? ((type & SLJIT_I32_OP) ? "32" : "") \ - : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_F32_OP) ? ".f32" : ".f64") : "")) + ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? ((type & SLJIT_32) ? "32" : "") \ + : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_32) ? ".f32" : ".f64") : "")) -static char* jump_names[] = { - (char*)"equal", (char*)"not_equal", - (char*)"less", (char*)"greater_equal", - (char*)"greater", (char*)"less_equal", - (char*)"sig_less", (char*)"sig_greater_equal", - (char*)"sig_greater", (char*)"sig_less_equal", - (char*)"overflow", (char*)"not_overflow", - (char*)"carry", (char*)"", - (char*)"equal", (char*)"not_equal", - (char*)"less", (char*)"greater_equal", - (char*)"greater", (char*)"less_equal", - (char*)"unordered", (char*)"ordered", - (char*)"jump", (char*)"fast_call", - (char*)"call", (char*)"call.cdecl" +static const char* jump_names[] = { + "equal", "not_equal", + "less", "greater_equal", + "greater", "less_equal", + "sig_less", "sig_greater_equal", + "sig_greater", "sig_less_equal", + "overflow", "not_overflow", + "carry", "", + "equal", "not_equal", + "less", "greater_equal", + "greater", "less_equal", + "unordered", "ordered", + "jump", "fast_call", + "call", "call.cdecl" }; -static char* call_arg_names[] = { - (char*)"void", (char*)"sw", (char*)"uw", (char*)"s32", (char*)"u32", (char*)"f32", (char*)"f64" +static const char* call_arg_names[] = { + "void", "w", "32", "p", "f64", "f32" }; #endif /* SLJIT_VERBOSE */ @@ -1032,48 +1077,40 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 types, arg_count, curr_type; -#endif - SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); - CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); - CHECK_ARGUMENT((arg_types & SLJIT_DEF_MASK) == 0); - - types = (arg_types >> SLJIT_DEF_SHIFT); - arg_count = 0; - while (types != 0 && arg_count < 3) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); - arg_count++; - types >>= SLJIT_DEF_SHIFT; - } - CHECK_ARGUMENT(arg_count <= saveds && types == 0); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " enter options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? "f64_align" : ""); + fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; - if (arg_types) - fprintf(compiler->verbose, ","); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); } - fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", scratches, saveds, fscratches, fsaveds, local_size); } #endif @@ -1084,74 +1121,94 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 types, arg_count, curr_type; -#endif - SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); - CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); - - types = (arg_types >> SLJIT_DEF_SHIFT); - arg_count = 0; - while (types != 0 && arg_count < 3) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); - arg_count++; - types >>= SLJIT_DEF_SHIFT; - } - CHECK_ARGUMENT(arg_count <= saveds && types == 0); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " set_context options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? "f64_align" : ""); + fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; - while (arg_types) { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; - if (arg_types) - fprintf(compiler->verbose, ","); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); + arg_types >>= SLJIT_ARG_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); } - fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", + (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", scratches, saveds, fscratches, fsaveds, local_size); } #endif CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return_void(struct sljit_compiler *compiler) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->last_return == SLJIT_ARG_TYPE_VOID); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return_void\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(compiler->scratches >= 0); - if (op != SLJIT_UNUSED) { - CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); - FUNCTION_CHECK_SRC(src, srcw); + + switch (compiler->last_return) { + case SLJIT_ARG_TYPE_W: + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_S32); + break; + case SLJIT_ARG_TYPE_32: + CHECK_ARGUMENT(op == SLJIT_MOV32 || (op >= SLJIT_MOV32_U8 && op <= SLJIT_MOV32_S16)); + break; + case SLJIT_ARG_TYPE_P: + CHECK_ARGUMENT(op == SLJIT_MOV_P); + break; + default: + /* Context not initialized, void, etc. */ + CHECK_ARGUMENT(0); + break; } - else - CHECK_ARGUMENT(src == 0 && srcw == 0); + FUNCTION_CHECK_SRC(src, srcw); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - if (op == SLJIT_UNUSED) - fprintf(compiler->verbose, " return\n"); - else { - fprintf(compiler->verbose, " return%s ", op1_names[op - SLJIT_OP1_BASE]); - sljit_verbose_param(compiler, src, srcw); - fprintf(compiler->verbose, "\n"); - } + fprintf(compiler->verbose, " return%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); } #endif CHECK_RETURN_OK; @@ -1160,7 +1217,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1177,7 +1234,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) - || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW) + || ((op & ~SLJIT_32) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_32) <= SLJIT_DIV_SW) || (op >= SLJIT_ENDBR && op <= SLJIT_SKIP_FRAMES_BEFORE_RETURN)); CHECK_ARGUMENT(GET_OPCODE(op) < SLJIT_LMUL_UW || GET_OPCODE(op) >= SLJIT_ENDBR || compiler->scratches >= 2); if ((GET_OPCODE(op) >= SLJIT_LMUL_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) || op == SLJIT_SKIP_FRAMES_BEFORE_RETURN) @@ -1188,7 +1245,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler { fprintf(compiler->verbose, " %s", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); if (GET_OPCODE(op) >= SLJIT_DIVMOD_UW && GET_OPCODE(op) <= SLJIT_DIV_SW) { - fprintf(compiler->verbose, (op & SLJIT_I32_OP) ? "32" : "w"); + fprintf(compiler->verbose, (op & SLJIT_32) ? "32" : "w"); } fprintf(compiler->verbose, "\n"); } @@ -1210,43 +1267,39 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler switch (GET_OPCODE(op)) { case SLJIT_NOT: - /* Only SLJIT_I32_OP and SLJIT_SET_Z are allowed. */ + /* Only SLJIT_32 and SLJIT_SET_Z are allowed. */ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); break; - case SLJIT_NEG: - CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) - || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); - break; case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_P: /* Nothing allowed */ - CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + CHECK_ARGUMENT(!(op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); break; default: - /* Only SLJIT_I32_OP is allowed. */ + /* Only SLJIT_32 is allowed. */ CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); break; } - FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op)); + FUNCTION_CHECK_DST(dst, dstw); FUNCTION_CHECK_SRC(src, srcw); if (GET_OPCODE(op) >= SLJIT_NOT) { CHECK_ARGUMENT(src != SLJIT_IMM); - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) <= SLJIT_MOV_P) { - fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_I32_OP) ? "" : "32", - (op != SLJIT_MOV32) ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ""); + fprintf(compiler->verbose, " mov%s%s ", !(op & SLJIT_32) ? "" : "32", + op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE]); } else { - fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_32) ? "" : "32", !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); } @@ -1260,7 +1313,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 unset, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -1302,24 +1355,31 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); - CHECK_ARGUMENT((op & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + CHECK_ARGUMENT((op & SLJIT_32) == (compiler->last_flags & SLJIT_32)); break; default: SLJIT_UNREACHABLE(); break; } - FUNCTION_CHECK_DST(dst, dstw, HAS_FLAGS(op)); + if (unset) { + CHECK_ARGUMENT(HAS_FLAGS(op)); + } else { + FUNCTION_CHECK_DST(dst, dstw); + } FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_32) ? "" : "32", !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); - sljit_verbose_param(compiler, dst, dstw); + if (unset) + fprintf(compiler->verbose, "unset"); + else + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); @@ -1376,10 +1436,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - int i; + sljit_u32 i; #endif SLJIT_UNUSED_ARG(compiler); @@ -1431,10 +1491,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONV_F64_FROM_F32 - SLJIT_FOP1_BASE], - (op & SLJIT_F32_OP) ? ".f32.from.f64" : ".f64.from.f32"); + (op & SLJIT_32) ? ".f32.from.f64" : ".f64.from.f32"); else fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + (op & SLJIT_32) ? ".f32" : ".f64"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1450,7 +1510,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); #endif if (SLJIT_UNLIKELY(compiler->skip_checks)) { @@ -1469,7 +1529,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); if (op & VARIABLE_FLAG_MASK) { fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]); } @@ -1497,13 +1557,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src, srcw); - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? ".s32" : ".sw", - (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + (op & SLJIT_32) ? ".f32" : ".f64"); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src, srcw); @@ -1532,7 +1592,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(str #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], - (op & SLJIT_F32_OP) ? ".f32" : ".f64", + (op & SLJIT_32) ? ".f32" : ".f64", (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? ".s32" : ".sw"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); @@ -1558,7 +1618,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src1, src1w); @@ -1598,15 +1658,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); - CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); - CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP)); + CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_32)); if ((type & 0xff) < SLJIT_JUMP) { if ((type & 0xff) <= SLJIT_NOT_ZERO) CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); - else + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CARRY || (type & 0xff) == SLJIT_NOT_CARRY); + compiler->last_flags = 0; + } else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); } @@ -1623,49 +1685,27 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile sljit_s32 arg_types) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 i, types, curr_type, scratches, fscratches; - - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); - types = arg_types; - scratches = 0; - fscratches = 0; - for (i = 0; i < 5; i++) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); - if (i > 0) { - if (curr_type == 0) { - break; - } - if (curr_type >= SLJIT_ARG_TYPE_F32) - fscratches++; - else - scratches++; - } else { - if (curr_type >= SLJIT_ARG_TYPE_F32) { - CHECK_ARGUMENT(compiler->fscratches > 0); - } else if (curr_type >= SLJIT_ARG_TYPE_SW) { - CHECK_ARGUMENT(compiler->scratches > 0); - } - } - types >>= SLJIT_DEF_SHIFT; + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); } - CHECK_ARGUMENT(compiler->scratches >= scratches); - CHECK_ARGUMENT(compiler->fscratches >= fscratches); - CHECK_ARGUMENT(types == 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ret[%s", jump_names[type & 0xff], - !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + fprintf(compiler->verbose, " %s%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + !(type & SLJIT_CALL_RETURN) ? "" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) { fprintf(compiler->verbose, "], args["); do { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) fprintf(compiler->verbose, ","); } while (arg_types); @@ -1681,7 +1721,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); @@ -1690,7 +1730,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " cmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], (type & SLJIT_I32_OP) ? "32" : ""); + jump_names[type & 0xff], (type & SLJIT_32) ? "32" : ""); sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src2, src2w); @@ -1706,7 +1746,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); @@ -1715,7 +1755,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fcmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], (type & SLJIT_F32_OP) ? ".f32" : ".f64"); + jump_names[type & 0xff], (type & SLJIT_32) ? ".f32" : ".f64"); sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src2, src2w); @@ -1752,49 +1792,27 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - sljit_s32 i, types, curr_type, scratches, fscratches; - - CHECK_ARGUMENT(type == SLJIT_CALL || type == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); FUNCTION_CHECK_SRC(src, srcw); - types = arg_types; - scratches = 0; - fscratches = 0; - for (i = 0; i < 5; i++) { - curr_type = (types & SLJIT_DEF_MASK); - CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); - if (i > 0) { - if (curr_type == 0) { - break; - } - if (curr_type >= SLJIT_ARG_TYPE_F32) - fscratches++; - else - scratches++; - } else { - if (curr_type >= SLJIT_ARG_TYPE_F32) { - CHECK_ARGUMENT(compiler->fscratches > 0); - } else if (curr_type >= SLJIT_ARG_TYPE_SW) { - CHECK_ARGUMENT(compiler->scratches > 0); - } - } - types >>= SLJIT_DEF_SHIFT; + if (type & SLJIT_CALL_RETURN) { + CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); } - CHECK_ARGUMENT(compiler->scratches >= scratches); - CHECK_ARGUMENT(compiler->fscratches >= fscratches); - CHECK_ARGUMENT(types == 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], - !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + !(type & SLJIT_CALL_RETURN) ? "" : ".ret", + call_arg_names[arg_types & SLJIT_ARG_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) { fprintf(compiler->verbose, "], args["); do { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); - arg_types >>= SLJIT_DEF_SHIFT; + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) fprintf(compiler->verbose, ","); } while (arg_types); @@ -1812,9 +1830,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com sljit_s32 type) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); - CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); @@ -1823,19 +1840,20 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); if (GET_OPCODE(op) >= SLJIT_ADD) - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " flags%s %s%s, ", !(op & SLJIT_SET_Z) ? "" : ".z", GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], - GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : "")); + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : "")); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type)); } @@ -1848,11 +1866,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_32)); if (src != SLJIT_IMM) { CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src)); CHECK_ARGUMENT(srcw == 0); @@ -1867,9 +1885,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " cmov%s %s%s, ", - !(dst_reg & SLJIT_I32_OP) ? "" : "32", + !(dst_reg & SLJIT_32) ? "" : "32", jump_names[type & 0xff], JUMP_POSTFIX(type)); - sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_32); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); @@ -1884,15 +1902,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); - CHECK_ARGUMENT(!(type & SLJIT_I32_OP) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P)); + CHECK_ARGUMENT(!(type & SLJIT_32) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P)); CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); - CHECK_ARGUMENT((mem & REG_MASK) != SLJIT_UNUSED && (mem & REG_MASK) != reg); + CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1900,7 +1918,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler fprintf(compiler->verbose, " //"); fprintf(compiler->verbose, " mem%s.%s%s%s ", - !(type & SLJIT_I32_OP) ? "" : "32", + !(type & SLJIT_32) ? "" : "32", (type & SLJIT_MEM_STORE) ? "st" : "ld", op1_names[(type & 0xff) - SLJIT_OP1_BASE], (type & SLJIT_MEM_PRE) ? ".pre" : ".post"); @@ -1921,7 +1939,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & ~(0xff | SLJIT_I32_OP | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); @@ -1933,7 +1951,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile fprintf(compiler->verbose, " fmem.%s%s%s ", (type & SLJIT_MEM_STORE) ? "st" : "ld", - !(type & SLJIT_I32_OP) ? ".f64" : ".f32", + !(type & SLJIT_32) ? ".f64" : ".f32", (type & SLJIT_MEM_PRE) ? ".pre" : ".post"); sljit_verbose_freg(compiler, freg); fprintf(compiler->verbose, ", "); @@ -1950,7 +1968,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_co SLJIT_UNUSED_ARG(offset); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1967,7 +1985,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil SLJIT_UNUSED_ARG(init_value); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1982,7 +2000,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw, 0); + FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -2023,10 +2041,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { - /* Return if don't need to do anything. */ - if (op == SLJIT_UNUSED) - return SLJIT_SUCCESS; - #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. */ if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) @@ -2043,6 +2057,24 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); } +#if !(defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->skip_checks = 1; +#endif + return sljit_emit_return_void(compiler); +} + +#endif + #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ @@ -2054,7 +2086,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *com { struct sljit_label *label; struct sljit_jump *jump; - sljit_s32 op = (dst_reg & SLJIT_I32_OP) ? SLJIT_MOV32 : SLJIT_MOV; + sljit_s32 op = (dst_reg & SLJIT_32) ? SLJIT_MOV32 : SLJIT_MOV; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2067,7 +2099,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *com || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_I32_OP, 0, src, srcw)); + FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_32, 0, src, srcw)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2183,7 +2215,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler break; } - type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP)); + type = condition | (type & (SLJIT_32 | SLJIT_REWRITABLE_JUMP)); tmp_src = src1; src1 = src2; src2 = tmp_src; @@ -2201,13 +2233,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_I32_OP), - SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); + PTR_FAIL_IF(sljit_emit_op2u(compiler, + SLJIT_SUB | flags | (type & SLJIT_32), src1, src1w, src2, src2w)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_32))); } #endif @@ -2223,7 +2255,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_I32_OP), src1, src1w, src2, src2w); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2404,6 +2436,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp return SLJIT_ERR_UNSUPPORTED; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { SLJIT_UNUSED_ARG(compiler); @@ -2452,6 +2491,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_ERR_UNSUPPORTED; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -2470,7 +2523,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(instruction); diff --git a/thirdparty/pcre2/src/sljit/sljitLir.h b/thirdparty/pcre2/src/sljit/sljitLir.h index 0eb62fc21b6..11626581563 100644 --- a/thirdparty/pcre2/src/sljit/sljitLir.h +++ b/thirdparty/pcre2/src/sljit/sljitLir.h @@ -163,13 +163,6 @@ extern "C" { is not available at all. */ -/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 - or sljit_emit_op2 operations the result is discarded. Some status - flags must be set when the destination is SLJIT_UNUSED, because the - operation would have no effect otherwise. Other SLJIT operations do - not support SLJIT_UNUSED as a destination operand. */ -#define SLJIT_UNUSED 0 - /* Scratch registers. */ #define SLJIT_R0 1 #define SLJIT_R1 2 @@ -231,9 +224,6 @@ extern "C" { value. The FR and FS register sets are overlap in the same way as R and S register sets. See above. */ -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ - /* Floating point scratch registers. */ #define SLJIT_FR0 1 #define SLJIT_FR1 2 @@ -263,39 +253,38 @@ extern "C" { /* Argument type definitions */ /* --------------------------------------------------------------------- */ -/* Argument type definitions. - Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF]_RET macros. */ - -#define SLJIT_ARG_TYPE_VOID 0 -#define SLJIT_ARG_TYPE_SW 1 -#define SLJIT_ARG_TYPE_UW 2 -#define SLJIT_ARG_TYPE_S32 3 -#define SLJIT_ARG_TYPE_U32 4 -#define SLJIT_ARG_TYPE_F32 5 -#define SLJIT_ARG_TYPE_F64 6 - /* The following argument type definitions are used by sljit_emit_enter, sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. - The following return type definitions are used by sljit_emit_call - and sljit_emit_icall functions. - When a function is called, the first integer argument must be placed - in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first - floating point argument must be placed in SLJIT_FR0, the second in - SLJIT_FR1, and so on. + As for sljit_emit_call and sljit_emit_icall, the first integer argument + must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. + Similarly the first floating point argument must be placed into SLJIT_FR0, + the second one into SLJIT_FR1, and so on. + + As for sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. The first integer argument without _R postfix is + stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer + arguments with _R postfix are placed into scratch registers. The index + of the scratch register is the count of the previous integer arguments + starting from SLJIT_R0. The floating point arguments are always placed + into SLJIT_FR0, SLJIT_FR1, and so on. + + Note: if a function is called by sljit_emit_call/sljit_emit_icall and + an argument is stored in a scratch register by sljit_emit_enter, + that argument uses the same scratch register index for both + integer and floating point arguments. Example function definition: - sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a, + sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); Argument type definition: - SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32) - | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64) - | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F32) + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2) + | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) Short form of argument type definition: - SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64) - | SLJIT_ARG3(S32) | SLJIT_ARG4(F32) + SLJIT_ARGS4(32, P, F64, 32, F32) Argument passing: arg_a must be placed in SLJIT_R0 @@ -303,34 +292,73 @@ extern "C" { arg_b must be placed in SLJIT_FR0 arg_d must be placed in SLJIT_FR1 -Note: - The SLJIT_ARG_TYPE_VOID type is only supported by - SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the - default value when SLJIT_DEF_RET is not specified. */ -#define SLJIT_DEF_SHIFT 4 -#define SLJIT_DEF_RET(type) (type) -#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT) -#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT)) -#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT)) -#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT)) + Examples for argument processing by sljit_emit_enter: + SLJIT_ARGS4(VOID, P, 32_R, F32, W) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 -/* Short form of the macros above. + SLJIT_ARGS4(VOID, W, W_R, W, W_R) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 - For example the following definition: - SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32) + SLJIT_ARGS4(VOID, F64, W, F32, W_R) + Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 + + Note: it is recommended to pass the scratch arguments first + followed by the saved arguments: + + SLJIT_ARGS4(VOID, W_R, W_R, W, W) + Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 +*/ + +/* The following flag is only allowed for the integer arguments of + sljit_emit_enter. When the flag is set, the integer argument is + stored in a scratch register instead of a saved register. */ +#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 + +/* Void result, can only be used by SLJIT_ARG_RETURN. */ +#define SLJIT_ARG_TYPE_VOID 0 +/* Machine word sized integer argument or result. */ +#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 32 bit integer argument or result. */ +#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) +/* Pointer sized integer argument or result. */ +#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) +/* 64 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F64 4 +/* 32 bit floating point argument or result. */ +#define SLJIT_ARG_TYPE_F32 5 + +#define SLJIT_ARG_SHIFT 4 +#define SLJIT_ARG_RETURN(type) (type) +#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT)) + +/* Simplified argument list definitions. + + The following definition: + SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1) can be shortened to: - SLJIT_RET(SW) | SLJIT_ARG1(F32) + SLJIT_ARGS1(W, F32) +*/ -Note: - The VOID type is only supported by SLJIT_RET, and - VOID is also the default value when SLJIT_RET is - not specified. */ -#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type) -#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type + +#define SLJIT_ARGS0(ret) \ + SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret)) + +#define SLJIT_ARGS1(ret, arg1) \ + (SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1)) + +#define SLJIT_ARGS2(ret, arg1, arg2) \ + (SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2)) + +#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \ + (SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3)) + +#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \ + (SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4)) /* --------------------------------------------------------------------- */ /* Main structures and functions */ @@ -408,7 +436,7 @@ struct sljit_compiler { /* Code size. */ sljit_uw size; /* Relative offset of the executable mapping from the writable mapping. */ - sljit_uw executable_offset; + sljit_sw executable_offset; /* Executable size for statistical purposes. */ sljit_uw executable_size; @@ -417,17 +445,13 @@ struct sljit_compiler { #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_s32 args; + sljit_s32 args_size; sljit_s32 locals_offset; - sljit_s32 saveds_offset; - sljit_s32 stack_tmp_size; + sljit_s32 scratches_offset; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 mode32; -#ifdef _WIN64 - sljit_s32 locals_offset; -#endif #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) @@ -444,10 +468,14 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Temporary fields. */ sljit_uw shift_imm; +#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__) + sljit_uw args_size; #endif #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) - sljit_sw imm; + sljit_u32 imm; #endif #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) @@ -456,6 +484,10 @@ struct sljit_compiler { sljit_sw cache_argw; #endif +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + sljit_uw args_size; +#endif + #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) sljit_s32 delay_slot; sljit_s32 cache_arg; @@ -476,7 +508,9 @@ struct sljit_compiler { /* Flags specified by the last arithmetic instruction. It contains the type of the variable flag. */ sljit_s32 last_flags; - /* Local size passed to the functions. */ + /* Return value type set by entry functions. */ + sljit_s32 last_return; + /* Local size passed to entry functions. */ sljit_s32 logical_local_size; #endif @@ -615,38 +649,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) available options are listed before sljit_emit_enter. The function argument list is the combination of SLJIT_ARGx - (SLJIT_DEF_ARG1) macros. Currently maximum 3 SW / UW - (SLJIT_ARG_TYPE_SW / LJIT_ARG_TYPE_UW) arguments are supported. - The first argument goes to SLJIT_S0, the second goes to SLJIT_S1 - and so on. The register set used by the function must be declared - as well. The number of scratch and saved registers used by the - function must be passed to sljit_emit_enter. Only R registers - between R0 and "scratches" argument can be used later. E.g. if - "scratches" is set to 2, the scratch register set will be limited - to SLJIT_R0 and SLJIT_R1. The S registers and the floating point - registers ("fscratches" and "fsaveds") are specified in a similar - manner. The sljit_emit_enter is also capable of allocating a stack - space for local variables. The "local_size" argument contains the - size in bytes of this local area and its staring address is stored + (SLJIT_DEF_ARG1) macros. Currently maximum 4 arguments are + supported. The first integer argument is loaded into SLJIT_S0, + the second one is loaded into SLJIT_S1, and so on. Similarly, + the first floating point argument is loaded into SLJIT_FR0, + the second one is loaded into SLJIT_FR1, and so on. Furthermore + the register set used by the function must be declared as well. + The number of scratch and saved registers used by the function + must be passed to sljit_emit_enter. Only R registers between R0 + and "scratches" argument can be used later. E.g. if "scratches" + is set to 2, the scratch register set will be limited to SLJIT_R0 + and SLJIT_R1. The S registers and the floating point registers + ("fscratches" and "fsaveds") are specified in a similar manner. + The sljit_emit_enter is also capable of allocating a stack space + for local variables. The "local_size" argument contains the size + in bytes of this local area and its staring address is stored in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and SLJIT_SP + local_size (exclusive) can be modified freely until the function returns. The stack space is not initialized. Note: the following conditions must met: 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS - 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS - 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + Note: the compiler can use saved registers as scratch registers, + but the opposite is not supported + Note: every call of sljit_emit_enter and sljit_set_context overwrites the previous context. */ -/* The absolute address returned by sljit_get_local_base with -offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ -#define SLJIT_F64_ALIGNMENT 0x00000001 +/* The compiled function uses cdecl calling + * convention instead of SLJIT_FUNC. */ +#define SLJIT_ENTER_CDECL 0x00000001 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 @@ -657,7 +696,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi /* The machine code has a context (which contains the local stack space size, number of used registers, etc.) which initialized by sljit_emit_enter. Several - functions (like sljit_emit_return) requres this context to be able to generate + functions (such as sljit_emit_return) requres this context to be able to generate the appropriate code. However, some code fragments (like inline cache) may have no normal entry point so their context is unknown for the compiler. Their context can be provided to the compiler by the sljit_set_context function. @@ -669,11 +708,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); -/* Return from machine code. The op argument can be SLJIT_UNUSED which means the - function does not return with anything or any opcode between SLJIT_MOV and - SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op - is SLJIT_UNUSED, otherwise see below the description about source and - destination arguments. */ +/* Return from machine code. The sljit_emit_return_void function does not return with + any value. The sljit_emit_return function returns with a single value which stores + the result of a data move instruction. The instruction is specified by the op + argument, and must be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw); @@ -766,7 +806,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) #define SLJIT_IMM 0x40 -/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on +/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on 32 bit CPUs. When this option is set for an arithmetic operation, only the lower 32 bit of the input registers are used, and the CPU status flags are set according to the 32 bit result. Although the higher 32 bit @@ -774,12 +814,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU requirements all source registers must be the result of those operations where this option was also set. Memory loads read 32 bit values rather - than 64 bit ones. In other words 32 bit and 64 bit operations cannot - be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source - register can hold any 32 or 64 bit value, and it is converted to a 32 bit - compatible format first. This conversion is free (no instructions are - emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit - value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension). + than 64 bit ones. In other words 32 bit and 64 bit operations cannot be + mixed. The only exception is SLJIT_MOV32 whose source register can hold + any 32 or 64 bit value, and it is converted to a 32 bit compatible format + first. This conversion is free (no instructions are emitted) on most CPUs. + A 32 bit value can also be converted to a 64 bit value by SLJIT_MOV_S32 + (sign extension) or SLJIT_MOV_U32 (zero extension). + + As for floating-point operations, this option sets 32 bit single + precision mode. Similar to the integer operations, all register arguments + must be the result of those operations where this option was also set. Note: memory addressing always uses 64 bit values on 64 bit systems so the result of a 32 bit operation must not be used with SLJIT_MEMx @@ -788,22 +832,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * This option is part of the instruction name, so there is no need to manually set it. E.g: - SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */ -#define SLJIT_I32_OP 0x100 - -/* Set F32 (single) precision mode for floating-point computation. This - option is similar to SLJIT_I32_OP, it just applies to floating point - registers. When this option is passed, the CPU performs 32 bit floating - point operations, rather than 64 bit one. Similar to SLJIT_I32_OP, all - register arguments must be the result of those operations where this - option was also set. - - This option is part of the instruction name, so there is no need to - manually set it. E.g: - - SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP) - */ -#define SLJIT_F32_OP SLJIT_I32_OP + SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */ +#define SLJIT_32 0x100 /* Many CPUs (x86, ARM, PPC) have status flags which can be set according to the result of an operation. Other CPUs (MIPS) do not have status @@ -887,7 +917,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ #define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) -#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_I32_OP) +#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32) /* Flags: - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. @@ -895,13 +925,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), the behaviour is undefined. */ #define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) -#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_I32_OP) +#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32) /* Flags: - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ #define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) -#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_I32_OP) +#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32) /* Flags: - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. @@ -909,7 +939,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), the behaviour is undefined. */ #define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7) -#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_I32_OP) +#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32) /* Flags: - (does not modify flags) ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64 when Intel Control-flow Enforcement Technology (CET) is enabled. @@ -941,16 +971,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #define SLJIT_MOV (SLJIT_OP1_BASE + 0) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) -#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_I32_OP) +#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) -#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_I32_OP) +#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) -#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_I32_OP) +#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32) /* Flags: - (does not modify flags) */ #define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) -#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_I32_OP) +#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32) /* Flags: - (does not modify flags) Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */ #define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) @@ -958,25 +988,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ #define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) /* Flags: - (does not modify flags) */ -#define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP) +#define SLJIT_MOV32 (SLJIT_OP1_BASE + 7) /* Flags: - (does not modify flags) Note: load a pointer sized data, useful on x32 (a 32 bit mode on x86-64 where all x64 features are available, e.g. 16 register) or similar compiling modes */ -#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7) +#define SLJIT_MOV_P (SLJIT_OP1_BASE + 8) /* Flags: Z Note: immediate source argument is not supported */ -#define SLJIT_NOT (SLJIT_OP1_BASE + 8) -#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP) -/* Flags: Z | OVERFLOW - Note: immediate source argument is not supported */ -#define SLJIT_NEG (SLJIT_OP1_BASE + 9) -#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP) +#define SLJIT_NOT (SLJIT_OP1_BASE + 9) +#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32) /* Count leading zeroes Flags: - (may destroy flags) Note: immediate source argument is not supported */ #define SLJIT_CLZ (SLJIT_OP1_BASE + 10) -#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP) +#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -987,58 +1013,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile /* Flags: Z | OVERFLOW | CARRY */ #define SLJIT_ADD (SLJIT_OP2_BASE + 0) -#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_I32_OP) +#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32) /* Flags: CARRY */ #define SLJIT_ADDC (SLJIT_OP2_BASE + 1) -#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_I32_OP) +#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32) /* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER SIG_LESS_EQUAL | CARRY */ #define SLJIT_SUB (SLJIT_OP2_BASE + 2) -#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_I32_OP) +#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32) /* Flags: CARRY */ #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) -#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP) +#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32) /* Note: integer mul Flags: OVERFLOW */ #define SLJIT_MUL (SLJIT_OP2_BASE + 4) -#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP) +#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32) /* Flags: Z */ #define SLJIT_AND (SLJIT_OP2_BASE + 5) -#define SLJIT_AND32 (SLJIT_AND | SLJIT_I32_OP) +#define SLJIT_AND32 (SLJIT_AND | SLJIT_32) /* Flags: Z */ #define SLJIT_OR (SLJIT_OP2_BASE + 6) -#define SLJIT_OR32 (SLJIT_OR | SLJIT_I32_OP) +#define SLJIT_OR32 (SLJIT_OR | SLJIT_32) /* Flags: Z */ #define SLJIT_XOR (SLJIT_OP2_BASE + 7) -#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_I32_OP) +#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32) /* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_SHL (SLJIT_OP2_BASE + 8) -#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_I32_OP) +#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32) /* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_LSHR (SLJIT_OP2_BASE + 9) -#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_I32_OP) +#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32) /* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_ASHR (SLJIT_OP2_BASE + 10) -#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_I32_OP) +#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); +/* The sljit_emit_op2u function is the same as sljit_emit_op2 except the result is discarded. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + /* Starting index of opcodes for sljit_emit_op2. */ #define SLJIT_OP_SRC_BASE 128 @@ -1082,35 +1114,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp /* Flags: - (does not modify flags) */ #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) -#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_F32_OP) +#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32) /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int Rounding mode when the destination is W or I: round towards zero. */ -/* Flags: - (does not modify flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) -#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) -#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) -#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) -#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) -#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP) +#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32) /* Note: dst is the left and src is the right operand for SLJIT_CMPD. Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ #define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) -#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) -#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) -#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_F32_OP) +#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -1119,18 +1151,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* Starting index of opcodes for sljit_emit_fop2. */ #define SLJIT_FOP2_BASE 192 -/* Flags: - (does not modify flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) -#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) -#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) -#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_F32_OP) -/* Flags: - (does not modify flags) */ +#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32) +/* Flags: - (may destroy flags) */ #define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) -#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_F32_OP) +#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, @@ -1170,33 +1202,35 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_NOT_OVERFLOW 11 -/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ -#define SLJIT_SET_CARRY SLJIT_SET(12) +/* Unlike other flags, sljit_emit_jump may destroy this flag. */ +#define SLJIT_CARRY 12 +#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) +#define SLJIT_NOT_CARRY 13 /* Floating point comparison types. */ #define SLJIT_EQUAL_F64 14 -#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64) #define SLJIT_NOT_EQUAL_F64 15 -#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64) #define SLJIT_LESS_F64 16 -#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP) +#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_32) #define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64) #define SLJIT_GREATER_EQUAL_F64 17 -#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64) #define SLJIT_GREATER_F64 18 -#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP) +#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_32) #define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64) #define SLJIT_LESS_EQUAL_F64 19 -#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP) +#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_32) #define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64) #define SLJIT_UNORDERED_F64 20 -#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP) +#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_32) #define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64) #define SLJIT_ORDERED_F64 21 -#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP) +#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_32) #define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64) /* Unconditional jump types. */ @@ -1211,6 +1245,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 +/* When this flag is passed, the execution of the current function ends and + the called function returns to the caller of the current function. The + stack usage is reduced before the call, but it is not necessarily reduced + to zero. In the latter case the compiler needs to allocate space for some + arguments and the return register must be kept as well. + + This feature is highly experimental and not supported on SPARC platform + at the moment. */ +#define SLJIT_CALL_RETURN 0x2000 /* Emit a jump instruction. The destination is not set, only the type of the jump. type must be between SLJIT_EQUAL and SLJIT_FAST_CALL @@ -1221,15 +1264,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile /* Emit a C compiler (ABI) compatible function call. type must be SLJIT_CALL or SLJIT_CALL_CDECL - type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and SLJIT_CALL_RETURN arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros Flags: destroy all flags. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types); /* Basic arithmetic comparison. In most architectures it is implemented as - an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting - appropriate flags) followed by a sljit_emit_jump. However some + an compare operation followed by a sljit_emit_jump. However some architectures (i.e: ARM64 or MIPS) may employ special optimizations here. It is suggested to use this comparison form when appropriate. type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL @@ -1271,6 +1313,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi Direct form: set src to SLJIT_IMM() and srcw to the address Indirect form: any other valid addressing mode type must be SLJIT_CALL or SLJIT_CALL_CDECL + type can be combined (or'ed) with SLJIT_CALL_RETURN arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros Flags: destroy all flags. */ @@ -1298,7 +1341,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64 dst_reg must be a valid register and it can be combined - with SLJIT_I32_OP to perform a 32 bit arithmetic operation + with SLJIT_32 to perform a 32 bit arithmetic operation src must be register or immediate (SLJIT_IMM) Flags: - (does not modify flags) */ @@ -1454,26 +1497,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st #if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -/* Get the entry address of a given function. */ -#define SLJIT_FUNC_OFFSET(func_name) ((sljit_sw)func_name) +/* Get the entry address of a given function (signed, unsigned result). */ +#define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)func_name) +#define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)func_name) #else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ /* All JIT related code should be placed in the same context (library, binary, etc.). */ -#define SLJIT_FUNC_OFFSET(func_name) (*(sljit_sw*)(void*)func_name) +/* Get the entry address of a given function (signed, unsigned result). */ +#define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)func_name) +#define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)func_name) /* For powerpc64, the function pointers point to a context descriptor. */ struct sljit_function_context { - sljit_sw addr; - sljit_sw r2; - sljit_sw r11; + sljit_uw addr; + sljit_uw r2; + sljit_uw r11; }; /* Fill the context arguments using the addr and the function. If func_ptr is NULL, it will not be set to the address of context If addr is NULL, the function address also comes from the func pointer. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func); #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ @@ -1516,17 +1562,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size); + void *instruction, sljit_u32 size); /* Flags were set by a 32 bit operation. */ -#define SLJIT_CURRENT_FLAGS_I32_OP SLJIT_I32_OP +#define SLJIT_CURRENT_FLAGS_32 SLJIT_32 -/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ -#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 +/* Flags were set by an ADD or ADDC operations. */ +#define SLJIT_CURRENT_FLAGS_ADD 0x01 +/* Flags were set by a SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_SUB 0x02 -/* Flags were set by a SUB with unused destination. - Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ -#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 +/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode. + Must be combined with SLJIT_CURRENT_FLAGS_SUB. */ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x04 /* Define the currently available CPU status flags. It is usually used after an sljit_emit_label or sljit_emit_op_custom operations to define which CPU diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c index 74cf55fcd28..7b87f5907a8 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_32.c @@ -65,12 +65,17 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; -#define RM(rm) (reg_map[rm]) -#define RD(rd) (reg_map[rd] << 12) -#define RN(rn) (reg_map[rn] << 16) +#define RM(rm) ((sljit_uw)reg_map[rm]) +#define RM8(rm) ((sljit_uw)reg_map[rm] << 8) +#define RD(rd) ((sljit_uw)reg_map[rd] << 12) +#define RN(rn) ((sljit_uw)reg_map[rn] << 16) + +#define VM(rm) ((sljit_uw)freg_map[rm]) +#define VD(rd) ((sljit_uw)freg_map[rd] << 12) +#define VN(rn) ((sljit_uw)freg_map[rn] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -107,6 +112,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SBC 0xe0c00000 #define SMULL 0xe0c00090 #define SUB 0xe0400000 +#define TST 0xe1000000 #define UMULL 0xe0800090 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 @@ -115,12 +121,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +213,7 @@ static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_u cpool_unique_ptr = compiler->cpool_unique; do { if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { - cpool_index = cpool_ptr - compiler->cpool; + cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool); break; } cpool_ptr++; @@ -293,7 +302,7 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ while (last_pc_patch < code_ptr) { /* Data transfer instruction with Rn == r15. */ if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { - diff = const_pool - last_pc_patch; + diff = (sljit_uw)(const_pool - last_pc_patch); ind = (*last_pc_patch) & 0xfff; /* Must be a load instruction with immediate offset. */ @@ -308,12 +317,12 @@ static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ SLJIT_ASSERT(diff >= 1); if (diff >= 2 || ind > 0) { - diff = (diff + ind - 2) << 2; + diff = (diff + (sljit_uw)ind - 2) << 2; SLJIT_ASSERT(diff <= 0xfff); - *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff; } else - *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; + *last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004; } last_pc_patch++; } @@ -329,24 +338,24 @@ struct future_patch { static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) { - sljit_s32 value; + sljit_u32 value; struct future_patch *curr_patch, *prev_patch; SLJIT_UNUSED_ARG(compiler); /* Using the values generated by patch_pc_relative_loads. */ if (!*first_patch) - value = (sljit_s32)cpool_start_address[cpool_current_index]; + value = cpool_start_address[cpool_current_index]; else { curr_patch = *first_patch; prev_patch = NULL; while (1) { if (!curr_patch) { - value = (sljit_s32)cpool_start_address[cpool_current_index]; + value = cpool_start_address[cpool_current_index]; break; } if ((sljit_uw)curr_patch->index == cpool_current_index) { - value = curr_patch->value; + value = (sljit_uw)curr_patch->value; if (prev_patch) prev_patch->next = curr_patch->next; else @@ -359,8 +368,8 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc } } - if (value >= 0) { - if ((sljit_uw)value > cpool_current_index) { + if ((sljit_sw)value >= 0) { + if (value > cpool_current_index) { curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); if (!curr_patch) { while (*first_patch) { @@ -371,8 +380,8 @@ static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struc return SLJIT_ERR_ALLOC_FAILED; } curr_patch->next = *first_patch; - curr_patch->index = value; - curr_patch->value = cpool_start_address[value]; + curr_patch->index = (sljit_sw)value; + curr_patch->value = (sljit_sw)cpool_start_address[value]; *first_patch = curr_patch; } cpool_start_address[value] = *buf_ptr; @@ -395,8 +404,8 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst) static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { - FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); - return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff)); } #endif @@ -554,8 +563,9 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw execut } static sljit_uw get_imm(sljit_uw imm); +static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw *ptr = (sljit_uw*)addr; @@ -658,7 +668,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_sw addr; + sljit_uw addr; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -737,7 +747,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Points after the current instruction. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; next_addr = compute_next_addr(label, jump, const_, put_label); @@ -770,7 +780,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* code_ptr can be affected above. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); - label->size = (code_ptr + 1) - code; + label->size = (sljit_uw)((code_ptr + 1) - code); label = label->next; } if (const_ && const_->addr == word_count) { @@ -799,8 +809,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); if (cpool_current_index > 0) { /* Unconditional branch. */ - *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); - code_ptr = cpool_start_address + cpool_current_index; + *code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); } cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; cpool_current_index = 0; @@ -822,7 +832,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_start_address = ALIGN_INSTRUCTION(code_ptr); cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); if (cpool_current_index > 0) - code_ptr = cpool_start_address + cpool_current_index; + code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index); buf_ptr = compiler->cpool; buf_end = buf_ptr + compiler->cpool_fill; @@ -845,15 +855,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr = (sljit_uw *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); if (!(jump->flags & JUMP_ADDR)) { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff; + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff; } else { - SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff; + SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000); + *buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff; } } else if (jump->flags & SLJIT_REWRITABLE_JUMP) { @@ -923,7 +933,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw); code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -972,6 +982,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define ALLOW_IMM 0x10 #define ALLOW_INV_IMM 0x20 #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ALLOW_NEG_IMM 0x40 /* s/l - store/load (1 bit) u/s - signed/unsigned (1 bit) @@ -999,7 +1010,7 @@ static const sljit_uw data_transfer_insts[16] = { }; #define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \ - (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (arg)) + (data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg)) /* Normal ldr/str instruction. Type2: ldrsb, ldrh, ldrsh */ @@ -1008,101 +1019,10 @@ static const sljit_uw data_transfer_insts[16] = { #define TYPE2_TRANSFER_IMM(imm) \ (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) -static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ + ((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2)) -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 args, size, i, tmp; - sljit_uw push; - - CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - /* Push saved registers, temporary registers - stmdb sp!, {..., lr} */ - push = PUSH | (1 << 14); - - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - push |= 1 << reg_map[i]; - - for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - push |= 1 << reg_map[i]; - - FAIL_IF(push_inst(compiler, push)); - - /* Stack must be aligned to 8 bytes: */ - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - local_size = ((size + local_size + 7) & ~7) - size; - compiler->local_size = local_size; - if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - - args = get_arg_count(arg_types); - - if (args >= 1) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2))); - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 size; - - CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((size + local_size + 7) & ~7) - size; - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 i, tmp; - sljit_uw pop; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); - - /* Push saved registers, temporary registers - ldmia sp!, {..., pc} */ - pop = POP | (1 << 15); - - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - pop |= 1 << reg_map[i]; - - for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - pop |= 1 << reg_map[i]; - - return push_inst(compiler, pop); -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -/* flags: */ +/* Flags for emit_op: */ /* Arguments are swapped. */ #define ARGS_SWAPPED 0x01 /* Inverted immediate. */ @@ -1119,6 +1039,308 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ #define SRC2_IMM (1 << 25) +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w); + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_uw imm, offset; + sljit_s32 i, tmp, size, word_arg_count, saved_arg_count; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + imm = 0; + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + imm |= (sljit_uw)1 << reg_map[i]; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + /* Push saved and temporary registers + multiple registers: stmdb sp!, {..., lr} + single register: str reg, [sp, #-4]! */ + if (imm != 0) + FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm)); + else + FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2))); + + /* Stack must be aligned to 8 bytes: */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw))); + size += SSIZE_OF(sw); + } + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } + } + + local_size = ((size + local_size + 0x7) & ~0x7) - size; + compiler->local_size = local_size; + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); + else + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 + | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst(compiler, *(--remap_ptr))); +#endif + + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) +{ + sljit_uw imm2 = get_imm(imm); + + if (imm2 == 0) { + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + imm2 = RM(TMP_REG2); + } + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; + } + + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) + lr_dst = 0; + + reg_list = 0; + if (lr_dst != 0) + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; + + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + local_size += SSIZE_OF(sw); + + if (reg_list != 0) + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (reg_list == 0) + return SLJIT_SUCCESS; + + if (compiler->saveds > 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); + lr_dst = SLJIT_S0; + } else { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); + lr_dst = SLJIT_FIRST_SAVED_REG; + } + + return push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 + | RN(SLJIT_SP) | RD(lr_dst) | (sljit_uw)(frame_size - 2 * SSIZE_OF(sw))); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + /* Pop saved and temporary registers + multiple registers: ldmia sp!, {...} + single register: ldr reg, [sp], #4 */ + if ((reg_list & (reg_list - 1)) == 0) { + SLJIT_ASSERT(lr_dst != 0); + SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]); + + return push_inst(compiler, 0xe49d0004 | RD(lr_dst)); + } + + FAIL_IF(push_inst(compiler, POP | reg_list)); + if (frame_size > 0) + return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw))); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + #define EMIT_SHIFT_INS_AND_RETURN(opcode) \ SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ if (compiler->shift_imm != 0x20) { \ @@ -1130,11 +1352,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \ return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \ } \ - return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \ - (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) \ + | RM8((flags & ARGS_SWAPPED) ? src1 : src2) | (sljit_uw)(opcode << 5) \ + | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, - sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) + sljit_uw dst, sljit_uw src1, sljit_uw src2) { switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -1184,9 +1407,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; case SLJIT_NOT: - if (src2 & SRC2_IMM) { + if (src2 & SRC2_IMM) return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2); - } + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2)); case SLJIT_CLZ: @@ -1197,9 +1420,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1209,10 +1431,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1227,14 +1449,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl compiler->status_flags_state = 0; if (!HAS_FLAGS(op)) - return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); + return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1)); - FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); + FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1))); /* cmp TMP_REG1, dst asr #31. */ return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); case SLJIT_AND: + if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN) + return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1266,7 +1490,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl Returns with 0 if not possible. */ static sljit_uw get_imm(sljit_uw imm) { - sljit_s32 rol; + sljit_u32 rol; if (imm <= 0xff) return SRC2_IMM | imm; @@ -1307,7 +1531,7 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl sljit_uw mask; sljit_uw imm1; sljit_uw imm2; - sljit_s32 rol; + sljit_uw rol; /* Step1: Search a zero byte (8 continous zero bit). */ mask = 0xff000000; @@ -1418,7 +1642,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw tmp; #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - if (!(imm & ~0xffff)) + if (!(imm & ~(sljit_uw)0xffff)) return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); #endif @@ -1455,13 +1679,13 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit SLJIT_ASSERT (arg & SLJIT_MEM); SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); - if ((arg & REG_MASK) == SLJIT_UNUSED) { + if (!(arg & REG_MASK)) { if (is_type1_transfer) { - FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xfff)); argw &= 0xfff; } else { - FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xff)); argw &= 0xff; } @@ -1475,20 +1699,20 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit argw &= 0x3; if (argw != 0 && !is_type1_transfer) { - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7))); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); } /* Bit 25: RM is offset. */ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, - RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7))); + RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | ((sljit_uw)argw << 7))); } arg &= REG_MASK; if (is_type1_transfer) { if (argw > 0xfff) { - imm = get_imm(argw & ~0xfff); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); argw = argw & 0xfff; @@ -1496,7 +1720,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xfff) { - imm = get_imm(-argw & ~0xfff); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xfff); if (imm) { FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); argw = -(-argw & 0xfff); @@ -1512,7 +1736,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } else { if (argw > 0xff) { - imm = get_imm(argw & ~0xff); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xff); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); argw = argw & 0xff; @@ -1520,7 +1744,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xff) { - imm = get_imm(-argw & ~0xff); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); if (imm) { FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); argw = -(-argw & 0xff); @@ -1537,7 +1761,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); } @@ -1554,50 +1778,62 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 /* We prefers register and simple consts. */ sljit_s32 dst_reg; sljit_s32 src1_reg; - sljit_s32 src2_reg; + sljit_s32 src2_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 neg_op = 0; - /* Destination check. */ - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) + if (dst == TMP_REG2) flags |= UNUSED_RETURN; SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); - src2_reg = 0; + if (inp_flags & ALLOW_NEG_IMM) { + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUB; + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUBC; + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADD; + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADDC; + break; + } + } do { if (!(inp_flags & ALLOW_IMM)) break; if (src2 & SLJIT_IMM) { - src2_reg = get_imm(src2w); + src2_reg = (sljit_s32)get_imm((sljit_uw)src2w); if (src2_reg) break; if (inp_flags & ALLOW_INV_IMM) { - src2_reg = get_imm(~src2w); + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w); if (src2_reg) { flags |= INV_IMM; break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_reg = get_imm(-src2w); + if (neg_op != 0) { + src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); if (src2_reg) { - op = SLJIT_SUB | GET_ALL_FLAGS(op); - break; - } - } - if (GET_OPCODE(op) == SLJIT_SUB) { - src2_reg = get_imm(-src2w); - if (src2_reg) { - op = SLJIT_ADD | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } } if (src1 & SLJIT_IMM) { - src2_reg = get_imm(src1w); + src2_reg = (sljit_s32)get_imm((sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED; src1 = src2; @@ -1605,7 +1841,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } if (inp_flags & ALLOW_INV_IMM) { - src2_reg = get_imm(~src1w); + src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w); if (src2_reg) { flags |= ARGS_SWAPPED | INV_IMM; src1 = src2; @@ -1613,13 +1849,13 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_reg = get_imm(-src1w); + if (neg_op >= SLJIT_SUB) { + /* Note: additive operation (commutative). */ + src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); if (src2_reg) { - /* Note: add is commutative operation. */ src1 = src2; src1w = src2w; - op = SLJIT_SUB | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } @@ -1634,12 +1870,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 src1_reg = TMP_REG1; } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w)); src1_reg = TMP_REG1; } /* Destination. */ - dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2; if (op <= SLJIT_MOV_P) { if (dst & SLJIT_MEM) { @@ -1663,10 +1899,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 else if (src2 & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); else - FAIL_IF(load_immediate(compiler, src2_reg, src2w)); + FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w)); } - FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg)); + FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg)); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; @@ -1691,7 +1927,7 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_sw saved_reg_list[3]; + sljit_uw saved_reg_list[3]; sljit_sw saved_reg_count; CHECK_ERROR(); @@ -1708,10 +1944,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) - | (reg_map[SLJIT_R1] << 16) - | (reg_map[SLJIT_R0] << 12) - | (reg_map[SLJIT_R0] << 8) - | reg_map[SLJIT_R1]); + | RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1)); case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: @@ -1742,7 +1975,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif @@ -1756,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile SLJIT_ASSERT(saved_reg_list[1] < 8); FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); } - return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8) + return push_inst(compiler, 0xe49d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8) | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; @@ -1781,6 +2014,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); @@ -1799,13 +2033,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); } @@ -1819,19 +2046,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: case SLJIT_SUB: case SLJIT_SUBC: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: case SLJIT_XOR: return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); @@ -1858,6 +2084,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1905,8 +2145,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1917,23 +2158,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ - #define FPU_LOAD (1 << 20) #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ - ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs)) -#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ - ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16)) + ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs)) static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7))); arg = TMP_REG2; argw = 0; } @@ -1945,12 +2183,12 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, if (!(-argw & ~0x3fc)) return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); - imm = get_imm(argw & ~0x3fc); + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm) { FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } - imm = get_imm(-argw & ~0x3fc); + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm) { argw = -argw; FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); @@ -1959,11 +2197,11 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, } if (arg) { - FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); } else - FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); } @@ -1972,17 +2210,17 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0))); if (FAST_IS_REG(dst)) - return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16)); + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1)); /* Store the integer value from a VFP register. */ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); @@ -1994,23 +2232,23 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16))); + FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1))); else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. */ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16))); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1))); } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -2018,19 +2256,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0))); return push_inst(compiler, VMRS); } @@ -2042,16 +2280,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw)); src = dst_r; } @@ -2059,25 +2297,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0))); else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0))); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0))); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0))); + op ^= SLJIT_32; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -2094,40 +2332,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src2 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } if (src1 & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1))); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1))); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1))); break; } if (dst_r == TMP_FREG1) - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; } @@ -2169,10 +2407,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x10000000; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x20000000; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x30000000; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x30000000; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x20000000; @@ -2198,15 +2446,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd0000000; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x10000000; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x60000000; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x00000000; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x70000000; @@ -2277,111 +2527,124 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #ifdef __SOFTFP__ -static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; - sljit_s32 word_arg_offset = 0; - sljit_s32 float_arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 float_arg_count = 0; sljit_s32 types = 0; - sljit_s32 src_offset = 4 * sizeof(sljit_sw); sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; if (src && FAST_IS_REG(*src)) - src_offset = reg_map[*src] * sizeof(sljit_sw); + src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f32); - arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f64); - arg_count++; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); word_arg_offset += sizeof(sljit_sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } - if (stack_offset > 16) - FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7))); + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset)); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } /* Process arguments in reversed direction. */ while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; - float_arg_count--; - stack_offset = offsets[arg_count]; - - if (stack_offset < 16) { - if (src_offset == stack_offset) { - FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); - *src = TMP_REG1; - } - FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); - } else - FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); - break; + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - arg_count--; float_arg_count--; - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT((stack_offset & 0x7) == 0); + SLJIT_ASSERT((offset & 0x7) == 0); - if (stack_offset < 16) { - if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } - FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); } else - FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count--; + offset = *(--offset_ptr); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); break; default: - arg_count--; word_arg_offset -= sizeof(sljit_sw); - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT(stack_offset >= word_arg_offset); + SLJIT_ASSERT(offset >= word_arg_offset); - if (stack_offset != word_arg_offset) { - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); *src = TMP_REG1; } else if (src_offset == word_arg_offset) { - *src = 1 + (stack_offset >> 2); - src_offset = stack_offset; + *src = (sljit_s32)(SLJIT_R0 + (offset >> 2)); + src_offset = offset; } - FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2))); + FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2))); } else - FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16))); + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); } break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2389,83 +2652,51 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_s32 stack_size = 0; - - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) - FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_size & 0x7) - stack_size += sizeof(sljit_sw); - stack_size += sizeof(sljit_f64); - break; - default: - stack_size += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - if (stack_size <= 16) - return SLJIT_SUCCESS; - - return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7)); + return SLJIT_SUCCESS; } #else /* !__SOFTFP__ */ static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_u32 remap = 0; - sljit_u32 offset = 0; - sljit_u32 new_offset, mask; + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; /* Remove return value. */ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { - new_offset = 0; - mask = 1; - - while (remap & mask) { - new_offset++; - mask <<= 1; - } - remap |= mask; - + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: if (offset != new_offset) FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, - 0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 0x400000 : 0))); + SLJIT_32, new_offset, offset, 0))); - offset += 2; - } - else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { - new_offset = 0; - mask = 3; - - while (remap & mask) { - new_offset += 2; - mask <<= 2; + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0x400000, f32_offset, offset, 0))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, new_offset, offset, 0))); + f32_offset = new_offset; + new_offset++; } - remap |= mask; - - if (offset != new_offset) - FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0))); - - offset += 2; + offset++; + break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2480,13 +2711,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile { #ifdef __SOFTFP__ struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; #endif CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2496,9 +2732,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); return jump; #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2535,7 +2790,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (type >= SLJIT_FAST_CALL) @@ -2555,16 +2810,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#ifdef __SOFTFP__ if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2573,8 +2841,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + + if (type & SLJIT_CALL_RETURN) + return push_inst(compiler, BX | RM(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); return softfloat_post_call_with_args(compiler, arg_types); #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2636,27 +2921,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; cc = get_cc(compiler, type & 0xff); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - tmp = get_imm(srcw); + tmp = get_imm((sljit_uw)srcw); if (tmp) return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); - tmp = get_imm(~srcw); + tmp = get_imm(~(sljit_uw)srcw); if (tmp) return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - tmp = (sljit_uw) srcw; + tmp = (sljit_uw)srcw; FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); if (tmp <= 0xffff) return SLJIT_SUCCESS; return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); #else - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); src = TMP_REG1; #endif } @@ -2680,6 +2965,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; break; @@ -2731,7 +3017,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { memw &= 0x3; - inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | (memw << 7)); + inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7)); if (is_type1_transfer) inst |= (1 << 25); @@ -2757,7 +3043,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst(compiler, inst | memw); + return push_inst(compiler, inst | (sljit_uw)memw); } if (memw >= 0) @@ -2765,7 +3051,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst(compiler, inst | TYPE2_TRANSFER_IMM(memw)); + return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw)); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2777,10 +3063,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, + EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value)); compiler->patches++; #else PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value)); @@ -2804,7 +3091,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0)); @@ -2829,5 +3116,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, executable_offset, new_constant, 1); + inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c index 3f0f5fcc30b..96453b4abeb 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -48,19 +48,20 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31 }; -#define W_OP (1u << 31) -#define RD(rd) (reg_map[rd]) -#define RT(rt) (reg_map[rt]) -#define RN(rn) (reg_map[rn] << 5) -#define RT2(rt2) (reg_map[rt2] << 10) -#define RM(rm) (reg_map[rm] << 16) -#define VD(vd) (freg_map[vd]) -#define VT(vt) (freg_map[vt]) -#define VN(vn) (freg_map[vn] << 5) -#define VM(vm) (freg_map[vm] << 16) +#define W_OP ((sljit_ins)1 << 31) +#define RD(rd) ((sljit_ins)reg_map[rd]) +#define RT(rt) ((sljit_ins)reg_map[rt]) +#define RN(rn) ((sljit_ins)reg_map[rn] << 5) +#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10) +#define RM(rm) ((sljit_ins)reg_map[rm] << 16) +#define VD(vd) ((sljit_ins)freg_map[vd]) +#define VT(vt) ((sljit_ins)freg_map[vt]) +#define VT2(vt) ((sljit_ins)freg_map[vt] << 10) +#define VN(vn) ((sljit_ins)freg_map[vn] << 5) +#define VM(vm) ((sljit_ins)freg_map[vm] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -96,8 +97,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FNEG 0x1e614000 #define FSUB 0x1e603800 #define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 #define LDP 0xa9400000 -#define LDP_PRE 0xa9c00000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 #define LDR_PRE 0xf8400c00 #define LSLV 0x9ac02000 #define LSRV 0x9ac02400 @@ -117,10 +120,12 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SMADDL 0x9b200000 #define SMULH 0x9b403c00 #define STP 0xa9000000 +#define STP_F64 0x6d000000 #define STP_PRE 0xa9800000 #define STRB 0x38206800 #define STRBI 0x39000000 #define STRI 0xf9000000 +#define STRI_F64 0xfd000000 #define STR_FI 0x3d000000 #define STR_FR 0x3c206800 #define STUR_FI 0x3c000000 @@ -145,10 +150,10 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); - return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21)); } static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) @@ -171,14 +176,14 @@ static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; if (jump->flags & IS_COND) { - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= 0xfffff && diff >= -0x100000) { code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; jump->addr -= sizeof(sljit_ins); jump->flags |= PATCH_COND; return 5; } - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x7ffffff && diff >= -0x8000000) { @@ -231,8 +236,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; - sljit_s32 dst; + sljit_sw addr; + sljit_u32 dst; struct sljit_label *label; struct sljit_jump *jump; @@ -271,7 +276,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -300,7 +305,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -313,58 +318,58 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); - buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff); if (jump->flags & IS_COND) buf_ptr[-1] -= (4 << 5); break; } if (jump->flags & PATCH_COND) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); - buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5); break; } - SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); - SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff); dst = buf_ptr[0] & 0x1f; - buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); - buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); + buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5); + buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21); if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) - buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); + buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21); if (jump->flags & PATCH_ABS64) - buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); + buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21); } while (0); jump = jump->next; } put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; + addr = (sljit_sw)put_label->label->addr; + buf_ptr = (sljit_ins*)put_label->addr; - buf_ptr[0] |= (addr & 0xffff) << 5; - buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5; + buf_ptr[0] |= ((sljit_ins)addr & 0xffff) << 5; + buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5; if (put_label->flags >= 1) - buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5; + buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5; if (put_label->flags >= 2) - buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5; + buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5; put_label = put_label->next; } compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -426,11 +431,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) value >>= 1; \ } -#define LOGICAL_IMM_CHECK 0x100 +#define LOGICAL_IMM_CHECK (sljit_ins)0x100 -static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) +static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len) { - sljit_s32 negated, ones, right; + sljit_s32 negated; + sljit_u32 ones, right; sljit_uw mask, uimm; sljit_ins ins; @@ -497,30 +503,30 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm) { sljit_uw imm = (sljit_uw)simm; - sljit_s32 i, zeros, ones, first; + sljit_u32 i, zeros, ones, first; sljit_ins bitmask; /* Handling simple immediates first. */ if (imm <= 0xffff) - return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)imm << 5)); if (simm < 0 && simm >= -0x10000) - return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); if (imm <= 0xffffffffl) { if ((imm & 0xffff) == 0) - return push_inst(compiler, MOVZ | RD(dst) | ((imm >> 16) << 5) | (1 << 21)); + return push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm >> 16) << 5) | (1 << 21)); if ((imm & 0xffff0000l) == 0xffff0000) - return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5)); if ((imm & 0xffff) == 0xffff) - return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); bitmask = logical_imm(simm, 16); if (bitmask != 0) return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); - return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); } bitmask = logical_imm(simm, 32); @@ -529,10 +535,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, if (simm < 0 && simm >= -0x100000000l) { if ((imm & 0xffff) == 0xffff) - return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + return push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); - FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); - return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)~imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21)); } /* A large amount of number can be constructed from ORR and MOVx, but computing them is costly. */ @@ -558,10 +564,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } if (first) { first = 0; - FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); } else - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)~simm & 0xffff) << 5) | (i << 21))); simm >>= 16; } return SLJIT_SUCCESS; @@ -574,10 +580,10 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } if (first) { first = 0; - FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); } else - FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)simm & 0xffff) << 5) | (i << 21))); simm >>= 16; } return SLJIT_SUCCESS; @@ -619,12 +625,11 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } if (flags & (ARG1_IMM | ARG2_IMM)) { - reg = (flags & ARG2_IMM) ? arg1 : arg2; + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); imm = (flags & ARG2_IMM) ? arg2 : arg1; switch (op) { case SLJIT_MUL: - case SLJIT_NEG: case SLJIT_CLZ: case SLJIT_ADDC: case SLJIT_SUBC: @@ -639,40 +644,43 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); goto set_flags; case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = -imm; /* Fall through. */ case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (op != SLJIT_SUB) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (imm == 0) { CHECK_FLAGS(1 << 29); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); } if (imm > 0 && imm <= 0xfff) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10)); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10)); } nimm = -imm; if (nimm > 0 && nimm <= 0xfff) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10)); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10)); } if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)); } if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { CHECK_FLAGS(1 << 29); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)); } if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { - FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22))); - return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10)); + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)imm & 0xfff) << 10)); } if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { - FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22))); - return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10)); + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10)); } break; case SLJIT_AND: @@ -697,11 +705,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10))); } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) + | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10))); } goto set_flags; case SLJIT_LSHR: @@ -712,11 +722,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s inv_bits |= 1 << 30; if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | ((sljit_ins)imm << 16) | (31 << 10))); } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10))); + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) + | (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10))); } goto set_flags; default: @@ -766,41 +778,38 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (!(flags & INT_OP)) inv_bits |= 1 << 22; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV32: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + /* fallthrough */ case SLJIT_MOV_U32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if ((flags & INT_OP) && dst == arg2) - return SLJIT_SUCCESS; return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_MOV_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if ((flags & INT_OP) && dst == arg2) - return SLJIT_SUCCESS; return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); case SLJIT_NOT: SLJIT_ASSERT(arg1 == TMP_REG1); FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); break; /* Set flags. */ - case SLJIT_NEG: - SLJIT_ASSERT(arg1 == TMP_REG1); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if (flags & SET_FLAGS) - inv_bits |= 1 << 29; - return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_MUL: @@ -852,7 +861,7 @@ set_flags: #define INT_SIZE 0x2 #define WORD_SIZE 0x3 -#define MEM_SIZE_SHIFT(flags) ((flags) & 0x3) +#define MEM_SIZE_SHIFT(flags) ((sljit_ins)(flags) & 0x3) static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) @@ -872,35 +881,34 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, STRB | type | RT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg)); } arg &= REG_MASK; - if (arg == SLJIT_UNUSED) { + if (!arg) { FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10)); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { - if ((argw >> shift) <= 0xfff) { - return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | (argw << (10 - shift))); - } + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | (argw << 10)); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) - return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); FAIL_IF(load_immediate(compiler, tmp_reg, argw)); @@ -915,39 +923,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, i, tmp, offs, prev, saved_regs_size; + sljit_s32 prev, fprev, saved_regs_size, i, tmp; + sljit_s32 word_arg_count = 0; + sljit_ins offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size + saved_regs_size; + local_size = (local_size + saved_regs_size + 0xf) & ~0xf; + compiler->local_size = local_size; - FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15))); + if (local_size <= 512) { + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size = 0; + } else { + saved_regs_size = ((saved_regs_size - 2 * SSIZE_OF(sw)) + 0xf) & ~0xf; -#ifdef _WIN32 - if (local_size >= 4096) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); - else if (local_size > 256) - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10))); -#endif + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)saved_regs_size << 10))); + offs = (sljit_ins)(saved_regs_size - 2 * SSIZE_OF(sw)) << (15 - 3); + local_size -= saved_regs_size; + SLJIT_ASSERT(local_size > 0); + } - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; prev = -1; - offs = 2 << 15; - for (i = SLJIT_S0; i >= tmp; i--) { + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { if (prev == -1) { prev = i; continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } @@ -957,84 +970,124 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } + fprev = -1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + if (prev != -1) - FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); - - FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10))); - - args = get_arg_count(arg_types); - - if (args >= 1) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); + arg_types >>= SLJIT_ARG_SHIFT; #ifdef _WIN32 - if (local_size >= 4096) { + if (local_size > 4096) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); +#endif /* _WIN32 */ + + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + tmp++; + } + word_arg_count++; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + +#ifdef _WIN32 + if (local_size > 4096) { if (local_size < 4 * 4096) { /* No need for a loop. */ - if (local_size >= 2 * 4096) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - local_size -= 4096; - } if (local_size >= 2 * 4096) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - local_size -= 4096; - } + if (local_size >= 3 * 4096) { + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - local_size -= 4096; + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + } } else { - FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5))); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22))); - FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10))); + FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5))); + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10))); FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */)); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - - local_size &= 0xfff; } - if (local_size > 256) { - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10))); - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - } - else if (local_size > 0) - FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12))); + local_size &= 0xfff; - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + if (local_size > 0) + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP))); + else + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); } - else if (local_size > 256) { - FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1))); - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10))); + + if (local_size > 0) { + if (local_size <= 512) + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); + else { + if (local_size >= 4096) + local_size = (1 << (22 - 10)); + + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } } - else if (local_size > 0) - FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12))); #else /* !_WIN32 */ /* The local_size does not include saved registers size. */ - if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); - local_size &= 0xfff; + if (local_size != 0) { + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + + if (local_size > 512 || local_size == 0) { + if (local_size != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + + FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else + FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15))); } - if (local_size != 0) - FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); #endif /* _WIN32 */ - return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, @@ -1048,57 +1101,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); - compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf); + compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_s32 local_size; - sljit_s32 i, tmp, offs, prev, saved_regs_size; + sljit_s32 local_size, prev, fprev, i, tmp; + sljit_ins offs; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + local_size = compiler->local_size; - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2); - if (saved_regs_size & 0x8) - saved_regs_size += sizeof(sljit_sw); - - local_size = compiler->local_size - saved_regs_size; - - /* Load LR as early as possible. */ - if (local_size == 0) + if (local_size > 512 && local_size <= 512 + 496) { + FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); + local_size = 512; + } else FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); - else if (local_size < 63 * sizeof(sljit_sw)) { - FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | (local_size << (15 - 3)))); - } - else { + + if (local_size > 512) { + local_size -= 512; if (local_size > 0xfff) { - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) + | (((sljit_ins)local_size >> 12) << 10) | (1 << 22))); local_size &= 0xfff; } - if (local_size) - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10))); - FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10))); + local_size = 512; } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3); prev = -1; - offs = 2 << 15; - for (i = SLJIT_S0; i >= tmp; i--) { + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) { if (prev == -1) { prev = i; continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } @@ -1108,15 +1153,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs)); - offs += 2 << 15; + offs -= (sljit_ins)2 << 15; prev = -1; } - if (prev != -1) - FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5))); + fprev = -1; + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + if (fprev == -1) { + fprev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs)); + offs -= (sljit_ins)2 << 15; + fprev = -1; + } + + if (fprev != -1) + FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10))); + + if (prev != -1) + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); + + /* This and the next call/jump instruction can be executed parallelly. */ + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(local_size << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); - /* These two can be executed in parallel. */ - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10))); return push_inst(compiler, RET | RN(TMP_LR)); } @@ -1126,7 +1206,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { - sljit_ins inv_bits = (op & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? W_OP : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1171,13 +1251,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { /* Both operands are registers. */ if (dst_r != TMP_REG1 && FAST_IS_REG(src)) - return emit_op_imm(compiler, op | ((op_flags & SLJIT_I32_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); + return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src); switch (op) { case SLJIT_MOV: @@ -1210,6 +1290,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_u32)srcw; break; case SLJIT_MOV_S32: + case SLJIT_MOV32: mem_flags = INT_SIZE | SIGNED; if (src & SLJIT_IMM) srcw = (sljit_s32)srcw; @@ -1235,14 +1316,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op_flags & SLJIT_I32_OP) { + if (op_flags & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (dst == SLJIT_UNUSED) - flags |= UNUSED_RETURN; - if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2)); src = TMP_REG2; @@ -1263,24 +1341,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 dst_r, flags, mem_flags; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (dst == SLJIT_UNUSED) + if (dst == TMP_REG1) flags |= UNUSED_RETURN; if (src1 & SLJIT_MEM) { @@ -1310,6 +1385,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1363,8 +1452,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1391,34 +1481,34 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10))); return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1)); } arg &= REG_MASK; - if (arg == SLJIT_UNUSED) { + if (!arg) { FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << shift))); argw = (argw >> shift) & 0xfff; - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); } if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { if ((argw >> shift) <= 0xfff) - return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | (argw << (10 - shift))); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | ((argw >> 12) << 10))); + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); argw = ((argw & 0xfff) >> shift); - return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | (argw << 10)); + return push_inst(compiler, STR_FI | type | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10)); } } if (argw <= 255 && argw >= -256) - return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + return push_inst(compiler, STUR_FI | type | VT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); return push_inst(compiler, STR_FR | type | VT(reg) | RN(arg) | RM(TMP_REG1)); @@ -1429,13 +1519,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); src = TMP_FREG1; } @@ -1451,7 +1541,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) inv_bits |= W_OP; @@ -1471,7 +1561,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, ((op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -1479,8 +1569,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_s32 mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); @@ -1499,7 +1589,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; sljit_ins inv_bits; CHECK_ERROR(); @@ -1507,7 +1597,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { @@ -1531,7 +1621,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_F32_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); break; } @@ -1545,8 +1635,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 dst_r, mem_flags = (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; + sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; CHECK_ERROR(); CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1605,7 +1695,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: @@ -1616,10 +1706,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x0; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x2; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x3; @@ -1645,15 +1745,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xc; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x7; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x6; @@ -1709,9 +1811,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -1724,7 +1832,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; - sljit_ins inv_bits = (type & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1775,7 +1883,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; @@ -1786,8 +1894,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1825,7 +1950,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_OP; mem_flags = INT_SIZE; } @@ -1849,14 +1974,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 dst_reg, sljit_s32 src, sljit_sw srcw) { - sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? W_OP : 0; + sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0; sljit_ins cc; CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - if (dst_reg & SLJIT_I32_OP) + if (dst_reg & SLJIT_32) srcw = (sljit_s32)srcw; FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; @@ -1864,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil } cc = get_cc(compiler, type & 0xff); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); } @@ -1891,17 +2016,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile break; case SLJIT_MOV_S8: sign = 1; + /* fallthrough */ case SLJIT_MOV_U8: inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400; break; case SLJIT_MOV_S16: sign = 1; + /* fallthrough */ case SLJIT_MOV_U16: inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400; break; case SLJIT_MOV_S32: sign = 1; + /* fallthrough */ case SLJIT_MOV_U32: + case SLJIT_MOV32: inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400; break; default: @@ -1916,7 +2045,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (type & SLJIT_MEM_PRE) inst |= 0x800; - return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); + return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, @@ -1936,7 +2065,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil inst = STUR_FI | 0x80000400; - if (!(type & SLJIT_F32_OP)) + if (!(type & SLJIT_32)) inst |= 0x40000000; if (!(type & SLJIT_MEM_STORE)) @@ -1945,7 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil if (type & SLJIT_MEM_PRE) inst |= 0x800; - return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); + return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) @@ -1955,11 +2084,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c CHECK_ERROR(); CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); - - SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; + /* Not all instruction forms support accessing SP register. */ if (offset <= 0xffffff && offset >= -0xffffff) { ins = ADDI; if (offset < 0) { @@ -1968,13 +2097,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c } if (offset <= 0xfff) - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10))); else { - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22))); offset &= 0xfff; if (offset != 0) - FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10))); + FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10))); } } else { @@ -2002,7 +2131,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi set_const(const_, compiler); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, (sljit_uw)init_value)); if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); @@ -2034,17 +2163,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; - sljit_s32 dst; + sljit_u32 dst; SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0); dst = inst[0] & 0x1f; SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); - inst[0] = MOVZ | dst | ((new_target & 0xffff) << 5); - inst[1] = MOVK | dst | (((new_target >> 16) & 0xffff) << 5) | (1 << 21); - inst[2] = MOVK | dst | (((new_target >> 32) & 0xffff) << 5) | (2 << 21); - inst[3] = MOVK | dst | ((new_target >> 48) << 5) | (3 << 21); + inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5); + inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21); + inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21); + inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21); SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); @@ -2053,5 +2182,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c index e35dbe99b3a..ed21ea7daa5 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -50,40 +50,42 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 1, 2, 3, 4, 5, 6, 7 + 0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7 }; #define COPY_BITS(src, from, to, bits) \ - ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) + ((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to)) + +#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm)) /* Thumb16 encodings. */ -#define RD3(rd) (reg_map[rd]) -#define RN3(rn) (reg_map[rn] << 3) -#define RM3(rm) (reg_map[rm] << 6) -#define RDN3(rdn) (reg_map[rdn] << 8) -#define IMM3(imm) (imm << 6) -#define IMM8(imm) (imm) +#define RD3(rd) ((sljit_ins)reg_map[rd]) +#define RN3(rn) ((sljit_ins)reg_map[rn] << 3) +#define RM3(rm) ((sljit_ins)reg_map[rm] << 6) +#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8) +#define IMM3(imm) ((sljit_ins)imm << 6) +#define IMM8(imm) ((sljit_ins)imm) /* Thumb16 helpers. */ #define SET_REGS44(rd, rn) \ - ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) + (((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4)) #define IS_2_LO_REGS(reg1, reg2) \ (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) #define IS_3_LO_REGS(reg1, reg2, reg3) \ (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) /* Thumb32 encodings. */ -#define RD4(rd) (reg_map[rd] << 8) -#define RN4(rn) (reg_map[rn] << 16) -#define RM4(rm) (reg_map[rm]) -#define RT4(rt) (reg_map[rt] << 12) -#define DD4(dd) (freg_map[dd] << 12) -#define DN4(dn) (freg_map[dn] << 16) -#define DM4(dm) (freg_map[dm]) +#define RD4(rd) ((sljit_ins)reg_map[rd] << 8) +#define RN4(rn) ((sljit_ins)reg_map[rn] << 16) +#define RM4(rm) ((sljit_ins)reg_map[rm]) +#define RT4(rt) ((sljit_ins)reg_map[rt] << 12) +#define DD4(dd) ((sljit_ins)freg_map[dd] << 12) +#define DN4(dn) ((sljit_ins)freg_map[dn] << 16) +#define DM4(dm) ((sljit_ins)freg_map[dm]) #define IMM5(imm) \ - (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) + (COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6)) #define IMM12(imm) \ - (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff)) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -100,7 +102,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define ADDSI8 0x3000 #define ADD_W 0xeb000000 #define ADDWI 0xf2000000 -#define ADD_SP 0xb000 +#define ADD_SP 0x4485 +#define ADD_SP_I 0xb000 #define ADD_W 0xeb000000 #define ADD_WI 0xf1000000 #define ANDI 0xf0000000 @@ -126,6 +129,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define EORS 0x4040 #define EOR_W 0xea800000 #define IT 0xbf00 +#define LDR_SP 0x9800 +#define LDR 0xf8d00000 #define LDRI 0xf8500800 #define LSLS 0x4080 #define LSLSI 0x0000 @@ -168,13 +173,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SUBSI8 0x3800 #define SUB_W 0xeba00000 #define SUBWI 0xf2a00000 -#define SUB_SP 0xb080 +#define SUB_SP_I 0xb080 #define SUB_WI 0xf1a00000 #define SXTB 0xb240 #define SXTB_W 0xfa4ff080 #define SXTH 0xb200 #define SXTH_W 0xfa0ff080 #define TST 0x4200 +#define TSTI 0xf0000f00 +#define TST_W 0xea000f00 #define UDIV 0xfbb0f0f0 #define UMULL 0xfba00000 #define UXTB 0xb2c0 @@ -188,12 +195,15 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define VCVT_F64_F32 0xeeb70ac0 #define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 +#define VLDR_F32 0xed100a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 #define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 +#define VPOP 0xecbd0b00 +#define VPUSH 0xed2d0b00 #define VSTR_F32 0xed000a00 #define VSUB_F32 0xee300a40 @@ -204,7 +214,7 @@ static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst) ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16)); FAIL_IF(!ptr); - *ptr = inst; + *ptr = (sljit_u16)(inst); compiler->size++; return SLJIT_SUCCESS; } @@ -213,8 +223,8 @@ static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) { sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); - *ptr++ = inst >> 16; - *ptr = inst; + *ptr++ = (sljit_u16)(inst >> 16); + *ptr = (sljit_u16)(inst); compiler->size += 2; return SLJIT_SUCCESS; } @@ -229,12 +239,12 @@ static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) { - sljit_s32 dst = inst[1] & 0x0f00; + sljit_ins dst = inst[1] & 0x0f00; SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); - inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); - inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); - inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); - inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); + inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1)); + inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff)); + inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1)); + inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16)); } static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) @@ -318,24 +328,24 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw case 1: /* Encoding T1 of 'B' instruction */ SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); - jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); + jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff)); return; case 2: /* Encoding T3 of 'B' instruction */ SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); - jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); - jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); + jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1)); + jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff)); return; case 3: SLJIT_ASSERT(jump->flags & IS_COND); - *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; + *jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8); diff--; type = 5; break; case 4: /* Encoding T2 of 'B' instruction */ SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); - jump_inst[0] = 0xe000 | (diff & 0x7ff); + jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff)); return; } @@ -345,8 +355,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw s = (diff >> 23) & 0x1; j1 = (~(diff >> 22) ^ s) & 0x1; j2 = (~(diff >> 21) ^ s) & 0x1; - jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); - jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); + jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10)); + jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff)); /* The others have a common form. */ if (type == 5) /* Encoding T4 of 'B' instruction */ @@ -405,7 +415,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == half_count) { label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == half_count) { @@ -433,7 +443,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == half_count) { label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -457,7 +467,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16); code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -592,7 +602,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } if (flags & (ARG1_IMM | ARG2_IMM)) { - reg = (flags & ARG2_IMM) ? arg1 : arg2; + reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2); imm = (flags & ARG2_IMM) ? arg2 : arg1; switch (flags & 0xffff) { @@ -610,8 +620,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ break; case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - nimm = -(sljit_sw)imm; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + nimm = NEGATE(imm); if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); @@ -633,18 +643,18 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - nimm = get_imm(-(sljit_sw)imm); + nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; imm = get_imm(imm); if (imm != INVALID_IMM) return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; case SLJIT_SUB: - /* SUB operation can be replaced by ADD because of the negative carry flag. */ - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) { if (imm == 0 && IS_2_LO_REGS(reg, dst)) return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); @@ -659,11 +669,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, CMPI_W | RN4(reg) | nimm); - nimm = get_imm(-(sljit_sw)imm); + nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, CMNI_W | RN4(reg) | nimm); + break; } - nimm = -(sljit_sw)imm; + nimm = NEGATE(imm); if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); @@ -685,11 +696,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - nimm = get_imm(-(sljit_sw)imm); + nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = get_imm(imm); @@ -699,8 +711,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_AND: nimm = get_imm(imm); if (nimm != INVALID_IMM) - return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - imm = get_imm(imm); + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + imm = get_imm(~imm); if (imm != INVALID_IMM) return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; @@ -708,7 +720,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(imm); if (nimm != INVALID_IMM) return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); - imm = get_imm(imm); + imm = get_imm(~imm); if (imm != INVALID_IMM) return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; @@ -752,12 +764,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (flags & ARG2_IMM) { imm = arg2; arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; - FAIL_IF(load_immediate(compiler, arg2, imm)); + FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm)); } else { imm = arg1; arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1; - FAIL_IF(load_immediate(compiler, arg1, imm)); + FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm)); } SLJIT_ASSERT(arg1 != arg2); @@ -768,9 +780,10 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); - if (dst == arg2) + if (dst == (sljit_s32)arg2) return SLJIT_SUCCESS; return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); case SLJIT_MOV_U8: @@ -803,18 +816,19 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); return SLJIT_SUCCESS; case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); - if (dst == arg1 && !(flags & SET_FLAGS)) + if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS)) return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ADDC: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & UNUSED_RETURN) { if (IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); @@ -824,7 +838,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUBC: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MUL: @@ -836,29 +851,29 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* cmp TMP_REG2, dst asr #31. */ return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); case SLJIT_AND: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); - return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_OR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_XOR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SHL: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_LSHR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ASHR: - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); } @@ -951,20 +966,22 @@ static const sljit_ins sljit_mem32[13] = { /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) { + sljit_uw imm; + if (value >= 0) { if (value <= 0xfff) return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); - value = get_imm(value); - if (value != INVALID_IMM) - return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm); } else { value = -value; if (value <= 0xfff) return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); - value = get_imm(value); - if (value != INVALID_IMM) - return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value); + imm = get_imm((sljit_uw)value); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm); } return SLJIT_ERR_UNSUPPORTED; } @@ -980,13 +997,13 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { - tmp = get_imm(argw & ~0xfff); + tmp = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (tmp != INVALID_IMM) { FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | tmp)); return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff)); } - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags]) return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg)); return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg)); @@ -999,11 +1016,11 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit if (!argw && IS_3_LO_REGS(reg, arg, other_r)) return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)); - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4)); } if (argw > 0xfff) { - tmp = get_imm(argw & ~0xfff); + tmp = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); if (tmp != INVALID_IMM) { push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp); arg = tmp_reg; @@ -1011,7 +1028,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xff) { - tmp = get_imm(-argw & ~0xff); + tmp = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); if (tmp != INVALID_IMM) { push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | tmp); arg = tmp_reg; @@ -1037,21 +1054,21 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } if (tmp < 3) - return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp))); + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp))); } else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) { /* SP based immediate. */ - return push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)); + return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2)); } if (argw >= 0 && argw <= 0xfff) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw); else if (argw < 0 && argw >= -0xff) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw); SLJIT_ASSERT(arg != tmp_reg); - FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); if (IS_3_LO_REGS(reg, arg, tmp_reg)) return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)); return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)); @@ -1065,115 +1082,204 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, size, i, tmp; - sljit_ins push = 0; -#ifdef _WIN32 - sljit_uw imm; + sljit_s32 size, i, tmp, word_arg_count, saved_arg_count; + sljit_uw offset; + sljit_uw imm = 0; +#ifdef __SOFTFP__ + sljit_u32 float_arg_count; +#else + sljit_u32 old_offset, f32_offset; + sljit_u32 remap[3]; + sljit_u32 *remap_ptr = remap; #endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - push |= 1 << reg_map[i]; + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) + imm |= (sljit_uw)1 << reg_map[i]; for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - push |= 1 << reg_map[i]; + imm |= (sljit_uw)1 << reg_map[i]; - FAIL_IF((push & 0xff00) - ? push_inst32(compiler, PUSH_W | (1 << 14) | push) - : push_inst16(compiler, PUSH | (1 << 8) | push)); + /* At least two registers must be set for PUSH_W and one for PUSH instruction. */ + FAIL_IF((imm & 0xff00) + ? push_inst32(compiler, PUSH_W | (1 << 14) | imm) + : push_inst16(compiler, PUSH | (1 << 8) | imm)); /* Stack must be aligned to 8 bytes: (LR, R4) */ size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - local_size = ((size + local_size + 7) & ~7) - size; - compiler->local_size = local_size; -#ifdef _WIN32 - if (local_size >= 256) { - if (local_size > 4096) - imm = get_imm(4096); - else - imm = get_imm(local_size & ~0xff); - - SLJIT_ASSERT(imm != INVALID_IMM); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm)); - } -#else - if (local_size > 0) { - if (local_size <= (127 << 2)) - FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); - else - FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); - } -#endif - - args = get_arg_count(arg_types); - - if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); - -#ifdef _WIN32 - if (local_size >= 256) { - if (local_size > 4096) { - imm = get_imm(4096); - SLJIT_ASSERT(imm != INVALID_IMM); - - if (local_size < 4 * 4096) { - if (local_size > 2 * 4096) { - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - local_size -= 4096; - } - - if (local_size > 2 * 4096) { - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - local_size -= 4096; - } - - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - local_size -= 4096; - - SLJIT_ASSERT(local_size > 0); - } - else { - FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1)); - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - SLJIT_ASSERT(reg_map[SLJIT_R3] < 7); - FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1)); - FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff))); - - local_size &= 0xfff; - - if (local_size != 0) - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1))); - } - - if (local_size >= 256) { - imm = get_imm(local_size & ~0xff); - SLJIT_ASSERT(imm != INVALID_IMM); - - FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm)); - } + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((size & SSIZE_OF(sw)) != 0) { + FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2))); + size += SSIZE_OF(sw); } - local_size &= 0xff; - FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size)); - - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1))); + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPUSH | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPUSH | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + } } - else if (local_size > 0) - FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size)); + + local_size = ((size + local_size + 0x7) & ~0x7) - size; + compiler->local_size = local_size; + + arg_types >>= SLJIT_ARG_SHIFT; + word_arg_count = 0; + saved_arg_count = 0; +#ifdef __SOFTFP__ + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + offset = 0; + float_arg_count = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + offset += sizeof(sljit_f64) - sizeof(sljit_sw); + break; + case SLJIT_ARG_TYPE_F32: + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10))); + else + FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + float_arg_count++; + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + SLJIT_ASSERT(reg_map[tmp] <= 7); + + if (offset < 4 * sizeof(sljit_sw)) + FAIL_IF(push_inst16(compiler, MOV | RD3(tmp) | (offset << 1))); + else + FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + break; + } + + offset += sizeof(sljit_sw); + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = offset; +#else + offset = SLJIT_FR0; + old_offset = SLJIT_FR0; + f32_offset = 0; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | SLJIT_32 | DD4(offset) | DM4(old_offset); + old_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + *remap_ptr++ = VMOV_F32 | 0x20 | DD4(offset) | DM4(f32_offset); + f32_offset = 0; + } else { + if (offset != old_offset) + *remap_ptr++ = VMOV_F32 | DD4(offset) | DM4(old_offset); + f32_offset = old_offset; + old_offset++; + } + offset++; + break; + default: + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + + word_arg_count++; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap)); + + while (remap_ptr > remap) + FAIL_IF(push_inst32(compiler, *(--remap_ptr))); #endif +#ifdef _WIN32 + if (local_size >= 4096) { + imm = get_imm(4096); + SLJIT_ASSERT(imm != INVALID_IMM); + + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + + if (local_size < 4 * 4096) { + if (local_size > 2 * 4096) { + if (local_size > 3 * 4096) { + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1)); + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm)); + FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1)); + FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff))); + } + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + local_size &= 0xfff; + } + + if (local_size >= 256) { + SLJIT_ASSERT(local_size < 4096); + + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); + + FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP))); + } else if (local_size > 0) + FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size)); +#else /* !_WIN32 */ + if (local_size > 0) { + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size)); + } +#endif /* _WIN32 */ + return SLJIT_SUCCESS; } @@ -1188,37 +1294,143 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((size + local_size + 7) & ~7) - size; + + if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) + size += SSIZE_OF(sw); + + compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) { - sljit_s32 i, tmp; - sljit_ins pop = 0; + sljit_uw imm2; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + /* The TMP_REG1 register must keep its value. */ + if (imm <= (127u << 2)) + return push_inst16(compiler, ADD_SP_I | (imm >> 2)); - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm)); - if (compiler->local_size > 0) { - if (compiler->local_size <= (127 << 2)) - FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); - else - FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); + imm2 = get_imm(imm); + + if (imm2 != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2); + + FAIL_IF(load_immediate(compiler, TMP_REG2, imm)); + return push_inst16(compiler, ADD_SP | RN3(TMP_REG2)); +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) +{ + sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 lr_dst = TMP_PC; + sljit_uw reg_list; + + SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128); + + local_size = compiler->local_size; + fscratches = compiler->fscratches; + fsaveds = compiler->fsaveds; + + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) { + FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1))); + } else { + if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) + FAIL_IF(push_inst32(compiler, VPOP | DD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1))); + if (fsaveds > 0) + FAIL_IF(push_inst32(compiler, VPOP | DD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1))); + } + + local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7; } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) - pop |= 1 << reg_map[i]; + if (frame_size < 0) { + lr_dst = TMP_REG2; + frame_size = 0; + } else if (frame_size > 0) + lr_dst = 0; + + reg_list = 0; + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) - pop |= 1 << reg_map[i]; + reg_list |= (sljit_uw)1 << reg_map[i]; - return (pop & 0xff00) - ? push_inst32(compiler, POP_W | (1 << 15) | pop) - : push_inst16(compiler, POP | (1 << 8) | pop); + if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) { + /* The local_size does not include the saved registers. */ + local_size += SSIZE_OF(sw); + + if (reg_list != 0) + local_size += SSIZE_OF(sw); + + if (frame_size > local_size) + FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)(frame_size - local_size) >> 2))); + else if (frame_size < local_size) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size))); + + if (reg_list == 0) + return SLJIT_SUCCESS; + + if (compiler->saveds > 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); + lr_dst = SLJIT_S0; + } else { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); + lr_dst = SLJIT_FIRST_SAVED_REG; + } + + frame_size -= 2 * SSIZE_OF(sw); + + if (reg_map[lr_dst] <= 7) + return push_inst16(compiler, STR_SP | 0x800 | RDN3(lr_dst) | (sljit_uw)(frame_size >> 2)); + + return push_inst32(compiler, LDR | RT4(lr_dst) | RN4(SLJIT_SP) | (sljit_uw)frame_size); + } + + if (local_size > 0) + FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size)); + + if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) { + if (lr_dst == TMP_PC) + reg_list |= 1u << 8; + + /* At least one register must be set for POP instruction. */ + SLJIT_ASSERT(reg_list != 0); + + FAIL_IF(push_inst16(compiler, POP | reg_list)); + } else { + if (lr_dst != 0) { + if (reg_list == 0) + return push_inst32(compiler, 0xf85d0b04 | RT4(lr_dst)); + + reg_list |= (sljit_uw)1 << reg_map[lr_dst]; + } + + /* At least two registers must be set for POP_W instruction. */ + SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0); + + FAIL_IF(push_inst32(compiler, POP_W | reg_list)); + } + + if (frame_size > 0) + return push_inst16(compiler, SUB_SP_I | (((sljit_uw)frame_size - sizeof(sljit_sw)) >> 2)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + return emit_stack_frame_release(compiler, 0); } /* --------------------------------------------------------------------- */ @@ -1250,8 +1462,8 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__) - sljit_sw saved_reg_list[3]; - sljit_sw saved_reg_count; + sljit_uw saved_reg_list[3]; + sljit_uw saved_reg_count; #endif CHECK_ERROR(); @@ -1266,10 +1478,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) - | (reg_map[SLJIT_R1] << 8) - | (reg_map[SLJIT_R0] << 12) - | (reg_map[SLJIT_R0] << 16) - | reg_map[SLJIT_R1]); + | RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)); #if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__) case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: @@ -1314,10 +1523,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1))); FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1))); FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv)))); #elif defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif @@ -1356,7 +1565,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; op = GET_OPCODE(op); if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) { @@ -1364,6 +1573,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; break; @@ -1394,12 +1604,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } if (src & SLJIT_IMM) - FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw)); + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw)); else if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1)); } else { if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op, dst_r, TMP_REG2, src); + return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src); dst_r = src; } @@ -1409,14 +1619,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); } - if (op == SLJIT_NEG) { -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); - } - flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; if (src & SLJIT_MEM) { @@ -1424,7 +1626,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile src = TMP_REG1; } - emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, src); + emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src); if (SLJIT_UNLIKELY(dst & SLJIT_MEM)) return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); @@ -1439,17 +1641,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 dst_reg, flags, src2_reg; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + if (dst == TMP_REG1) + flags |= UNUSED_RETURN; + if (src1 & SLJIT_IMM) flags |= ARG1_IMM; else if (src1 & SLJIT_MEM) { @@ -1469,16 +1671,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile else src2w = src2; - if (dst == SLJIT_UNUSED) - flags |= UNUSED_RETURN; - - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w); if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1521,7 +1734,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1540,22 +1753,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_uw imm; - sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD)); SLJIT_ASSERT(arg & SLJIT_MEM); /* Fast loads and stores. */ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6))); + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6))); arg = SLJIT_MEM | TMP_REG1; argw = 0; } if ((arg & REG_MASK) && (argw & 0x3) == 0) { if (!(argw & ~0x3fc)) - return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2)); + return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)argw >> 2)); if (!(-argw & ~0x3fc)) - return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2)); + return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | ((sljit_uw)-argw >> 2)); } if (arg & REG_MASK) { @@ -1563,20 +1776,22 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, FAIL_IF(compiler->error); return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); } - imm = get_imm(argw & ~0x3fc); + + imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc); if (imm != INVALID_IMM) { FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); } - imm = get_imm(-argw & ~0x3fc); + + imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc); if (imm != INVALID_IMM) { argw = -argw; FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); - return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); + return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2)); } } - FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw)); if (arg & REG_MASK) FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK)))); return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); @@ -1586,14 +1801,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; } - FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src))); + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | DD4(TMP_FREG1) | DM4(src))); if (FAST_IS_REG(dst)) return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); @@ -1608,7 +1823,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (FAST_IS_REG(src)) FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); @@ -1617,14 +1832,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw)); FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); } - FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(TMP_FREG1))); + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_32) | DD4(dst_r) | DM4(TMP_FREG1))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } @@ -1632,19 +1847,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); src2 = TMP_FREG2; } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_F32_OP) | DD4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | DD4(src1) | DM4(src2))); return push_inst32(compiler, VMRS); } @@ -1656,16 +1871,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw); src = dst_r; } @@ -1673,25 +1888,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src))); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_32; break; } if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -1708,36 +1923,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w); src2 = TMP_FREG2; } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; } if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); } #undef FPU_LOAD @@ -1776,10 +1991,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x1; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x3; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x2; @@ -1805,15 +2030,17 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; + /* fallthrough */ case SLJIT_UNORDERED_F64: return 0x6; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; + /* fallthrough */ case SLJIT_ORDERED_F64: return 0x7; @@ -1874,113 +2101,126 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #ifdef __SOFTFP__ -static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; - sljit_s32 word_arg_offset = 0; - sljit_s32 float_arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; + sljit_u32 word_arg_offset = 0; + sljit_u32 float_arg_count = 0; sljit_s32 types = 0; - sljit_s32 src_offset = 4 * sizeof(sljit_sw); + sljit_u32 src_offset = 4 * sizeof(sljit_sw); sljit_u8 offsets[4]; + sljit_u8 *offset_ptr = offsets; if (src && FAST_IS_REG(*src)) - src_offset = reg_map[*src] * sizeof(sljit_sw); + src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f32); - arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) + offset += sizeof(sljit_sw); + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_f64); - arg_count++; + case SLJIT_ARG_TYPE_F32: + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + *offset_ptr++ = (sljit_u8)offset; + offset += sizeof(sljit_sw); word_arg_offset += sizeof(sljit_sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } - if (stack_offset > 16) - FAIL_IF(push_inst16(compiler, SUB_SP | (((stack_offset - 16) + 0x7) & ~0x7) >> 2)); + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + /* Keep lr register on the stack. */ + if (is_tail_call) + offset += sizeof(sljit_sw); + + offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7; + + *extra_space = offset; + + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset)); + else + FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2))); + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, -1)); + *extra_space = 0; + } SLJIT_ASSERT(reg_map[TMP_REG1] == 12); /* Process arguments in reversed direction. */ while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; - float_arg_count--; - stack_offset = offsets[arg_count]; - - if (stack_offset < 16) { - if (src_offset == stack_offset) { - FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); - *src = TMP_REG1; - } - FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); - } else - FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); - break; + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - arg_count--; float_arg_count--; - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT((stack_offset & 0x7) == 0); + SLJIT_ASSERT((offset & 0x7) == 0); - if (stack_offset < 16) { - if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) { FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); *src = TMP_REG1; } - FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count)); } else - FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count--; + offset = *(--offset_ptr); + + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10))); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) + | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); break; default: - arg_count--; word_arg_offset -= sizeof(sljit_sw); - stack_offset = offsets[arg_count]; + offset = *(--offset_ptr); - SLJIT_ASSERT(stack_offset >= word_arg_offset); + SLJIT_ASSERT(offset >= word_arg_offset); - if (stack_offset != word_arg_offset) { - if (stack_offset < 16) { - if (src_offset == stack_offset) { + if (offset != word_arg_offset) { + if (offset < 4 * sizeof(sljit_sw)) { + if (src_offset == offset) { FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); *src = TMP_REG1; } else if (src_offset == word_arg_offset) { - *src = 1 + (stack_offset >> 2); - src_offset = stack_offset; + *src = (sljit_s32)(1 + (offset >> 2)); + src_offset = offset; } - FAIL_IF(push_inst16(compiler, MOV | (stack_offset >> 2) | (word_arg_offset << 1))); + FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1))); } else - FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((stack_offset - 16) >> 2))); + FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2))); } break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -1988,83 +2228,48 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_s32 stack_size = 0; - - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) - FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_size & 0x7) - stack_size += sizeof(sljit_sw); - stack_size += sizeof(sljit_f64); - break; - default: - stack_size += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - if (stack_size <= 16) - return SLJIT_SUCCESS; - - return push_inst16(compiler, ADD_SP | ((((stack_size - 16) + 0x7) & ~0x7) >> 2)); + return SLJIT_SUCCESS; } #else static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) { - sljit_u32 remap = 0; - sljit_u32 offset = 0; - sljit_u32 new_offset, mask; + sljit_u32 offset = SLJIT_FR0; + sljit_u32 new_offset = SLJIT_FR0; + sljit_u32 f32_offset = 0; /* Remove return value. */ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { - new_offset = 0; - mask = 1; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | DD4(new_offset) | DM4(offset))); - while (remap & mask) { + new_offset++; + offset++; + break; + case SLJIT_ARG_TYPE_F32: + if (f32_offset != 0) { + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(f32_offset) | DM4(offset))); + f32_offset = 0; + } else { + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(new_offset) | DM4(offset))); + f32_offset = new_offset; new_offset++; - mask <<= 1; } - remap |= mask; - - if (offset != new_offset) - FAIL_IF(push_inst32(compiler, VMOV_F32 | DD4((new_offset >> 1) + 1) - | ((new_offset & 0x1) ? 0x400000 : 0) | DM4((offset >> 1) + 1))); - - offset += 2; + offset++; + break; } - else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { - new_offset = 0; - mask = 3; - - while (remap & mask) { - new_offset += 2; - mask <<= 2; - } - remap |= mask; - - if (offset != new_offset) - FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_F32_OP | DD4((new_offset >> 1) + 1) | DM4((offset >> 1) + 1))); - - offset += 2; - } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -2077,13 +2282,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile { #ifdef __SOFTFP__ struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; #endif CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2093,9 +2303,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2))); + return jump; + } + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); return jump; #else + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2132,7 +2362,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); jump->addr = compiler->size; @@ -2143,16 +2373,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { +#ifdef __SOFTFP__ + sljit_u32 extra_space = (sljit_u32)type; +#endif + CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#ifdef __SOFTFP__ if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src))); + src = TMP_REG1; + } + +#ifdef __SOFTFP__ + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); + + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -2161,8 +2404,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + + FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + + if (type & SLJIT_CALL_RETURN) + return push_inst16(compiler, BX | RN3(TMP_REG2)); + } + + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); return softfloat_post_call_with_args(compiler, arg_types); #else /* !__SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { + /* ldmia sp!, {..., lr} */ + FAIL_IF(emit_stack_frame_release(compiler, -1)); + type = SLJIT_JUMP; + } + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -2236,7 +2497,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; cc = get_cc(compiler, type & 0xff); @@ -2254,13 +2515,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); } - tmp = get_imm(srcw); + tmp = get_imm((sljit_uw)srcw); if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); } - tmp = get_imm(~srcw); + tmp = get_imm(~(sljit_uw)srcw); if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); @@ -2295,6 +2556,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: case SLJIT_MOV_P: flags = WORD_SIZE; break; @@ -2329,7 +2591,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile else memw = -memw; - return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | memw); + return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2346,7 +2608,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi set_const(const_, compiler); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; - PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value)); if (dst & SLJIT_MEM) PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); @@ -2388,5 +2650,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c index a90345f1f80..1a06b17d129 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -73,50 +73,49 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S8) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); #endif /* SLJIT_MIPS_REV >= 1 */ - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S16) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); #endif /* SLJIT_MIPS_REV >= 1 */ - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_NOT: @@ -438,92 +437,119 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space) { - sljit_s32 stack_offset = 0; - sljit_s32 arg_count = 0; + sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN; + sljit_u32 offset = 0; sljit_s32 float_arg_count = 0; sljit_s32 word_arg_count = 0; sljit_s32 types = 0; - sljit_s32 arg_count_save, types_save; sljit_ins prev_ins = NOP; sljit_ins ins = NOP; sljit_u8 offsets[4]; + sljit_u8 *offsets_ptr = offsets; SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; + + /* See ABI description in sljit_emit_enter. */ while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + *offsets_ptr = (sljit_u8)offset; - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - offsets[arg_count] = (sljit_u8)stack_offset; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (offset & 0x7) { + offset += sizeof(sljit_sw); + *offsets_ptr = (sljit_u8)offset; + } - if (word_arg_count == 0 && arg_count <= 1) - offsets[arg_count] = 254 + arg_count; + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); - stack_offset += sizeof(sljit_f32); - arg_count++; + offset += sizeof(sljit_f64); float_arg_count++; break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - offsets[arg_count] = (sljit_u8)stack_offset; + case SLJIT_ARG_TYPE_F32: + if (word_arg_count == 0 && float_arg_count <= 1) + *offsets_ptr = (sljit_u8)(254 + float_arg_count); - if (word_arg_count == 0 && arg_count <= 1) - offsets[arg_count] = 254 + arg_count; - - stack_offset += sizeof(sljit_f64); - arg_count++; + offset += sizeof(sljit_f32); float_arg_count++; break; default: - offsets[arg_count] = (sljit_u8)stack_offset; - stack_offset += sizeof(sljit_sw); - arg_count++; + offset += sizeof(sljit_sw); word_arg_count++; break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; + offsets_ptr++; } - /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ - if (stack_offset > 16) - FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + /* Stack is aligned to 16 bytes. */ + SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw)); - types_save = types; - arg_count_save = arg_count; + if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) { + if (is_tail_call) { + offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf; + FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins)); + *extra_space = offset; + } else { + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + *extra_space = 16; + } + } else { + if (is_tail_call) + FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins)); + *extra_space = 0; + } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; - if (offsets[arg_count] < 254) - ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); + --offsets_ptr; + + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (*offsets_ptr < 4 * sizeof (sljit_sw)) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + + /* Must be preceded by at least one other argument, + * and its starting offset must be 8 because of alignment. */ + SLJIT_ASSERT((*offsets_ptr >> 2) == 2); + + prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11); + ins = MFC1 | TA(7) | FS(float_arg_count); + } else if (*offsets_ptr < 254) + ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + float_arg_count--; break; - case SLJIT_ARG_TYPE_F64: - arg_count--; - if (offsets[arg_count] < 254) - ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); + case SLJIT_ARG_TYPE_F32: + if (*offsets_ptr < 4 * sizeof (sljit_sw)) + ins = MFC1 | TA(4 + (*offsets_ptr >> 2)) | FS(float_arg_count); + else if (*offsets_ptr < 254) + ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr); + else if (*offsets_ptr == 254) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + float_arg_count--; break; default: - if (offsets[arg_count - 1] >= 16) - ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]); - else if (arg_count != word_arg_count) - ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2)); - else if (arg_count == 1) + if (*offsets_ptr >= 4 * sizeof (sljit_sw)) + ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(*offsets_ptr); + else if ((*offsets_ptr >> 2) != word_arg_count - 1) + ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (*offsets_ptr >> 2)); + else if (*offsets_ptr == 0) ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4); - arg_count--; word_arg_count--; break; } @@ -535,45 +561,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t ins = NOP; } - types >>= SLJIT_DEF_SHIFT; - } - - types = types_save; - arg_count = arg_count_save; - - while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - arg_count--; - if (offsets[arg_count] == 254) - ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); - else if (offsets[arg_count] < 16) - ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); - break; - case SLJIT_ARG_TYPE_F64: - arg_count--; - if (offsets[arg_count] == 254) - ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); - else if (offsets[arg_count] < 16) { - if (prev_ins != NOP) - FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); - prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); - ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw)); - } - break; - default: - arg_count--; - break; - } - - if (ins != NOP) { - if (prev_ins != NOP) - FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); - prev_ins = ins; - ins = NOP; - } - - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } *ins_ptr = prev_ins; @@ -581,41 +569,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t return SLJIT_SUCCESS; } -static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) -{ - sljit_s32 stack_offset = 0; - - arg_types >>= SLJIT_DEF_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_offset += sizeof(sljit_f32); - break; - case SLJIT_ARG_TYPE_F64: - if (stack_offset & 0x7) - stack_offset += sizeof(sljit_sw); - stack_offset += sizeof(sljit_f64); - break; - default: - stack_offset += sizeof(sljit_sw); - break; - } - - arg_types >>= SLJIT_DEF_SHIFT; - } - - /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ - if (stack_offset > 16) - return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP)); - - return SLJIT_SUCCESS; -} - SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { struct sljit_jump *jump; + sljit_u32 extra_space = (sljit_u32)type; sljit_ins ins; CHECK_ERROR_PTR(); @@ -624,21 +582,34 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); - jump->flags |= IS_JAL | IS_CALL; - PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) { + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); - PTR_FAIL_IF(post_call_with_args(compiler, arg_types)); + if (extra_space == 0) + return jump; + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + PTR_FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP))); return jump; } @@ -646,6 +617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + sljit_u32 extra_space = (sljit_u32)type; sljit_ins ins; CHECK_ERROR(); @@ -662,10 +634,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); } - FAIL_IF(call_with_args(compiler, arg_types, &ins)); + FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); /* Register input. */ - FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); - return post_call_with_args(compiler, arg_types); + + if (extra_space == 0) + return SLJIT_SUCCESS; + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, + SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw)))); + + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space), + (type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP)); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c index 1f22e49ed98..c2b3d839c23 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -46,9 +46,9 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a } /* Zero extended number. */ - uimm = imm; + uimm = (sljit_uw)imm; if (imm < 0) { - uimm = ~imm; + uimm = ~(sljit_uw)imm; inv = 1; } @@ -119,7 +119,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a } #define SELECT_OP(a, b) \ - (!(op & SLJIT_I32_OP) ? a : b) + (!(op & SLJIT_32) ? a : b) #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ @@ -138,19 +138,19 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a #define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ if (flags & SRC2_IMM) { \ if (src2 >= 32) { \ - SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \ + SLJIT_ASSERT(!(op & SLJIT_32)); \ ins = op_dimm32; \ src2 -= 32; \ } \ else \ - ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \ + ins = (op & SLJIT_32) ? op_imm : op_dimm; \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ } \ else { \ - ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \ + ins = (op & SLJIT_32) ? op_v : op_dv; \ if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ if (!(flags & UNUSED_DEST)) \ @@ -165,50 +165,71 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); return SLJIT_SUCCESS; case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S8) { - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_S16) { - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else { - SLJIT_ASSERT(dst == src2); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); } + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U32: - SLJIT_ASSERT(!(op & SLJIT_I32_OP)); - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); - return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (dst == src2) + return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); @@ -234,7 +255,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); /* Check zero. */ FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); /* Loop for searching the highest bit. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); @@ -462,7 +483,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); return push_inst(compiler, MFLO | D(dst), DR(dst)); @@ -528,10 +549,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0); - inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); - inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | ((sljit_ins)new_target & 0xffff); SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); @@ -539,7 +560,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) @@ -548,19 +569,19 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_s32 word_arg_count = 0; sljit_s32 float_arg_count = 0; sljit_s32 types = 0; - sljit_ins prev_ins = NOP; + sljit_ins prev_ins = *ins_ptr; sljit_ins ins = NOP; SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12); - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; float_arg_count++; break; @@ -570,19 +591,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - if (arg_count != float_arg_count) - ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); - else if (arg_count == 1) - ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); - arg_count--; - float_arg_count--; - break; + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: if (arg_count != float_arg_count) ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); @@ -591,6 +604,14 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t arg_count--; float_arg_count--; break; + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; default: if (arg_count != word_arg_count) ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); @@ -608,7 +629,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t ins = NOP; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } *ins_ptr = prev_ins; @@ -620,7 +641,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile sljit_s32 arg_types) { struct sljit_jump *jump; - sljit_ins ins; + sljit_ins ins = NOP; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); @@ -628,7 +649,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; + + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); @@ -636,8 +659,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); - jump->flags |= IS_JAL | IS_CALL; - PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN)) { + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + } else + PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); @@ -648,7 +675,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { - sljit_ins ins; + sljit_ins ins = NOP; CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); @@ -664,9 +691,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); } + if (type & SLJIT_CALL_RETURN) + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + FAIL_IF(call_with_args(compiler, arg_types, &ins)); /* Register input. */ - FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (!(type & SLJIT_CALL_RETURN)) + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); return push_inst(compiler, ins, UNMOVABLE_INS); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c index fd747695a72..be5cb22a232 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -86,13 +86,13 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { - 0, 0, 14, 2, 4, 6, 8, 12, 10, 16 + 0, 0, 14, 2, 4, 6, 8, 18, 30, 28, 26, 24, 22, 20, 12, 10, 16 }; #else static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { - 0, 0, 13, 14, 15, 16, 17, 12, 18, 10 + 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 2, 3, 4, 5, 6, 7, 8, 9, 31, 30, 29, 28, 27, 26, 25, 24, 12, 11, 10 }; #endif @@ -101,23 +101,23 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define S(s) (reg_map[s] << 21) -#define T(t) (reg_map[t] << 16) -#define D(d) (reg_map[d] << 11) -#define FT(t) (freg_map[t] << 16) -#define FS(s) (freg_map[s] << 11) -#define FD(d) (freg_map[d] << 6) +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define T(t) ((sljit_ins)reg_map[t] << 16) +#define D(d) ((sljit_ins)reg_map[d] << 11) +#define FT(t) ((sljit_ins)freg_map[t] << 16) +#define FS(s) ((sljit_ins)freg_map[s] << 11) +#define FD(d) ((sljit_ins)freg_map[d] << 6) /* Absolute registers. */ -#define SA(s) ((s) << 21) -#define TA(t) ((t) << 16) -#define DA(d) ((d) << 11) -#define IMM(imm) ((imm) & 0xffff) -#define SH_IMM(imm) ((imm) << 6) +#define SA(s) ((sljit_ins)(s) << 21) +#define TA(t) ((sljit_ins)(t) << 16) +#define DA(d) ((sljit_ins)(d) << 11) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define SH_IMM(imm) ((sljit_ins)(imm) << 6) #define DR(dr) (reg_map[dr]) #define FR(dr) (freg_map[dr]) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) (opcode) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) /* CMP.cond.fmt */ /* S = (20 << 21) D = (21 << 21) */ @@ -186,6 +186,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define DMULTU (HI(0) | LO(29)) #endif /* SLJIT_MIPS_REV >= 6 */ #define DIV_S (HI(17) | FMT_S | LO(3)) +#define DINSU (HI(31) | LO(6)) #define DSLL (HI(0) | LO(56)) #define DSLL32 (HI(0) | LO(60)) #define DSLLV (HI(0) | LO(20)) @@ -205,8 +206,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define JR (HI(0) | LO(8)) #endif /* SLJIT_MIPS_REV >= 6 */ #define LD (HI(55)) +#define LDC1 (HI(53)) #define LUI (HI(15)) #define LW (HI(35)) +#define LWC1 (HI(49)) #define MFC1 (HI(17)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) #define MOD (HI(0) | (3 << 6) | LO(26)) @@ -292,7 +295,8 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS - || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); + || (sljit_ins)delay_slot == ((ins >> 11) & 0x1f) + || (sljit_ins)delay_slot == ((ins >> 16) & 0x1f)); FAIL_IF(!ptr); *ptr = ins; compiler->size++; @@ -300,7 +304,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags) +static SLJIT_INLINE sljit_ins invert_branch(sljit_uw flags) { if (flags & IS_BIT26_COND) return (1 << 26); @@ -371,7 +375,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i inst[1] = NOP; return inst + 1; } - inst[0] = inst[0] ^ invert_branch(jump->flags); + inst[0] ^= invert_branch(jump->flags); inst[1] = NOP; jump->addr -= sizeof(sljit_ins); return inst + 1; @@ -379,7 +383,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } if (jump->flags & IS_COND) { - if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; saved_inst = inst[0]; inst[0] = inst[-1]; @@ -388,7 +392,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i inst[2] = NOP; return inst + 2; } - else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) { + else if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = (inst[0] & 0xffff0000) | 3; inst[1] = NOP; @@ -400,7 +404,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } else { /* J instuctions. */ - if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~(sljit_uw)0xfffffff) == (jump->addr & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = inst[-1]; inst[-1] = (jump->flags & IS_JAL) ? JAL : J; @@ -408,7 +412,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i return inst; } - if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) { + if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = (jump->flags & IS_JAL) ? JAL : J; inst[1] = NOP; @@ -472,7 +476,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) { sljit_uw addr = put_label->label->addr; sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_s32 reg = *inst; + sljit_u32 reg = *inst; if (put_label->flags == 0) { SLJIT_ASSERT(addr < 0x80000000l); @@ -548,7 +552,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -584,7 +588,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)code_ptr; - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -601,39 +605,46 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2; + addr = (sljit_uw)((sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) - sizeof(sljit_ins)) >> 2); SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((sljit_ins)addr & 0xffff); break; } if (jump->flags & PATCH_J) { - SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff)); - buf_ptr[0] |= (addr >> 2) & 0x03ffffff; + SLJIT_ASSERT((addr & ~(sljit_uw)0xfffffff) + == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)); + buf_ptr[0] |= (sljit_ins)(addr >> 2) & 0x03ffffff; break; } /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; #else if (jump->flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= 0x7fffffff); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; + break; } - else if (jump->flags & PATCH_ABS48) { + + if (jump->flags & PATCH_ABS48) { SLJIT_ASSERT(addr <= 0x7fffffffffffl); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); - } - else { - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[5] = (buf_ptr[5] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[3] |= (sljit_ins)addr & 0xffff; + break; } + + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[5]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[5] |= (sljit_ins)addr & 0xffff; #endif } while (0); jump = jump->next; @@ -656,7 +667,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -673,7 +684,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { +#if defined(__GNUC__) && !defined(SLJIT_IS_FPU_AVAILABLE) sljit_sw fir = 0; +#endif /* __GNUC__ && !SLJIT_IS_FPU_AVAILABLE */ switch (feature_type) { case SLJIT_HAS_FPU: @@ -696,7 +709,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #endif /* SLJIT_MIPS_REV >= 1 */ default: - return fir; + return 0; } } @@ -723,15 +736,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define CUMULATIVE_OP 0x00080 #define LOGICAL_OP 0x00100 #define IMM_OP 0x00200 -#define SRC2_IMM 0x00400 +#define MOVE_OP 0x00400 +#define SRC2_IMM 0x00800 -#define UNUSED_DEST 0x00800 -#define REG_DEST 0x01000 -#define REG1_SOURCE 0x02000 -#define REG2_SOURCE 0x04000 -#define SLOW_SRC1 0x08000 -#define SLOW_SRC2 0x10000 -#define SLOW_DEST 0x20000 +#define UNUSED_DEST 0x01000 +#define REG_DEST 0x02000 +#define REG1_SOURCE 0x04000 +#define REG2_SOURCE 0x08000 +#define SLOW_SRC1 0x10000 +#define SLOW_SRC2 0x20000 +#define SLOW_DEST 0x40000 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #define STACK_STORE SW @@ -741,7 +755,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define STACK_LOAD LD #endif -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #include "sljitNativeMIPS_32.c" @@ -754,56 +769,195 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_ins base; - sljit_s32 args, i, tmp, offs; + sljit_s32 i, tmp, offset; + sljit_s32 arg_count, word_arg_count, saved_arg_count, float_arg_count; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - local_size = (local_size + 15) & ~0xf; + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; #else - local_size = (local_size + 31) & ~0x1f; + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; #endif compiler->local_size = local_size; - if (local_size <= SIMM_MAX) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + tmp = arg_types >> SLJIT_ARG_SHIFT; + arg_count = 0; + offset = 0; + + while (tmp) { + offset = arg_count; + if ((tmp & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) { + if ((arg_count & 0x1) != 0) + arg_count++; + arg_count++; + } + + arg_count++; + tmp >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = (sljit_uw)arg_count << 2; + offset = (offset >= 4) ? (offset << 2) : 0; +#else /* !SLJIT_CONFIG_MIPS_32 */ + offset = 0; +#endif /* SLJIT_CONFIG_MIPS_32 */ + + if (local_size + offset <= -SIMM_MIN) { /* Frequent case. */ FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); base = S(SLJIT_SP); - offs = local_size - (sljit_sw)sizeof(sljit_sw); - } - else { + offset = local_size - SSIZE_OF(sw); + } else { FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size)); FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); + offset = -SSIZE_OF(sw); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = 0; - offs = -(sljit_sw)sizeof(sljit_sw); +#endif } - FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offset), MOVABLE_INS)); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offset), MOVABLE_INS)); } for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offset), MOVABLE_INS)); } - args = get_arg_count(arg_types); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif - if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); + } + + arg_types >>= SLJIT_ARG_SHIFT; + arg_count = 0; + word_arg_count = 0; + saved_arg_count = 0; + float_arg_count = 0; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* The first maximum two floating point arguments are passed in floating point + registers if no integer argument precedes them. The first 16 byte data is + passed in four integer registers, the rest is placed onto the stack. + The floating point registers are also part of the first 16 byte data, so + their corresponding integer registers are not used when they are present. */ + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if ((arg_count & 0x1) != 0) + arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) { + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MTC1 | TA(5 + arg_count) | FS(float_arg_count) | (1 << 11), MOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, LDC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + + if (word_arg_count == 0 && float_arg_count <= 2) { + if (float_arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + } else if (arg_count < 4) + FAIL_IF(push_inst(compiler, MTC1 | TA(4 + arg_count) | FS(float_arg_count), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count + 1 || arg_count == 0) + tmp = word_arg_count; + else + break; + + if (arg_count < 4) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(tmp), DR(tmp))); + else + FAIL_IF(push_inst(compiler, LW | base | T(tmp) | IMM(local_size + (arg_count << 2)), DR(tmp))); + break; + } + arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; + } + + SLJIT_ASSERT(compiler->args_size == (sljit_uw)arg_count << 2); +#else /* !SLJIT_CONFIG_MIPS_32 */ + while (arg_types) { + arg_count++; + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_D | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + if (arg_count != float_arg_count) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(arg_count) | FD(float_arg_count), MOVABLE_INS)); + else if (arg_count == 1) + FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); + break; + default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count || word_arg_count <= 1) + tmp = word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(tmp), DR(tmp))); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } +#endif /* SLJIT_CONFIG_MIPS_32 */ return SLJIT_SUCCESS; } @@ -816,57 +970,110 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - compiler->local_size = (local_size + 15) & ~0xf; + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; #else - compiler->local_size = (local_size + 31) & ~0x1f; + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 31) & ~0x1f; #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr) { - sljit_s32 local_size, i, tmp, offs; - sljit_ins base; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + sljit_s32 local_size, i, tmp, offset; + sljit_s32 scratches = compiler->scratches; + sljit_s32 saveds = compiler->saveds; + sljit_s32 fsaveds = compiler->fsaveds; + sljit_s32 fscratches = compiler->fscratches; local_size = compiler->local_size; - if (local_size <= SIMM_MAX) - base = S(SLJIT_SP); - else { - FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); - base = S(TMP_REG1); - local_size = 0; + + tmp = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((tmp & SSIZE_OF(sw)) != 0) + tmp += SSIZE_OF(sw); + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + tmp += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif + + if (local_size <= SIMM_MAX) { + if (local_size < frame_size) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size), DR(SLJIT_SP))); + local_size = frame_size; + } + } else { + if (tmp < frame_size) + tmp = frame_size; + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size - tmp)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + local_size = tmp; } - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_s32)sizeof(sljit_sw)), RETURN_ADDR_REG)); - offs = local_size - (sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + SLJIT_ASSERT(local_size >= frame_size); - tmp = compiler->scratches; - for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); - offs += (sljit_s32)(sizeof(sljit_sw)); + offset = local_size - SSIZE_OF(sw); + if (frame_size == 0) + FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | TA(RETURN_ADDR_REG) | IMM(offset), RETURN_ADDR_REG)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = tmp; i <= SLJIT_S0; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); - offs += (sljit_s32)(sizeof(sljit_sw)); + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); } - SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + /* This alignment is valid because offset is not used after storing FPU regs. */ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LDC1 | S(SLJIT_SP) | FT(i) | IMM(offset), MOVABLE_INS)); + } + + if (local_size > frame_size) + *ins_ptr = ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(local_size - frame_size); + else + *ins_ptr = NOP; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + emit_stack_frame_release(compiler, 0, &ins); FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); - if (compiler->local_size <= SIMM_MAX) - return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); - else - return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); + return push_inst(compiler, ins, UNMOVABLE_INS); } #undef STACK_STORE @@ -1041,7 +1248,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) { sljit_s32 tmp_ar, base, delay_slot; @@ -1104,14 +1311,14 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 compiler->cache_argw = 0; } - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (dst == TMP_REG2) { SLJIT_ASSERT(HAS_FLAGS(op)); flags |= UNUSED_DEST; } else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + if (flags & MOVE_OP) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) @@ -1165,8 +1372,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) - dst_r = src2_r; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; } else if (src2 & SLJIT_IMM) { if (!(flags & SRC2_IMM)) { @@ -1176,8 +1383,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM)) - dst_r = 0; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } } } } @@ -1221,7 +1432,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - sljit_s32 int_op = op & SLJIT_I32_OP; + sljit_s32 int_op = op & SLJIT_32; #endif CHECK_ERROR(); @@ -1326,11 +1537,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 -#else sljit_s32 flags = 0; -#endif CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -1338,58 +1545,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src, srcw); #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) - flags |= INT_DATA | SIGNED_DATA; + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; #endif switch (GET_OPCODE(op)) { case SLJIT_MOV: - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_U32: #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); -#else - return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); case SLJIT_MOV_S32: -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); -#else - return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); #endif case SLJIT_MOV_U8: - return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_NOT: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); } SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags -#endif } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, @@ -1397,23 +1596,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 -#else sljit_s32 flags = 0; -#endif CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) src1w = (sljit_s32)src1w; @@ -1425,12 +1617,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: case SLJIT_SUBC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: @@ -1450,7 +1642,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile src2w &= 0x1f; #else if (src2 & SLJIT_IMM) { - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) src2w &= 0x1f; else src2w &= 0x3f; @@ -1461,10 +1653,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; +} -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef flags +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; #endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, @@ -1512,7 +1714,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1524,17 +1726,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) -#define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8)) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) << (21 - 8)) static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 +# define flags (sljit_u32)0 #else - sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) << 21; + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; #endif if (src & SLJIT_MEM) { @@ -1560,9 +1762,9 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define flags 0 +# define flags (sljit_u32)0 #else - sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; #endif sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1582,7 +1784,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); @@ -1640,11 +1842,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1669,8 +1871,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_F32_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); - op ^= SLJIT_F32_OP; + FAIL_IF(push_inst(compiler, CVT_S_S | (sljit_ins)((op & SLJIT_32) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_32; break; } @@ -1841,7 +2043,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile { struct sljit_jump *jump; sljit_ins inst; - sljit_s32 flags = 0; + sljit_u32 flags = 0; sljit_s32 delay_check = UNMOVABLE_INS; CHECK_ERROR_PTR(); @@ -1864,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: + case SLJIT_CARRY: BR_Z(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -1871,6 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: BR_NZ(OTHER_FLAG); break; case SLJIT_NOT_EQUAL_F64: @@ -1947,7 +2151,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler compiler->cache_arg = 0; compiler->cache_argw = 0; - flags = ((type & SLJIT_I32_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; + flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; if (src1 & SLJIT_MEM) { PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); src1 = TMP_REG1; @@ -2074,7 +2278,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; if (compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -2103,7 +2307,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) sljit_s32 mem_type = WORD_DATA; #else - sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; #endif CHECK_ERROR(); @@ -2111,10 +2315,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); -#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op == SLJIT_MOV_S32) - mem_type = INT_DATA | SIGNED_DATA; -#endif dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); compiler->cache_arg = 0; @@ -2131,7 +2331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_OVERFLOW: case SLJIT_NOT_OVERFLOW: - if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { src_ar = OTHER_FLAG; break; } @@ -2142,6 +2342,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_GREATER_F64: case SLJIT_LESS_EQUAL_F64: type ^= 0x1; /* Flip type bit for the XORI below. */ + /* fallthrough */ case SLJIT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64: case SLJIT_LESS_F64: @@ -2203,7 +2404,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (dst_reg & SLJIT_I32_OP) + if (dst_reg & SLJIT_32) srcw = (sljit_s32)srcw; #endif FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); @@ -2211,7 +2412,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil srcw = 0; } - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; switch (type & 0xff) { case SLJIT_EQUAL: @@ -2298,7 +2499,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); #else - PTR_FAIL_IF(push_inst(compiler, dst_r, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); compiler->size += 5; #endif diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c index 6ddb5508ec0..95fe6bbe0ec 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_32.c @@ -86,11 +86,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, CNTLZW | S(src2) | A(dst)); @@ -158,7 +153,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & ALT_FORM3) { /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); } if (flags & ALT_FORM4) { @@ -167,11 +164,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); @@ -277,5 +280,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c index cbdf2dd8a2b..d104f6d75ff 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_64.c @@ -57,20 +57,20 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, } /* Count leading zeroes. */ - tmp = (imm >= 0) ? imm : ~imm; + tmp = (sljit_uw)((imm >= 0) ? imm : ~imm); ASM_SLJIT_CLZ(tmp, shift); SLJIT_ASSERT(shift > 0); shift--; - tmp = (imm << shift); + tmp = ((sljit_uw)imm << shift); if ((tmp & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); shift += 15; return PUSH_RLDICR(reg, shift); } if ((tmp & ~0xffffffff00000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); shift += 31; return PUSH_RLDICR(reg, shift); @@ -78,18 +78,18 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, /* Cut out the 16 bit from immediate. */ shift += 15; - tmp2 = imm & ((1ul << (63 - shift)) - 1); + tmp2 = (sljit_uw)imm & (((sljit_uw)1 << (63 - shift)) - 1); if (tmp2 <= 0xffff) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); FAIL_IF(PUSH_RLDICR(reg, shift)); - return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); + return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2); } if (tmp2 <= 0xffffffff) { FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16))); return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; } @@ -97,16 +97,16 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, tmp2 <<= shift2; if ((tmp2 & ~0xffff000000000000ul) == 0) { - FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48))); shift2 += 15; shift += (63 - shift2); FAIL_IF(PUSH_RLDICR(reg, shift)); - FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48))); return PUSH_RLDICR(reg, shift2); } /* The general version. */ - FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48))); FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); FAIL_IF(PUSH_RLDICR(reg, 31)); FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); @@ -199,19 +199,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl UN_EXTS(); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - - if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2))); - return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); - } - - UN_EXTS(); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); if (flags & ALT_FORM1) @@ -299,13 +286,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & ALT_FORM3) { if (flags & ALT_SIGN_EXT) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); - src1 = TMP_REG1; - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - src2 = TMP_REG2; + if (src1 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + } + if (src2 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } } + /* Setting XER SO is not enough, CR SO is also needed. */ - FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + if (src1 != TMP_ZERO) + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + else + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2))); + if (flags & ALT_SIGN_EXT) return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); return SLJIT_SUCCESS; @@ -317,12 +313,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + BIN_EXTS(); if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: BIN_EXTS(); @@ -432,14 +434,14 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t if (src) reg = *src & REG_MASK; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; break; default: @@ -453,13 +455,13 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count--; break; default: @@ -471,7 +473,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -492,10 +494,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_UNUSED_ARG(executable_offset); SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0); - inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); - inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff); + inst[0] = (inst[0] & 0xffff0000u) | ((sljit_ins)(new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000u) | ((sljit_ins)(new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000u) | ((sljit_ins)(new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000u) | ((sljit_ins)new_target & 0xffff); SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1); inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); @@ -503,5 +505,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c b/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c index 2174dbb07b2..8bfdc69522a 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativePPC_common.c @@ -109,32 +109,32 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 1, 2, 3, 4, 5, 6, 0, 7 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 0, 13 }; /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define D(d) (reg_map[d] << 21) -#define S(s) (reg_map[s] << 21) -#define A(a) (reg_map[a] << 16) -#define B(b) (reg_map[b] << 11) -#define C(c) (reg_map[c] << 6) -#define FD(fd) (freg_map[fd] << 21) -#define FS(fs) (freg_map[fs] << 21) -#define FA(fa) (freg_map[fa] << 16) -#define FB(fb) (freg_map[fb] << 11) -#define FC(fc) (freg_map[fc] << 6) -#define IMM(imm) ((imm) & 0xffff) -#define CRD(d) ((d) << 21) +#define D(d) ((sljit_ins)reg_map[d] << 21) +#define S(s) ((sljit_ins)reg_map[s] << 21) +#define A(a) ((sljit_ins)reg_map[a] << 16) +#define B(b) ((sljit_ins)reg_map[b] << 11) +#define C(c) ((sljit_ins)reg_map[c] << 6) +#define FD(fd) ((sljit_ins)freg_map[fd] << 21) +#define FS(fs) ((sljit_ins)freg_map[fs] << 21) +#define FA(fa) ((sljit_ins)freg_map[fa] << 16) +#define FB(fb) ((sljit_ins)freg_map[fb] << 11) +#define FC(fc) ((sljit_ins)freg_map[fc] << 6) +#define IMM(imm) ((sljit_ins)(imm) & 0xffff) +#define CRD(d) ((sljit_ins)(d) << 21) /* Instruction bit sections. OE and Rc flag (see ALT_SET_FLAGS). */ #define OE(flags) ((flags) & ALT_SET_FLAGS) /* Rc flag (see ALT_SET_FLAGS). */ #define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) -#define HI(opcode) ((opcode) << 26) -#define LO(opcode) ((opcode) << 1) +#define HI(opcode) ((sljit_ins)(opcode) << 26) +#define LO(opcode) ((sljit_ins)(opcode) << 1) #define ADD (HI(31) | LO(266)) #define ADDC (HI(31) | LO(10)) @@ -182,6 +182,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FSUB (HI(63) | LO(20)) #define FSUBS (HI(59) | LO(20)) #define LD (HI(58) | 0) +#define LFD (HI(50)) #define LWZ (HI(32)) #define MFCR (HI(31) | LO(19)) #define MFLR (HI(31) | LO(339) | 0x80000) @@ -215,6 +216,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define STD (HI(62) | 0) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) +#define STFD (HI(54)) #define STFIWX (HI(31) | LO(983)) #define STW (HI(36)) #define STWU (HI(37)) @@ -232,15 +234,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define UIMM_MAX (0xffff) #define RLDI(dst, src, sh, mb, type) \ - (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) + (HI(30) | S(src) | A(dst) | ((sljit_ins)(type) << 2) | (((sljit_ins)(sh) & 0x1f) << 11) \ + | (((sljit_ins)(sh) & 0x20) >> 4) | (((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20)) #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func) { - sljit_sw* ptrs; + sljit_uw* ptrs; + if (func_ptr) *func_ptr = (void*)context; - ptrs = (sljit_sw*)func; + + ptrs = (sljit_uw*)func; context->addr = addr ? addr : ptrs[0]; context->r2 = ptrs[1]; context->r11 = ptrs[2]; @@ -260,7 +265,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in { sljit_sw diff; sljit_uw target_addr; - sljit_sw extra_jump_flags; + sljit_uw extra_jump_flags; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) @@ -296,7 +301,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in } extra_jump_flags = REMOVE_COND; - diff -= sizeof(sljit_ins); + diff -= SSIZE_OF(ins); } if (diff <= 0x01ffffff && diff >= -0x02000000) { @@ -349,7 +354,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) { sljit_uw addr = put_label->label->addr; sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_s32 reg = *inst; + sljit_u32 reg = *inst; if (put_label->flags == 0) { SLJIT_ASSERT(addr < 0x100000000l); @@ -433,7 +438,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Just recording the address. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -501,7 +506,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -511,7 +516,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!put_label); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) - SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins))); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)(compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)))); #else SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); #endif @@ -527,22 +532,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (!(jump->flags & PATCH_ABS_B)) { addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); } else { SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + *buf_ptr = BCx | ((sljit_ins)addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); } } else { if (!(jump->flags & PATCH_ABS_B)) { addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | ((*buf_ptr) & 0x1); } else { SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + *buf_ptr = Bx | ((sljit_ins)addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); } } break; @@ -550,26 +555,32 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; #else if (jump->flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= 0x7fffffff); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[1] |= (sljit_ins)addr & 0xffff; break; } + if (jump->flags & PATCH_ABS48) { SLJIT_ASSERT(addr <= 0x7fffffffffff); - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[3] |= (sljit_ins)addr & 0xffff; break; } - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); + + SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[4]) & 0xffff) == 0); + buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; + buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; + buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; + buf_ptr[4] |= (sljit_ins)addr & 0xffff; #endif } while (0); jump = jump->next; @@ -592,7 +603,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); @@ -601,7 +612,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (((sljit_sw)code_ptr) & 0x4) code_ptr++; #endif - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_uw)code, (void*)sljit_generate_code); #endif code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -696,69 +707,116 @@ ALT_FORM5 0x010000 */ #define STACK_LOAD LD #endif +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) +#define LR_SAVE_OFFSET 2 * SSIZE_OF(sw) +#else +#define LR_SAVE_OFFSET SSIZE_OF(sw) +#endif + +#define STACK_MAX_DISTANCE (0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET) + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args, i, tmp, offs; + sljit_s32 i, tmp, base, offset; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = 0; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 arg_count = 0; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - FAIL_IF(push_inst(compiler, MFLR | D(0))); - offs = -(sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + FAIL_IF(push_inst(compiler, MFLR | D(0))); + + base = SLJIT_SP; + offset = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); +#else + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); +#endif + } else { + base = TMP_REG1; + FAIL_IF(push_inst(compiler, OR | S(SLJIT_SP) | A(TMP_REG1) | B(SLJIT_SP))); + FAIL_IF(load_immediate(compiler, TMP_REG2, -local_size)); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2))); +#else + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(TMP_REG2))); +#endif + local_size = 0; + offset = 0; + } + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset))); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset))); + } + + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset))); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); } for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - offs -= (sljit_s32)(sizeof(sljit_sw)); - FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); } - SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); - -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); -#else - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); -#endif - + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET))); FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); - args = get_arg_count(arg_types); + arg_types >>= SLJIT_ARG_SHIFT; - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); - if (args >= 2) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); - if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + do { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (arg_count != word_arg_count) + tmp = SLJIT_R0 + word_arg_count; + else + break; - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; - local_size = (local_size + 15) & ~0xf; - compiler->local_size = local_size; - -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, -local_size)); - FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); - } + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(tmp) | B(SLJIT_R0 + arg_count))); + } while (0); #else - if (local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, -local_size)); - FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); - } + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - saved_arg_count) | B(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } #endif + word_arg_count++; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + arg_count++; +#endif + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -771,54 +829,74 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; - compiler->local_size = (local_size + 15) & ~0xf; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_s32 i, tmp, offs; + sljit_s32 i, tmp, base, offset; + sljit_s32 local_size = compiler->local_size; + base = SLJIT_SP; + if (local_size > STACK_MAX_DISTANCE) { + base = TMP_REG1; + if (local_size > 2 * STACK_MAX_DISTANCE + LR_SAVE_OFFSET) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(base) | A(SLJIT_SP) | IMM(0))); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(SLJIT_SP) | IMM(local_size - STACK_MAX_DISTANCE))); + local_size = STACK_MAX_DISTANCE; + } + } + + offset = local_size; + FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET))); + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset))); + } + + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset))); + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); + } + + push_inst(compiler, MTLR | S(0)); + + if (local_size > 0) + return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size)); + + SLJIT_ASSERT(base == TMP_REG1); + return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + CHECK(check_sljit_emit_return_void(compiler)); - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); - else { - FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); - FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); - } - -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); -#else - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); -#endif - - offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); - - tmp = compiler->scratches; - for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); - offs += (sljit_s32)(sizeof(sljit_sw)); - } - - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = tmp; i <= SLJIT_S0; i++) { - FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); - offs += (sljit_s32)(sizeof(sljit_sw)); - } - - FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); - SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); - - FAIL_IF(push_inst(compiler, MTLR | S(0))); - FAIL_IF(push_inst(compiler, BLR)); - - return SLJIT_SUCCESS; + FAIL_IF(emit_stack_frame_release(compiler)); + return push_inst(compiler, BLR); } #undef STACK_STORE @@ -843,11 +921,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) #define ARCH_32_64(a, b) a #define INST_CODE_AND_DST(inst, flags, reg) \ - ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) + ((sljit_ins)(inst) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #else #define ARCH_32_64(a, b) b #define INST_CODE_AND_DST(inst, flags, reg) \ - (((inst) & ~INT_ALIGNED) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) + (((sljit_ins)(inst) & ~(sljit_ins)INT_ALIGNED) | (sljit_ins)(((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #endif static const sljit_ins data_transfer_insts[64 + 16] = { @@ -1000,7 +1078,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag if (argw != 0) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | (argw << 11) | ((31 - argw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | ((sljit_ins)argw << 11) | ((31 - (sljit_ins)argw) << 1))); #else FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1))); #endif @@ -1073,8 +1151,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); /* Destination check. */ - if (SLOW_IS_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = dst; + /* The REG_DEST is only used by SLJIT_MOV operations, although + * it is set for op2 operations with unset destination. */ flags |= REG_DEST; if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) @@ -1087,8 +1167,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG1_SOURCE; } else if (src1 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } } else { FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); @@ -1104,8 +1187,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = src2_r; } else if (src2 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; + src2_r = TMP_ZERO; + if (src2w != 0) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } } else { FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); @@ -1123,7 +1209,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_s32 int_op = op & SLJIT_I32_OP; + sljit_s32 int_op = op & SLJIT_32; #endif CHECK_ERROR(); @@ -1174,7 +1260,7 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) { if (!(src & OFFS_REG_MASK)) { - if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED) + if (srcw == 0 && (src & REG_MASK)) return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK)); FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); @@ -1188,7 +1274,7 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src))); #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | ((sljit_ins)srcw << 11) | ((31 - (sljit_ins)srcw) << 1))); #else FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1))); #endif @@ -1211,8 +1297,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src, srcw); op = GET_OPCODE(op); - if ((src & SLJIT_IMM) && srcw == 0) - src = TMP_ZERO; if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); @@ -1223,7 +1307,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op_flags & SLJIT_I32_OP) { + if (op_flags & SLJIT_32) { if (op < SLJIT_NOT) { if (src & SLJIT_MEM) { if (op == SLJIT_MOV_S32) @@ -1245,11 +1329,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile switch (op) { case SLJIT_MOV: - case SLJIT_MOV_P: #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif + case SLJIT_MOV_P: return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -1257,6 +1342,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32)); case SLJIT_MOV_S32: + case SLJIT_MOV32: return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32)); #endif @@ -1275,12 +1361,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: - return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_CLZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); #else return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); #endif @@ -1306,7 +1389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #endif #define TEST_UH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) + (((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000)) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_ADD_IMM(src, srcw) \ @@ -1327,13 +1410,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_ADD_FORM1(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ - || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z | SLJIT_SET_CARRY)) + || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z | SLJIT_SET_CARRY)) #define TEST_SUB_FORM2(op) \ ((GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) \ - || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z)) + || (op & (SLJIT_32 | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_32 | SLJIT_SET_Z)) #define TEST_SUB_FORM3(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ - || (op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) + || (op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) #else #define TEST_ADD_FORM1(op) \ (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) @@ -1351,21 +1434,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - if ((src1 & SLJIT_IMM) && src1w == 0) - src1 = TMP_ZERO; - if ((src2 & SLJIT_IMM) && src2w == 0) - src2 = TMP_ZERO; - #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_I32_OP) { + if (op & SLJIT_32) { /* Most operations expect sign extended arguments. */ flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) @@ -1381,45 +1456,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (TEST_ADD_FORM1(op)) return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_SH_IMM(src2, src2w)) { - compiler->imm = (src2w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SH_IMM(src1, src1w)) { - compiler->imm = (src1w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffffffff; + compiler->imm = (sljit_ins)src2w & 0xffffffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_ADD_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffffffff; + compiler->imm = (sljit_ins)src1w & 0xffffffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if ((op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) { + if ((op & (SLJIT_32 | SLJIT_SET_Z)) == (SLJIT_32 | SLJIT_SET_Z)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); @@ -1427,39 +1504,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile #endif if (HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { - if (dst == SLJIT_UNUSED) { + if (dst == TMP_REG2) { if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); } if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); } - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (dst == TMP_REG2 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); @@ -1467,7 +1547,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (TEST_SUB_FORM2(op)) { if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); @@ -1477,45 +1557,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; + compiler->imm = (sljit_ins)(-src2w) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } if (!HAS_FLAGS(op)) { if (TEST_SH_IMM(src2, -src2w)) { - compiler->imm = ((-src2w) >> 16) & 0xffff; + compiler->imm = (sljit_ins)((-src2w) >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, -src2w)) { - compiler->imm = -src2w & 0xffffffff; + compiler->imm = (sljit_ins)-src2w; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } } - /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ + /* We know ALT_SIGN_EXT is set if it is an SLJIT_32 on 64 bit systems. */ return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) flags |= ALT_FORM2; #endif if (!HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; + compiler->imm = (sljit_ins)src2w & 0xffff; return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; + compiler->imm = (sljit_ins)src1w & 0xffff; return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } } @@ -1529,30 +1610,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile /* Commutative unsigned operations. */ if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UL_IMM(src1, src1w)) { - compiler->imm = src1w; + compiler->imm = (sljit_ins)src1w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_UH_IMM(src2, src2w)) { - compiler->imm = (src2w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src2w >> 16) & 0xffff; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UH_IMM(src1, src1w)) { - compiler->imm = (src1w >> 16) & 0xffff; + compiler->imm = (sljit_ins)(src1w >> 16) & 0xffff; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } } - if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) { - /* Unlike or and xor, and resets unwanted bits as well. */ + if (GET_OPCODE(op) != SLJIT_AND) { + /* Unlike or and xor, the and resets unwanted bits as well. */ if (TEST_UI_IMM(src2, src2w)) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UI_IMM(src1, src1w)) { - compiler->imm = src1w; + compiler->imm = (sljit_ins)src1w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } @@ -1562,11 +1643,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_LSHR: case SLJIT_ASHR: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) flags |= ALT_FORM2; #endif if (src2 & SLJIT_IMM) { - compiler->imm = src2w; + compiler->imm = (sljit_ins)src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); @@ -1575,6 +1656,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + #undef TEST_ADD_FORM1 #undef TEST_SUB_FORM2 #undef TEST_SUB_FORM3 @@ -1621,7 +1716,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1633,8 +1728,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 6)) +#define SELECT_FOP(op, single, double) ((sljit_ins)((op & SLJIT_32) ? single : double)) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) @@ -1688,7 +1783,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp dstw &= 0x3; if (dstw) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | ((sljit_ins)dstw << 11) | ((31 - (sljit_ins)dstw) << 1))); #else FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1))); #endif @@ -1745,7 +1840,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp if (dst & SLJIT_MEM) return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); - if (op & SLJIT_F32_OP) + if (op & SLJIT_32) return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); return SLJIT_SUCCESS; @@ -1755,7 +1850,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 invert_sign = 1; if (src & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ (sljit_sw)0x80000000)); src = TMP_REG1; invert_sign = 0; } @@ -1783,7 +1878,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp if (dst & SLJIT_MEM) return emit_op_mem(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, TMP_REG1); - if (op & SLJIT_F32_OP) + if (op & SLJIT_32) return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); return SLJIT_SUCCESS; @@ -1815,11 +1910,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil CHECK_ERROR(); - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1830,8 +1925,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_CONV_F64_FROM_F32: - op ^= SLJIT_F32_OP; - if (op & SLJIT_F32_OP) { + op ^= SLJIT_32; + if (op & SLJIT_32) { FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); break; } @@ -1946,12 +2041,22 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_bo_bi_flags(sljit_s32 type) +static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (4 << 21) | (2 << 16); + /* fallthrough */ + case SLJIT_EQUAL: return (12 << 21) | (2 << 16); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (12 << 21) | (2 << 16); + /* fallthrough */ + case SLJIT_NOT_EQUAL: return (4 << 21) | (2 << 16); @@ -2015,15 +2120,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); - bo_bi_flags = get_bo_bi_flags(type & 0xff); + bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); if (!bo_bi_flags) return NULL; jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); type &= 0xff; + if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY) + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + /* In PPC, we don't need to touch the arguments. */ if (type < SLJIT_JUMP) jump->flags |= IS_COND; @@ -2049,6 +2157,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); #endif + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2068,25 +2181,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (FAST_IS_REG(src)) { #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL) { + if (type >= SLJIT_CALL && src != TMP_CALL_REG) { FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); src_r = TMP_CALL_REG; } else src_r = src; -#else +#else /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ src_r = src; -#endif +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ } else if (src & SLJIT_IMM) { /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; + #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) if (type >= SLJIT_CALL) jump->flags |= IS_CALL; -#endif +#endif /* SLJIT_PASS_ENTRY_ADDR_TO_CALL */ + FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); src_r = TMP_CALL_REG; } @@ -2108,13 +2223,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (src & SLJIT_MEM) { ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); src = TMP_CALL_REG; } + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src = TMP_CALL_REG; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) FAIL_IF(call_with_args(compiler, arg_types, &src)); #endif @@ -2130,20 +2255,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_s32 reg, input_flags, cr_bit, invert; + sljit_s32 reg, invert; + sljit_u32 bit, from_xer; sljit_s32 saved_op = op; sljit_sw saved_dstw = dstw; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_s32 input_flags = ((op & SLJIT_32) || op == SLJIT_MOV32) ? INT_DATA : WORD_DATA; +#else + sljit_s32 input_flags = WORD_DATA; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA; -#else - input_flags = WORD_DATA; -#endif - op = GET_OPCODE(op); reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; @@ -2151,7 +2276,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); invert = 0; - cr_bit = 0; + bit = 0; + from_xer = 0; switch (type & 0xff) { case SLJIT_LESS: @@ -2165,66 +2291,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_GREATER: case SLJIT_SIG_GREATER: - cr_bit = 1; + bit = 1; break; case SLJIT_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL: - cr_bit = 1; + bit = 1; invert = 1; break; case SLJIT_EQUAL: - cr_bit = 2; + bit = 2; break; case SLJIT_NOT_EQUAL: - cr_bit = 2; + bit = 2; invert = 1; break; case SLJIT_OVERFLOW: - cr_bit = 3; + from_xer = 1; + bit = 1; break; case SLJIT_NOT_OVERFLOW: - cr_bit = 3; + from_xer = 1; + bit = 1; invert = 1; break; + case SLJIT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) != 0; + break; + + case SLJIT_NOT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0; + break; + case SLJIT_LESS_F64: - cr_bit = 4 + 0; + bit = 4 + 0; break; case SLJIT_GREATER_EQUAL_F64: - cr_bit = 4 + 0; + bit = 4 + 0; invert = 1; break; case SLJIT_GREATER_F64: - cr_bit = 4 + 1; + bit = 4 + 1; break; case SLJIT_LESS_EQUAL_F64: - cr_bit = 4 + 1; + bit = 4 + 1; invert = 1; break; case SLJIT_EQUAL_F64: - cr_bit = 4 + 2; + bit = 4 + 2; break; case SLJIT_NOT_EQUAL_F64: - cr_bit = 4 + 2; + bit = 4 + 2; invert = 1; break; case SLJIT_UNORDERED_F64: - cr_bit = 4 + 3; + bit = 4 + 3; break; case SLJIT_ORDERED_F64: - cr_bit = 4 + 3; + bit = 4 + 3; invert = 1; break; @@ -2233,8 +2373,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; } - FAIL_IF(push_inst(compiler, MFCR | D(reg))); - FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1))); + FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg))); + FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + bit) << 11) | (31 << 6) | (31 << 1))); if (invert) FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); @@ -2283,19 +2423,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif mem_flags = WORD_DATA; break; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) case SLJIT_MOV_U32: + case SLJIT_MOV32: mem_flags = INT_DATA; break; case SLJIT_MOV_S32: mem_flags = INT_DATA; - if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_I32_OP)) { + if (!(type & SLJIT_MEM_STORE) && !(type & SLJIT_32)) { if (mem & OFFS_REG_MASK) mem_flags |= SIGNED_DATA; else @@ -2436,7 +2578,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); #else - PTR_FAIL_IF(push_inst(compiler, dst_r)); + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); compiler->size += 4; #endif diff --git a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c index 716491ec72d..8eef910c425 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeS390X.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeS390X.c @@ -44,6 +44,9 @@ typedef sljit_uw sljit_ins; /* Instruction tags (most significant halfword). */ static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) + static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 }; @@ -97,20 +100,37 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac * link register doesn't need to change */ +/* When reg cannot be unused. */ +#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP) + /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ static const sljit_gpr link_r = 14; /* r14 */ static const sljit_gpr fast_link_r = 0; /* r0 */ -/* Flag register layout: +#define TMP_FREG1 (0) - 0 32 33 34 36 64 - +---------------+---+---+-------+-------+ - | ZERO | 0 | 0 | C C |///////| - +---------------+---+---+-------+-------+ -*/ -static const sljit_gpr flag_r = 14; /* r14 */ +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, +}; + +#define R0A(r) (r) +#define R4A(r) ((r) << 4) +#define R8A(r) ((r) << 8) +#define R12A(r) ((r) << 12) +#define R16A(r) ((r) << 16) +#define R20A(r) ((r) << 20) +#define R28A(r) ((r) << 28) +#define R32A(r) ((r) << 32) +#define R36A(r) ((r) << 36) + +#define R0(r) ((sljit_ins)reg_map[r]) + +#define F0(r) ((sljit_ins)freg_map[r]) +#define F4(r) (R4A((sljit_ins)freg_map[r])) +#define F20(r) (R20A((sljit_ins)freg_map[r])) +#define F36(r) (R36A((sljit_ins)freg_map[r])) struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -124,19 +144,25 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) return reg_map[r]; } +static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r) +{ + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0]))); + return freg_map[r]; +} + /* Size of instruction in bytes. Tags must already be cleared. */ static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins) { /* keep faulting instructions */ if (ins == 0) - return 2; + return 2; if ((ins & 0x00000000ffffL) == ins) - return 2; + return 2; if ((ins & 0x0000ffffffffL) == ins) - return 4; + return 4; if ((ins & 0xffffffffffffL) == ins) - return 6; + return 6; SLJIT_UNREACHABLE(); return (sljit_uw)-1; @@ -172,7 +198,8 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins) } #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ - (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \ + && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE)) /* Map the given type to a 4-bit condition code mask. */ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { @@ -191,6 +218,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc0 | cc3); return (cc0 | cc2); } + /* fallthrough */ case SLJIT_EQUAL_F64: return cc0; @@ -204,6 +232,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc1 | cc2); return (cc1 | cc3); } + /* fallthrough */ case SLJIT_NOT_EQUAL_F64: return (cc1 | cc2 | cc3); @@ -228,10 +257,20 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_LESS_F64: return cc1; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: return (cc0 | cc1); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc0 | cc1); + /* fallthrough */ + case SLJIT_SIG_GREATER: /* Overflow is considered greater, see SLJIT_SUB. */ return cc2 | cc3; @@ -242,6 +281,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc2 | cc3); + /* fallthrough */ case SLJIT_UNORDERED_F64: return cc3; @@ -249,6 +289,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_NOT_OVERFLOW: if (compiler->status_flags_state & SLJIT_SET_Z) return (cc0 | cc1); + /* fallthrough */ case SLJIT_ORDERED_F64: return (cc0 | cc1 | cc2); @@ -444,7 +485,7 @@ SLJIT_S390X_RR(or, 0x1600) #define SLJIT_S390X_RRE(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ { \ - return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ + return (pattern) | R4A(dst) | R0A(src); \ } /* AND */ @@ -504,7 +545,7 @@ SLJIT_S390X_RRE(sgr, 0xb9090000) #define SLJIT_S390X_RIA(name, pattern, imm_type) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ - return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \ + return (pattern) | R20A(reg) | (imm & 0xffff); \ } /* ADD HALFWORD IMMEDIATE */ @@ -534,7 +575,7 @@ SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ { \ SLJIT_ASSERT(have_eimm()); \ - return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \ + return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \ } /* ADD IMMEDIATE */ @@ -567,17 +608,11 @@ SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) /* RX-a form instructions */ #define SLJIT_S390X_RXA(name, pattern) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT((d & 0xfff) == d); \ - ri = (sljit_ins)(r & 0xf) << 20; \ - xi = (sljit_ins)(x & 0xf) << 16; \ - bi = (sljit_ins)(b & 0xf) << 12; \ - di = (sljit_ins)(d & 0xfff); \ \ - return (pattern) | ri | xi | bi | di; \ + return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \ } /* LOAD */ @@ -607,15 +642,9 @@ SLJIT_S390X_RXA(sth, 0x40000000) #define SLJIT_S390X_RXYA(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \ { \ - sljit_ins ri, xi, bi, di; \ -\ SLJIT_ASSERT(cond); \ - ri = (sljit_ins)(r & 0xf) << 36; \ - xi = (sljit_ins)(x & 0xf) << 32; \ - bi = (sljit_ins)(b & 0xf) << 28; \ - di = disp_s20(d); \ \ - return (pattern) | ri | xi | bi | di; \ + return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \ } /* LOAD */ @@ -660,17 +689,11 @@ SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) /* RSY-a instructions */ #define SLJIT_S390X_RSYA(name, pattern, cond) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \ +SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \ { \ - sljit_ins r1, r3, b2, d2; \ -\ SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(dst & 0xf) << 36; \ - r3 = (sljit_ins)(src & 0xf) << 32; \ - b2 = (sljit_ins)(b & 0xf) << 28; \ - d2 = disp_s20(d); \ \ - return (pattern) | r1 | r3 | b2 | d2; \ + return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \ } /* LOAD MULTIPLE */ @@ -691,16 +714,14 @@ SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) #define SLJIT_S390X_RIEF(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \ { \ - sljit_ins r1, r2, i3, i4, i5; \ + sljit_ins i3, i4, i5; \ \ SLJIT_ASSERT(have_genext()); \ - r1 = (sljit_ins)(dst & 0xf) << 36; \ - r2 = (sljit_ins)(src & 0xf) << 32; \ i3 = (sljit_ins)start << 24; \ i4 = (sljit_ins)end << 16; \ i5 = (sljit_ins)rot << 8; \ \ - return (pattern) | r1 | r2 | i3 | i4 | i5; \ + return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \ } /* ROTATE THEN AND SELECTED BITS */ @@ -728,14 +749,12 @@ SLJIT_S390X_RIEF(risbhg, 0xec000000005d) #define SLJIT_S390X_RRFC(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ { \ - sljit_ins r1, r2, m3; \ + sljit_ins m3; \ \ SLJIT_ASSERT(have_lscond1()); \ - r1 = (sljit_ins)(dst & 0xf) << 4; \ - r2 = (sljit_ins)(src & 0xf); \ m3 = (sljit_ins)(mask & 0xf) << 12; \ \ - return (pattern) | m3 | r1 | r2; \ + return (pattern) | m3 | R4A(dst) | R0A(src); \ } /* LOAD HALFWORD IMMEDIATE ON CONDITION */ @@ -748,14 +767,13 @@ SLJIT_S390X_RRFC(locgr, 0xb9e20000) #define SLJIT_S390X_RIEG(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \ { \ - sljit_ins r1, m3, i2; \ + sljit_ins m3, i2; \ \ SLJIT_ASSERT(have_lscond2()); \ - r1 = (sljit_ins)(reg & 0xf) << 36; \ m3 = (sljit_ins)(mask & 0xf) << 32; \ i2 = (sljit_ins)(imm & 0xffffL) << 16; \ \ - return (pattern) | r1 | m3 | i2; \ + return (pattern) | R36A(reg) | m3 | i2; \ } /* LOAD HALFWORD IMMEDIATE ON CONDITION */ @@ -767,13 +785,9 @@ SLJIT_S390X_RIEG(locghi, 0xec0000000046) #define SLJIT_S390X_RILB(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \ { \ - sljit_ins r1, ri2; \ -\ SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(reg & 0xf) << 36; \ - ri2 = (sljit_ins)(ri & 0xffffffff); \ \ - return (pattern) | r1 | ri2; \ + return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \ } /* BRANCH RELATIVE AND SAVE LONG */ @@ -808,22 +822,20 @@ SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src) { - sljit_ins r1 = ((sljit_ins)dst & 0xf) << 8; - sljit_ins r2 = ((sljit_ins)src & 0xf); SLJIT_ASSERT(have_eimm()); - return 0xb9830000 | r1 | r2; + return 0xb9830000 | R8A(dst) | R0A(src); } /* INSERT PROGRAM MASK */ SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) { - return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4); + return 0xb2220000 | R4A(dst); } /* SET PROGRAM MASK */ SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) { - return 0x0400 | ((sljit_ins)(dst & 0xf) << 4); + return 0x0400 | R4A(dst); } /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ @@ -842,12 +854,12 @@ static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 1 (non-zero and no overflow) : unchanged 2 (zero and overflow) : decreased by 1 3 (non-zero and overflow) : decreased by 1 if non-zero */ - FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); - FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000))); - FAIL_IF(push_inst(compiler, spm(flag_r))); + FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(tmp1))); return SLJIT_SUCCESS; } @@ -858,16 +870,16 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t if (is_s16(v)) return push_inst(compiler, lghi(target, (sljit_s16)v)); - if ((sljit_uw)v == (v & 0x000000000000ffffU)) + if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0) return push_inst(compiler, llill(target, (sljit_u16)v)); - if ((sljit_uw)v == (v & 0x00000000ffff0000U)) + if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0) return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16))); - if ((sljit_uw)v == (v & 0x0000ffff00000000U)) + if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0) return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32))); - if ((sljit_uw)v == (v & 0xffff000000000000U)) + if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0) return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48))); /* 6 byte instructions (requires extended immediate facility) */ @@ -875,15 +887,16 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t if (is_s32(v)) return push_inst(compiler, lgfi(target, (sljit_s32)v)); - if ((sljit_uw)v == (v & 0x00000000ffffffffU)) + if (((sljit_uw)v >> 32) == 0) return push_inst(compiler, llilf(target, (sljit_u32)v)); - if ((sljit_uw)v == (v & 0xffffffff00000000U)) - return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32))); + if (((sljit_uw)v << 32) == 0) + return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32))); FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v))); return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32))); } + /* TODO(mundaym): instruction sequences that don't use extended immediates */ abort(); } @@ -891,7 +904,7 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t struct addr { sljit_gpr base; sljit_gpr index; - sljit_sw offset; + sljit_s32 offset; }; /* transform memory operand into D(X,B) form with a signed 20-bit offset */ @@ -911,7 +924,7 @@ static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, if (off != 0) { /* shift and put the result into tmp */ SLJIT_ASSERT(0 <= off && off < 64); - FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); index = tmp; off = 0; /* clear offset */ } @@ -923,7 +936,7 @@ static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler, } addr->base = base; addr->index = index; - addr->offset = off; + addr->offset = (sljit_s32)off; return SLJIT_SUCCESS; } @@ -944,7 +957,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, if (off != 0) { /* shift and put the result into tmp */ SLJIT_ASSERT(0 <= off && off < 64); - FAIL_IF(push_inst(compiler, sllg(tmp, index, off, 0))); + FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0))); index = tmp; off = 0; /* clear offset */ } @@ -956,7 +969,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, } addr->base = base; addr->index = index; - addr->offset = off; + addr->offset = (sljit_s32)off; return SLJIT_SUCCESS; } @@ -1014,16 +1027,16 @@ static sljit_s32 emit_move(struct sljit_compiler *compiler, sljit_gpr dst_r, sljit_s32 src, sljit_sw srcw) { - SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK)); + SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK)); if (src & SLJIT_IMM) return push_load_imm_inst(compiler, dst_r, srcw); if (src & SLJIT_MEM) - return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_I32_OP) != 0); + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0); sljit_gpr src_r = gpr(src & REG_MASK); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); } static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, @@ -1035,8 +1048,8 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr src_r = tmp1; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1050,17 +1063,32 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (FAST_IS_REG(src2)) - src_r = gpr(src2 & REG_MASK); + src_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); - FAIL_IF(push_inst(compiler, ins | (dst_r << 4) | src_r)); + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r))); if (needs_move != 2) return SLJIT_SUCCESS; dst_r = gpr(dst & REG_MASK); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)); } static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, @@ -1068,21 +1096,21 @@ static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr src1_r = tmp0; sljit_gpr src2_r = tmp1; if (FAST_IS_REG(src1)) - src1_r = gpr(src1 & REG_MASK); + src1_r = gpr(src1); else FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (FAST_IS_REG(src2)) - src2_r = gpr(src2 & REG_MASK); + src2_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); - return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12)); + return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r)); } typedef enum { @@ -1099,8 +1127,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1110,8 +1138,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (type == RIL_A) - return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff)); - return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff)); + return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff)); } static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, @@ -1119,15 +1147,15 @@ static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_sw src2w) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; sljit_gpr src_r = tmp0; - if (!SLOW_IS_REG(src1)) + if (!FAST_IS_REG(src1)) FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); else src_r = gpr(src1 & REG_MASK); - return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16); + return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16); } typedef enum { @@ -1147,7 +1175,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, SLJIT_ASSERT(src2 & SLJIT_MEM); - if (SLOW_IS_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = gpr(dst); if (dst == src1) @@ -1183,9 +1211,9 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, } if (type == RX_A) - ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w; + ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w; else - ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w); + ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w); FAIL_IF(push_inst(compiler, ins)); @@ -1193,7 +1221,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, return SLJIT_SUCCESS; dst_r = gpr(dst); - return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); + return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); } static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, @@ -1226,7 +1254,7 @@ static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, else dst_r = gpr(dst & REG_MASK); - return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw)); + return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw)); } struct ins_forms { @@ -1240,7 +1268,7 @@ struct ins_forms { }; static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -1250,7 +1278,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct if ((src1 | src2) & SLJIT_MEM) { sljit_ins ins12, ins20; - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins12 = forms->op; ins20 = forms->op_y; } @@ -1297,7 +1325,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct } } - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins = forms->op_r; ins_k = forms->op_rk; } @@ -1308,7 +1336,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct SLJIT_ASSERT(ins != 0 || ins_k != 0); - if (ins && SLOW_IS_REG(dst)) { + if (ins && FAST_IS_REG(dst)) { if (dst == src1) return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); @@ -1323,7 +1351,7 @@ static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct } static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -1333,7 +1361,7 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str if (src2 & SLJIT_MEM) { sljit_ins ins12, ins20; - if (mode & SLJIT_I32_OP) { + if (mode & SLJIT_32) { ins12 = forms->op; ins20 = forms->op_y; } @@ -1354,10 +1382,10 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); } - ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk; + ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk; - if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1)) - return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + if (ins == 0 || (FAST_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); } @@ -1376,9 +1404,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_memory_fragment *buf; void *code, *code_ptr; sljit_uw *pool, *pool_ptr; - - sljit_uw source; - sljit_sw offset; /* TODO(carenas): only need 32 bit */ + sljit_sw source, offset; /* TODO(carenas): only need 32 bit */ CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_generate_code(compiler)); @@ -1489,38 +1515,41 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil ins &= ~sljit_ins_const; /* update instruction with relative address of constant */ - source = (sljit_uw)code_ptr; - offset = (sljit_uw)pool_ptr - source; + source = (sljit_sw)code_ptr; + offset = (sljit_sw)pool_ptr - source; + SLJIT_ASSERT(!(offset & 1)); offset >>= 1; /* halfword (not byte) offset */ SLJIT_ASSERT(is_s32(offset)); + ins |= (sljit_ins)offset & 0xffffffff; /* update address */ const_->const_.addr = (sljit_uw)pool_ptr; /* store initial value into pool and update pool address */ - *(pool_ptr++) = const_->init_value; + *(pool_ptr++) = (sljit_uw)const_->init_value; /* move to next constant */ const_ = (struct sljit_s390x_const *)const_->const_.next; } if (jump && jump->addr == j) { - sljit_sw target = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) { jump->addr = (sljit_uw)pool_ptr; /* load address into tmp1 */ - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + SLJIT_ASSERT(!(offset & 1)); offset >>= 1; SLJIT_ASSERT(is_s32(offset)); - encode_inst(&code_ptr, - lgrl(tmp1, offset & 0xffffffff)); + + encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff)); /* store jump target into pool and update pool address */ - *(pool_ptr++) = target; + *(pool_ptr++) = (sljit_uw)target; /* branch to tmp1 */ sljit_ins op = (ins >> 32) & 0xf; @@ -1538,7 +1567,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else { jump->addr = (sljit_uw)code_ptr + 2; - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); offset = target - source; /* offset must be halfword aligned */ @@ -1552,14 +1581,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = jump->next; } if (put_label && put_label->addr == j) { - source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); SLJIT_ASSERT(put_label->label); put_label->addr = (sljit_uw)code_ptr; /* store target into pool */ *pool_ptr = put_label->label->addr; - offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; + offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source; pool_ptr++; SLJIT_ASSERT(!(offset & 1)); @@ -1594,7 +1623,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) case SLJIT_HAS_CMOV: return have_lscond1() ? 1 : 0; case SLJIT_HAS_FPU: - return 0; + return 1; } return 0; } @@ -1607,36 +1636,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args = get_arg_count(arg_types); - sljit_sw frame_size; + sljit_s32 word_arg_count = 0; + sljit_s32 offset, i, tmp; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* saved registers go in callee allocated save area */ - compiler->local_size = (local_size + 0xf) & ~0xf; - frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE; + /* Saved registers are stored in callee allocated save area. */ + SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13); - FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */ - if (frame_size != 0) { - if (is_s16(-frame_size)) - FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size)))); - else if (is_s32(-frame_size)) - FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size)))); - else { - FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size)); - FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ + offset += 9 * SSIZE_OF(sw); + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } + + if (saveds == 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); } } - if (args >= 1) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0)))); - if (args >= 2) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1)))); - if (args >= 3) - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), gpr(SLJIT_R2)))); - SLJIT_ASSERT(args < 4); + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; + compiler->local_size = local_size; + + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); + + arg_types >>= SLJIT_ARG_SHIFT; + tmp = 0; + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count)))); + tmp++; + } + word_arg_count++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -1649,37 +1709,67 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - /* TODO(mundaym): stack space for saved floating point registers */ - compiler->local_size = (local_size + 0xf) & ~0xf; + compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf; return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { - sljit_sw size; - sljit_gpr end; + sljit_s32 offset, i, tmp; + sljit_s32 local_size = compiler->local_size; + sljit_s32 saveds = compiler->saveds; + sljit_s32 scratches = compiler->scratches; - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw)); - if (!is_s20(size)) { - FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE)); - FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15))); - size = r6 * sizeof(sljit_sw); - end = r14; /* r15 has been restored already */ - } + if (is_u12(local_size)) + FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); else - end = r15; + FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size))); - FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */ - FAIL_IF(push_inst(compiler, br(r14))); /* return */ + offset = 2 * SSIZE_OF(sw); + if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { + FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ + offset += 9 * SSIZE_OF(sw); + } else { + if (scratches == SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else if (scratches > SLJIT_FIRST_SAVED_REG) { + FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15))); + offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); + } + + if (saveds == 0) { + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); + offset += SSIZE_OF(sw); + } return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); + return push_inst(compiler, br(r14)); /* return */ +} + /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ @@ -1692,7 +1782,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); - op = GET_OPCODE(op) | (op & SLJIT_I32_OP); + op = GET_OPCODE(op) | (op & SLJIT_32); switch (op) { case SLJIT_BREAKPOINT: /* The following invalid instruction is emitted by gdb. */ @@ -1786,17 +1876,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) { - /* TODO(carenas): implement prefetch? */ - return SLJIT_SUCCESS; - } - if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { /* LOAD REGISTER */ if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { dst_r = gpr(dst); src_r = gpr(src); - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { /* 32-bit */ case SLJIT_MOV32_U8: ins = llcr(dst_r, src_r); @@ -1811,6 +1896,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ins = lhr(dst_r, src_r); break; case SLJIT_MOV32: + if (dst_r == src_r) + return SLJIT_SUCCESS; ins = lr(dst_r, src_r); break; /* 64-bit */ @@ -1834,11 +1921,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; case SLJIT_MOV: case SLJIT_MOV_P: + if (dst_r == src_r) + return SLJIT_SUCCESS; ins = lgr(dst_r, src_r); break; default: ins = 0; SLJIT_UNREACHABLE(); + break; } FAIL_IF(push_inst(compiler, ins)); return SLJIT_SUCCESS; @@ -1862,6 +1952,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_sw)((sljit_u32)(srcw)); break; case SLJIT_MOV_S32: + case SLJIT_MOV32: srcw = (sljit_sw)((sljit_s32)(srcw)); break; } @@ -1875,7 +1966,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1)); /* TODO(carenas): convert all calls below to LEVAL */ - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { case SLJIT_MOV32_U8: ins = llc(reg, mem.offset, mem.index, mem.base); break; @@ -1914,7 +2005,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ins = lg(reg, mem.offset, mem.index, mem.base); break; default: + ins = 0; SLJIT_UNREACHABLE(); + break; } FAIL_IF(push_inst(compiler, ins)); return SLJIT_SUCCESS; @@ -1940,6 +2033,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile WHEN2(is_u12(mem.offset), sth, sthy)); case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: return push_inst(compiler, WHEN2(is_u12(mem.offset), st, sty)); case SLJIT_MOV_P: @@ -1972,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile EVAL(sthy, tmp0, mem)); case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: FAIL_IF(push_inst(compiler, EVAL(ly, tmp0, mem))); FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); @@ -1994,15 +2089,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */ - dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; + dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32)); compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); /* TODO(mundaym): optimize loads and stores */ - switch (opcode | (op & SLJIT_I32_OP)) { + switch (opcode | (op & SLJIT_32)) { case SLJIT_NOT: /* emulate ~x with x^-1 */ FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); @@ -2014,7 +2109,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT32: /* emulate ~x with x^-1 */ if (have_eimm()) - FAIL_IF(push_inst(compiler, xilf(dst_r, -1))); + FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff))); else { FAIL_IF(push_load_imm_inst(compiler, tmp1, -1)); if (src_r != dst_r) @@ -2023,14 +2118,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); } break; - case SLJIT_NEG: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); - break; - case SLJIT_NEG32: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); - break; case SLJIT_CLZ: if (have_eimm()) { FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */ @@ -2059,8 +2146,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(update_zero_overflow(compiler, op, dst_r)); /* TODO(carenas): doesn't need FAIL_IF */ - if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM)) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + if (dst & SLJIT_MEM) + FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); return SLJIT_SUCCESS; } @@ -2084,20 +2171,6 @@ static SLJIT_INLINE int is_shift(sljit_s32 op) { return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0; } -static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) -{ - switch (GET_FLAG_TYPE(op)) { - case SLJIT_OVERFLOW: - case SLJIT_NOT_OVERFLOW: - case SLJIT_SIG_LESS: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_SIG_GREATER: - case SLJIT_SIG_GREATER_EQUAL: - return 1; - } - return 0; -} - static const struct ins_forms add_forms = { 0x1a00, /* ar */ 0xb9080000, /* agr */ @@ -2131,24 +2204,24 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, if (src2 & SLJIT_IMM) { if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_overflow) - ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; else - ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; return emit_siy(compiler, ins, dst, dstw, src2w); } if (is_s16(src2w)) { if (sets_overflow) - ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; else - ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); goto done; } if (!sets_overflow) { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } @@ -2157,22 +2230,22 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, goto done; } } - else if ((op & SLJIT_I32_OP) || is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + else if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } } forms = sets_overflow ? &add_forms : &logical_add_forms; - FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); done: if (sets_zero_overflow) - FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); + FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } @@ -2202,78 +2275,85 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - int sets_signed = sets_signed_flag(op); + sljit_s32 flag_type = GET_FLAG_TYPE(op); + int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW); int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); const struct ins_forms *forms; sljit_ins ins; - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { - int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = flag_type >= SLJIT_SIG_LESS; compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; if (src2 & SLJIT_IMM) { if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) { - if ((op & SLJIT_I32_OP) || is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + if ((op & SLJIT_32) || is_s32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } } else { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } if (is_s16(src2w)) - return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w); + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w); } } else if (src2 & SLJIT_MEM) { - if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); } if (compare_signed) - ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; else - ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); } if (compare_signed) - ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; else - ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); } + if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w)); + goto done; + } + if (src2 & SLJIT_IMM) { sljit_sw neg_src2w = -src2w; if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { if (sets_signed) - ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; else - ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; return emit_siy(compiler, ins, dst, dstw, neg_src2w); } if (is_s16(neg_src2w)) { if (sets_signed) - ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; else - ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); goto done; } } if (!sets_signed) { - if ((op & SLJIT_I32_OP) || is_u32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + if ((op & SLJIT_32) || is_u32(src2w)) { + ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); goto done; } @@ -2282,19 +2362,19 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, goto done; } } - else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + else if ((op & SLJIT_32) || is_s32(neg_src2w)) { + ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); goto done; } } forms = sets_signed ? &sub_forms : &logical_sub_forms; - FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w)); done: if (sets_signed) { - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { /* In case of overflow, the sign bit of the two source operands must be different, and @@ -2303,14 +2383,14 @@ done: The -result operation sets the corrent sign, because the result cannot be zero. The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); - FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); + FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } else if (op & SLJIT_SET_Z) FAIL_IF(update_zero_overflow(compiler, op, dst_r)); } if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } @@ -2336,7 +2416,7 @@ static const struct ins_forms multiply_overflow_forms = { }; static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { @@ -2351,29 +2431,29 @@ static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 } FAIL_IF(push_inst(compiler, aih(tmp0, 1))); FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */ + FAIL_IF(push_inst(compiler, ipm(tmp1))); + FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */ - return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w); } if (src2 & SLJIT_IMM) { if (is_s16(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); } if (is_s32(src2w)) { - ins = (op & SLJIT_I32_OP) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); } } - return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w); } static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_uw imm, sljit_s32 count16) { @@ -2381,7 +2461,7 @@ static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (SLOW_IS_REG(dst)) { + if (IS_GPR_REG(dst)) { dst_r = gpr(dst & REG_MASK); if (dst == src1) needs_move = 0; @@ -2391,38 +2471,38 @@ static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (type == SLJIT_AND) { - if (!(mode & SLJIT_I32_OP)) - FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32))); - return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff)); + if (!(mode & SLJIT_32)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff)); } else if (type == SLJIT_OR) { if (count16 >= 3) { - FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32))); - return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); } if (count16 >= 2) { if ((imm & 0x00000000ffffffffull) == 0) - return push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)); + return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)); if ((imm & 0xffffffff00000000ull) == 0) - return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff)); } if ((imm & 0xffff000000000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48))); + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48))); if ((imm & 0x0000ffff00000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff))); + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff))); if ((imm & 0x00000000ffff0000ull) != 0) - FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff))); + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff))); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) - return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff)); + return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff)); return SLJIT_SUCCESS; } if ((imm & 0xffffffff00000000ull) != 0) - FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32))); + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32))); if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) - return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff)); return SLJIT_SUCCESS; } @@ -2457,18 +2537,18 @@ static const struct ins_forms bitwise_xor_forms = { }; static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_s32 type = GET_OPCODE(op); const struct ins_forms *forms; - if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) { + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) { sljit_s32 count16 = 0; sljit_uw imm = (sljit_uw)src2w; - if (op & SLJIT_I32_OP) + if (op & SLJIT_32) imm &= 0xffffffffull; if ((imm & 0x000000000000ffffull) != 0 || imm == 0) @@ -2480,7 +2560,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o if ((imm & 0xffff000000000000ull) != 0) count16++; - if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) { + if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) { sljit_gpr src_r = tmp0; if (FAST_IS_REG(src1)) @@ -2489,16 +2569,16 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if ((imm & 0x000000000000ffffull) != 0 || imm == 0) - return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm); + return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm); if ((imm & 0x00000000ffff0000ull) != 0) - return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16)); + return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16)); if ((imm & 0x0000ffff00000000ull) != 0) - return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32)); - return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48)); + return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48)); } if (!(op & SLJIT_SET_Z)) - return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16); + return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16); } if (type == SLJIT_AND) @@ -2508,16 +2588,16 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o else forms = &bitwise_xor_forms; - return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w); + return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w); } static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, + sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_s32 type = GET_OPCODE(op); - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr src_r = tmp0; sljit_gpr base_r = tmp0; sljit_ins imm = 0; @@ -2529,7 +2609,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (src2 & SLJIT_IMM) - imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f); + imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f)); else if (FAST_IS_REG(src2)) base_r = gpr(src2 & REG_MASK); else { @@ -2537,7 +2617,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, base_r = tmp1; } - if ((op & SLJIT_I32_OP) && dst_r == src_r) { + if ((op & SLJIT_32) && dst_r == src_r) { if (type == SLJIT_SHL) ins = 0x89000000 /* sll */; else if (type == SLJIT_LSHR) @@ -2545,21 +2625,21 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, else ins = 0x8a000000 /* sra */; - FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm)); + FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm)); } else { if (type == SLJIT_SHL) - ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; else if (type == SLJIT_LSHR) - ins = (op & SLJIT_I32_OP) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; else - ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; - FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16))); + FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16))); } if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) - return push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); return SLJIT_SUCCESS; } @@ -2590,20 +2670,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - compiler->mode = op & SLJIT_I32_OP; + compiler->mode = op & SLJIT_32; compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); - if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC) - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { src1 ^= src2; src2 ^= src1; @@ -2616,39 +2690,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w)); + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_SUB: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w)); + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_MUL: - FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w)); break; case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: - FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w)); break; case SLJIT_SHL: case SLJIT_LSHR: case SLJIT_ASHR: - FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w)); break; } if (dst & SLJIT_MEM) - return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) @@ -2686,17 +2778,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return gpr(reg); + return (sljit_s32)gpr(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - abort(); + return (sljit_s32)fgpr(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { sljit_ins ins = 0; @@ -2711,21 +2803,254 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ +#define FLOAT_LOAD 0 +#define FLOAT_STORE 1 + +static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + struct addr addr; + sljit_ins ins; + + SLJIT_ASSERT(mem & SLJIT_MEM); + + if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) { + FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */; + else + ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */; + + return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset); + } + + FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1)); + + if (op & FLOAT_STORE) + ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */; + else + ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */; + + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)); +} + +static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins, + sljit_s32 reg, + sljit_s32 src, sljit_sw srcw) +{ + struct addr addr; + + if (!(src & SLJIT_MEM)) + return push_inst(compiler, ins_r | F4(reg) | F0(src)); + + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); + return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16)); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_ins ins; + + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + /* M3 is set to 5 */ + if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) + ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */; + else + ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */; + + FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src))); + + if (dst & SLJIT_MEM) + return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins ins; + + if (src & SLJIT_IMM) { + FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw)); + src = (sljit_s32)tmp0; + } + else if (src & SLJIT_MEM) { + FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32)); + src = (sljit_s32)tmp0; + } + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) + ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */; + else + ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */; + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src))); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins_r, ins; + + if (src1 & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (op & SLJIT_32) { + ins_r = 0xb3090000 /* cebr */; + ins = 0xed0000000009 /* ceb */; + } else { + ins_r = 0xb3190000 /* cdbr */; + ins = 0xed0000000019 /* cdb */; + } + + return emit_float(compiler, ins_r, ins, src1, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { + sljit_s32 dst_r; + sljit_ins ins; + CHECK_ERROR(); - abort(); + + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (op == SLJIT_CONV_F64_FROM_F32) + FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw)); + else { + if (src & SLJIT_MEM) { + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_MOV_F64: + if (FAST_IS_REG(dst)) { + if (dst == src) + return SLJIT_SUCCESS; + + ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */; + break; + } + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw); + case SLJIT_CONV_F64_FROM_F32: + /* Only SLJIT_CONV_F32_FROM_F64. */ + ins = 0xb3440000 /* ledbr */; + break; + case SLJIT_NEG_F64: + ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64); + ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */; + break; + } + + FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src))); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + SLJIT_ASSERT(dst_r == TMP_FREG1); + + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); } +#define FLOAT_MOV(op, dst_r, src_r) \ + (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r)) + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + sljit_s32 dst_r = TMP_FREG1; + sljit_ins ins_r, ins; + CHECK_ERROR(); - abort(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + do { + if (FAST_IS_REG(dst)) { + dst_r = dst; + + if (dst == src1) + break; + + if (dst == src2) { + if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) { + src2 = src1; + src2w = src1w; + src1 = dst; + break; + } + + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2))); + src2 = TMP_FREG1; + } + } + + if (src1 & SLJIT_MEM) + FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w)); + else + FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1))); + } while (0); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD_F64: + ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */; + ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */; + break; + case SLJIT_SUB_F64: + ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */; + ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */; + break; + case SLJIT_MUL_F64: + ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */; + ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */; + break; + default: + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64); + ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */; + ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */; + break; + } + + FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w)); + + if (dst & SLJIT_MEM) + return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw); + + SLJIT_ASSERT(dst_r != TMP_FREG1); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ @@ -2795,6 +3120,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2809,14 +3139,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); if (src & SLJIT_IMM) { SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */ FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); } - else if (src & SLJIT_MEM) + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); + } /* emit jump instruction */ if (type >= SLJIT_FAST_CALL) @@ -2832,6 +3163,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + SLJIT_ASSERT(gpr(TMP_REG2) == tmp1); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */)); + src = TMP_REG2; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src)))); + src = TMP_REG2; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -2859,11 +3208,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* dst is also source operand */ if (dst & SLJIT_MEM) - FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32)); break; + case SLJIT_MOV32: + op |= SLJIT_32; + /* fallthrough */ case SLJIT_MOV: - case (SLJIT_MOV32 & ~SLJIT_I32_OP): /* can write straight into destination */ loc_r = dst_r; break; @@ -2876,7 +3227,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (have_lscond2()) { FAIL_IF(push_load_imm_inst(compiler, loc_r, 0)); FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lochi, locghi))); + WHEN2(op & SLJIT_32, lochi, locghi))); } else { /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */ abort(); @@ -2888,22 +3239,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #define LEVAL(i) i(dst_r, loc_r) case SLJIT_AND: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, nr, ngr))); + WHEN2(op & SLJIT_32, nr, ngr))); break; case SLJIT_OR: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, or, ogr))); + WHEN2(op & SLJIT_32, or, ogr))); break; case SLJIT_XOR: FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, xr, xgr))); + WHEN2(op & SLJIT_32, xr, xgr))); break; #undef LEVAL } /* store result to memory if required */ if (dst & SLJIT_MEM) - return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP); + return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32)); return SLJIT_SUCCESS; } @@ -2913,7 +3264,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 src, sljit_sw srcw) { sljit_u8 mask = get_cc(compiler, type & 0xff); - sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP); + sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32); sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; CHECK_ERROR(); @@ -2927,7 +3278,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil #define LEVAL(i) i(dst_r, src_r, mask) if (have_lscond1()) return push_inst(compiler, - WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr)); + WHEN2(dst_reg & SLJIT_32, locr, locgr)); #undef LEVAL @@ -2991,7 +3342,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( diff --git a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c index 28886405afc..218992b3554 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c @@ -35,16 +35,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, #define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2)) -static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); switch (op) { case SLJIT_MOV: - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV_P: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if (dst != src2) return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); @@ -59,8 +56,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); } - else if (dst != src2) - SLJIT_UNREACHABLE(); + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_MOV_U16: @@ -70,13 +66,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); } - else if (dst != src2) - SLJIT_UNREACHABLE(); + SLJIT_ASSERT(dst == src2); return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DRF(dst, flags)); case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); @@ -89,22 +84,24 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl /* Loop. */ FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, BICC | DA(0xe) | ((sljit_ins)-2 & DISP_MASK), UNMOVABLE_INS)); return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_ADDC: - return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUBC: - return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_MUL: compiler->status_flags_state = 0; @@ -116,13 +113,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); case SLJIT_AND: - return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_OR: - return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_XOR: - return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SHL: FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); @@ -147,7 +144,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t sljit_s32 word_reg_index = 8; sljit_s32 float_arg_index = 1; sljit_s32 double_arg_count = 0; - sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw); + sljit_u32 float_offset = (16 + 6) * sizeof(sljit_sw); sljit_s32 types = 0; sljit_s32 reg = 0; sljit_s32 move_to_tmp2 = 0; @@ -155,18 +152,12 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t if (src) reg = reg_map[*src & REG_MASK]; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_index++; - if (reg_index == reg) - move_to_tmp2 = 1; - reg_index++; - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_index++; double_arg_count++; @@ -174,36 +165,37 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t move_to_tmp2 = 1; reg_index += 2; break; + case SLJIT_ARG_TYPE_F32: + float_arg_index++; + if (reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + break; default: - if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg) + if (reg_index != word_reg_index && reg_index == reg) move_to_tmp2 = 1; reg_index++; word_reg_index++; break; } - if (move_to_tmp2) { - move_to_tmp2 = 0; - if (reg < 14) - FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); - *src = TMP_REG1; - } + arg_types >>= SLJIT_ARG_SHIFT; + } - arg_types >>= SLJIT_DEF_SHIFT; + if (move_to_tmp2) { + if (reg < 14) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); + *src = TMP_REG1; } arg_types = types; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_index--; - FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - float_offset -= sizeof(sljit_f64); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_index--; if (float_arg_index == 4 && double_arg_count == 4) { + /* The address is not doubleword aligned, so two instructions are required to store the double. */ FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS)); FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS)); } @@ -211,36 +203,41 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); float_offset -= sizeof(sljit_f64); break; + case SLJIT_ARG_TYPE_F32: + float_arg_index--; + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; default: break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } float_offset = (16 + 6) * sizeof(sljit_sw); while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - reg_index--; - if (reg_index < 14) - FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - float_offset -= sizeof(sljit_f64); - break; + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: reg_index -= 2; if (reg_index < 14) { if ((reg_index & 0x1) != 0) { FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - if (reg_index < 13) + if (reg_index < 8 + 6 - 1) FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1)); } - else + else FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); } float_offset -= sizeof(sljit_f64); break; + case SLJIT_ARG_TYPE_F32: + reg_index--; + if (reg_index < 8 + 6) + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + float_offset -= sizeof(sljit_f64); + break; default: reg_index--; word_reg_index--; @@ -254,7 +251,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -282,5 +279,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_set_jump_addr(addr, new_constant, executable_offset); + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c index e833f09d7a5..c8d19e16c6c 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c @@ -98,36 +98,37 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { - 0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15 + 0, 8, 9, 10, 11, 23, 22, 21, 20, 19, 18, 17, 16, 29, 28, 27, 26, 25, 24, 14, 1, 12, 13, 15 }; static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 2, 4, 6, 8, 10, 12, 14 + 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }; /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define D(d) (reg_map[d] << 25) -#define FD(d) (freg_map[d] << 25) -#define FDN(d) ((freg_map[d] | 0x1) << 25) -#define DA(d) ((d) << 25) -#define S1(s1) (reg_map[s1] << 14) -#define FS1(s1) (freg_map[s1] << 14) -#define S1A(s1) ((s1) << 14) -#define S2(s2) (reg_map[s2]) -#define FS2(s2) (freg_map[s2]) -#define FS2N(s2) (freg_map[s2] | 0x1) -#define S2A(s2) (s2) +#define D(d) ((sljit_ins)reg_map[d] << 25) +#define FD(d) ((sljit_ins)freg_map[d] << 25) +#define FDN(d) (((sljit_ins)freg_map[d] | 0x1) << 25) +#define DA(d) ((sljit_ins)(d) << 25) +#define S1(s1) ((sljit_ins)reg_map[s1] << 14) +#define FS1(s1) ((sljit_ins)freg_map[s1] << 14) +#define S1A(s1) ((sljit_ins)(s1) << 14) +#define S2(s2) ((sljit_ins)reg_map[s2]) +#define FS2(s2) ((sljit_ins)freg_map[s2]) +#define FS2N(s2) ((sljit_ins)freg_map[s2] | 0x1) +#define S2A(s2) ((sljit_ins)(s2)) #define IMM_ARG 0x2000 -#define DOP(op) ((op) << 5) -#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) +#define DOP(op) ((sljit_ins)(op) << 5) +#define IMM(imm) (((sljit_ins)(imm) & 0x1fff) | IMM_ARG) #define DR(dr) (reg_map[dr]) -#define OPC1(opcode) ((opcode) << 30) -#define OPC2(opcode) ((opcode) << 22) -#define OPC3(opcode) ((opcode) << 19) +#define DRF(dr, flags) ((sljit_s32)(reg_map[dr] | ((flags) & SET_FLAGS))) +#define OPC1(opcode) ((sljit_ins)(opcode) << 30) +#define OPC2(opcode) ((sljit_ins)(opcode) << 22) +#define OPC3(opcode) ((sljit_ins)(opcode) << 19) #define SET_FLAGS OPC3(0x10) #define ADD (OPC1(0x2) | OPC3(0x00)) @@ -156,6 +157,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) #define JMPL (OPC1(0x2) | OPC3(0x38)) #define LDD (OPC1(0x3) | OPC3(0x03)) +#define LDDF (OPC1(0x3) | OPC3(0x23)) +#define LDF (OPC1(0x3) | OPC3(0x20)) #define LDUW (OPC1(0x3) | OPC3(0x00)) #define NOP (OPC1(0x0) | OPC2(0x04)) #define OR (OPC1(0x2) | OPC3(0x02)) @@ -170,6 +173,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) #define SRL (OPC1(0x2) | OPC3(0x26)) #define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define STD (OPC1(0x3) | OPC3(0x07)) #define STDF (OPC1(0x3) | OPC3(0x27)) #define STF (OPC1(0x3) | OPC3(0x24)) #define STW (OPC1(0x3) | OPC3(0x04)) @@ -183,7 +187,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) #define MAX_DISP (0x1fffff) #define MIN_DISP (-0x200000) -#define DISP_MASK (0x3fffff) +#define DISP_MASK ((sljit_ins)0x3fffff) #define BICC (OPC1(0x0) | OPC2(0x2)) #define FBFCC (OPC1(0x0) | OPC2(0x6)) @@ -274,7 +278,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } } - diff += sizeof(sljit_ins); + diff += SSIZE_OF(ins); if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; @@ -300,7 +304,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw word_count; sljit_uw next_addr; sljit_sw executable_offset; - sljit_uw addr; + sljit_sw addr; struct sljit_label *label; struct sljit_jump *jump; @@ -340,7 +344,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { /* Just recording the address. */ label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } if (jump && jump->addr == word_count) { @@ -373,7 +377,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (label && label->size == word_count) { label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; } @@ -386,27 +390,27 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { do { - addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_CALL) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); - buf_ptr[0] = CALL | (addr & 0x3fffffff); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= 0x1fffffff && addr >= -0x20000000); + buf_ptr[0] = CALL | ((sljit_ins)addr & 0x3fffffff); break; } if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); - buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); + addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; + SLJIT_ASSERT(addr <= MAX_DISP && addr >= MIN_DISP); + buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | ((sljit_ins)addr & DISP_MASK); break; } /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000)); - buf_ptr[0] |= (addr >> 10) & 0x3fffff; - buf_ptr[1] |= addr & 0x3ff; + buf_ptr[0] |= (sljit_ins)(addr >> 10) & 0x3fffff; + buf_ptr[1] |= (sljit_ins)addr & 0x3ff; #else #error "Implementation required" #endif @@ -416,7 +420,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil put_label = compiler->put_labels; while (put_label) { - addr = put_label->label->addr; + addr = (sljit_sw)put_label->label->addr; buf_ptr = (sljit_ins *)put_label->addr; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) @@ -431,7 +435,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); @@ -487,13 +491,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define ALT_KEEP_CACHE 0x00040 #define CUMULATIVE_OP 0x00080 #define IMM_OP 0x00100 -#define SRC2_IMM 0x00200 +#define MOVE_OP 0x00200 +#define SRC2_IMM 0x00400 -#define REG_DEST 0x00400 -#define REG2_SOURCE 0x00800 -#define SLOW_SRC1 0x01000 -#define SLOW_SRC2 0x02000 -#define SLOW_DEST 0x04000 +#define REG_DEST 0x00800 +#define REG2_SOURCE 0x01000 +#define SLOW_SRC1 0x02000 +#define SLOW_SRC2 0x04000 +#define SLOW_DEST 0x08000 /* SET_FLAGS (0x10 << 19) also belong here! */ @@ -507,6 +512,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { + sljit_s32 reg_index, types, tmp; + sljit_u32 float_offset, args_offset; + sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index; + CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); @@ -514,7 +523,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; compiler->local_size = local_size; - if (local_size <= SIMM_MAX) { + if (local_size <= -SIMM_MIN) { FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); } else { @@ -522,7 +531,88 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); } - /* Arguments are in their appropriate registers. */ + arg_types >>= SLJIT_ARG_SHIFT; + + types = arg_types; + float_offset = 16 * sizeof(sljit_sw); + reg_index = 24; + + while (types && reg_index < 24 + 6) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (reg_index & 0x1) { + FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + if (reg_index >= 24 + 6 - 1) + break; + FAIL_IF(push_inst(compiler, STW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), MOVABLE_INS)); + } else + FAIL_IF(push_inst(compiler, STD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + + float_offset += sizeof(sljit_f64); + reg_index++; + break; + case SLJIT_ARG_TYPE_F32: + FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset += sizeof(sljit_f64); + break; + } + + reg_index++; + types >>= SLJIT_ARG_SHIFT; + } + + args_offset = (16 + 1 + 6) * sizeof(sljit_sw); + float_offset = 16 * sizeof(sljit_sw); + reg_index = 24; + saved_arg_index = 24; + scratch_arg_index = 8 - 1; + float_arg_index = 1; + + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + if (reg_index < 24 + 6 - 1) { + FAIL_IF(push_inst(compiler, LDDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + } else if (reg_index < 24 + 6) { + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset), MOVABLE_INS)); + } else { + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset + sizeof(sljit_sw)), MOVABLE_INS)); + } + + float_arg_index++; + float_offset += sizeof(sljit_f64); + reg_index++; + break; + case SLJIT_ARG_TYPE_F32: + if (reg_index < 24 + 6) + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS)); + float_arg_index++; + float_offset += sizeof(sljit_f64); + break; + default: + scratch_arg_index++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = saved_arg_index++; + if (tmp == reg_index) + break; + } else + tmp = scratch_arg_index; + + if (reg_index < 24 + 6) + FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp)); + else + FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp)); + break; + } + + reg_index++; + arg_types >>= SLJIT_ARG_SHIFT; + } return SLJIT_SUCCESS; } @@ -539,12 +629,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(SLJIT_R0) | S2(0), UNMOVABLE_INS); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - if (op != SLJIT_MOV || !FAST_IS_REG(src)) { + if (TYPE_CAST_NEEDED(op) || !FAST_IS_REG(src)) { FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); src = SLJIT_R0; } @@ -591,7 +690,7 @@ static const sljit_ins data_transfer_insts[16 + 4] = { #undef ARCH_32_64 /* Can perform an operation using at most 1 instruction. */ -static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { SLJIT_ASSERT(arg & SLJIT_MEM); @@ -632,7 +731,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj } /* Emit the necessary instructions. See can_cache above. */ -static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 base, arg2, delay_slot; sljit_ins dest; @@ -660,7 +759,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl arg2 = reg; else /* It must be a mov operation, so tmp1 must be free to use. */ arg2 = TMP_REG1; - FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2))); + FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | (sljit_ins)argw, DR(arg2))); } } else { @@ -692,7 +791,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); } -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { if (getput_arg_fast(compiler, flags, reg, arg, argw)) return compiler->error; @@ -701,14 +800,14 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) { if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) return compiler->error; return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } -static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -727,11 +826,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 compiler->cache_argw = 0; } - if (dst != SLJIT_UNUSED) { + if (dst != TMP_REG2) { if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) + if (flags & MOVE_OP) sugg_src2_r = dst_r; } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) @@ -782,7 +881,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P) + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) dst_r = src2_r; } else if (src2 & SLJIT_IMM) { @@ -793,8 +892,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } else { src2_r = 0; - if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM)) - dst_r = 0; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } } } } @@ -888,7 +991,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); @@ -898,33 +1001,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile op = GET_OPCODE(op); switch (op) { case SLJIT_MOV: - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) case SLJIT_MOV_U32: - return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_MOV_S32: - return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_U8: - return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); case SLJIT_MOV_S8: - return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); case SLJIT_MOV_U16: - return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); case SLJIT_MOV_S16: - return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); case SLJIT_NOT: case SLJIT_CLZ: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: - return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); } return SLJIT_SUCCESS; @@ -935,17 +1034,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - op = GET_OPCODE(op); switch (op) { case SLJIT_ADD: @@ -975,6 +1071,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1015,7 +1125,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1027,8 +1137,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) +#define FLOAT_DATA(op) ((sljit_ins)DOUBLE_DATA | (((sljit_ins)(op) & SLJIT_32) >> 7)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_32) ? single : double) #define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, @@ -1108,11 +1218,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil compiler->cache_arg = 0; compiler->cache_argw = 0; - SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1126,7 +1236,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil if (src != dst_r) { if (dst_r != TMP_FREG1) { FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (!(op & SLJIT_F32_OP)) + if (!(op & SLJIT_32)) FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); } else @@ -1135,17 +1245,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil break; case SLJIT_NEG_F64: FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_F32_OP)) + if (dst_r != src && !(op & SLJIT_32)) FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); break; case SLJIT_ABS_F64: FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_F32_OP)) + if (dst_r != src && !(op & SLJIT_32)) FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); break; case SLJIT_CONV_F64_FROM_F32: FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS)); - op ^= SLJIT_F32_OP; + op ^= SLJIT_32; break; } @@ -1288,10 +1398,12 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_LESS: case SLJIT_GREATER_F64: /* Unordered. */ + case SLJIT_CARRY: return DA(0x5); case SLJIT_GREATER_EQUAL: case SLJIT_LESS_EQUAL_F64: + case SLJIT_NOT_CARRY: return DA(0xd); case SLJIT_GREATER: @@ -1315,15 +1427,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return DA(0x2); case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return DA(0x9); + /* fallthrough */ case SLJIT_UNORDERED_F64: return DA(0x7); case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return DA(0x1); + /* fallthrough */ case SLJIT_ORDERED_F64: return DA(0xf); @@ -1412,7 +1526,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = srcw; + jump->u.target = (sljit_uw)srcw; if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; @@ -1460,7 +1574,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 reg; + sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c index 79a7e8bba59..b9a7b397899 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_32.c @@ -26,6 +26,10 @@ /* x86 32-bit arch dependent functions. */ +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm) { sljit_u8 *inst; @@ -38,314 +42,8 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s return SLJIT_SUCCESS; } -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) -{ - sljit_s32 type = jump->flags >> TYPE_SHIFT; - - if (type == SLJIT_JUMP) { - *code_ptr++ = JMP_i32; - jump->addr++; - } - else if (type >= SLJIT_FAST_CALL) { - *code_ptr++ = CALL_i32; - jump->addr++; - } - else { - *code_ptr++ = GROUP_0F; - *code_ptr++ = get_jump_code(type); - jump->addr += 2; - } - - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MW; - else - sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset); - code_ptr += 4; - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 args, size; - sljit_u8 *inst; - - CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - /* Emit ENDBR32 at function entry if needed. */ - FAIL_IF(emit_endbranch(compiler)); - - args = get_arg_count(arg_types); - compiler->args = args; - - /* [esp+0] for saving temporaries and function calls. */ - compiler->stack_tmp_size = 2 * sizeof(sljit_sw); - -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (scratches > 3) - compiler->stack_tmp_size = 3 * sizeof(sljit_sw); -#endif - - compiler->saveds_offset = compiler->stack_tmp_size; - if (scratches > 3) - compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); - - compiler->locals_offset = compiler->saveds_offset; - - if (saveds > 3) - compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); - - if (options & SLJIT_F64_ALIGNMENT) - compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); - - size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); -#else - size += (args > 0 ? (2 + args * 3) : 0); -#endif - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - PUSH_REG(reg_map[TMP_REG1]); -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args > 0) { - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; - } -#endif - if (saveds > 2 || scratches > 9) - PUSH_REG(reg_map[SLJIT_S2]); - if (saveds > 1 || scratches > 10) - PUSH_REG(reg_map[SLJIT_S1]); - if (saveds > 0 || scratches > 11) - PUSH_REG(reg_map[SLJIT_S0]); - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args > 0) { - inst[0] = MOV_r_rm; - inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; - inst += 2; - } - if (args > 1) { - inst[0] = MOV_r_rm; - inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; - inst += 2; - } - if (args > 2) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; - inst[2] = 0x24; - inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ - } -#else - if (args > 0) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; - inst[2] = sizeof(sljit_sw) * 2; - inst += 3; - } - if (args > 1) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; - inst[2] = sizeof(sljit_sw) * 3; - inst += 3; - } - if (args > 2) { - inst[0] = MOV_r_rm; - inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; - inst[2] = sizeof(sljit_sw) * 4; - } -#endif - - SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); - -#if defined(__APPLE__) - /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ - saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; -#else - if (options & SLJIT_F64_ALIGNMENT) - local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); - else - local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); -#endif - - compiler->local_size = local_size; - -#ifdef _WIN32 - if (local_size > 0) { - if (local_size <= 4 * 4096) { - if (local_size > 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); - if (local_size > 2 * 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); - if (local_size > 3 * 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); - } - else { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); - EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12); - - SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); - - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1)); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - - INC_SIZE(2); - inst[0] = JNE_i8; - inst[1] = (sljit_s8) -16; - } - - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); - } -#endif - - SLJIT_ASSERT(local_size > 0); - -#if !defined(__APPLE__) - if (options & SLJIT_F64_ALIGNMENT) { - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0); - - /* Some space might allocated during sljit_grow_stack() above on WIN32. */ - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw))); - -#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->local_size > 1024) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw))); -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - - INC_SIZE(6); - inst[0] = GROUP_BINARY_81; - inst[1] = MOD_REG | AND | reg_map[SLJIT_SP]; - sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1)); - - /* The real local size must be used. */ - return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0); - } -#endif - return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - compiler->args = get_arg_count(arg_types); - - /* [esp+0] for saving temporaries and function calls. */ - compiler->stack_tmp_size = 2 * sizeof(sljit_sw); - -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (scratches > 3) - compiler->stack_tmp_size = 3 * sizeof(sljit_sw); -#endif - - compiler->saveds_offset = compiler->stack_tmp_size; - if (scratches > 3) - compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); - - compiler->locals_offset = compiler->saveds_offset; - - if (saveds > 3) - compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); - - if (options & SLJIT_F64_ALIGNMENT) - compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); - -#if defined(__APPLE__) - saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; -#else - if (options & SLJIT_F64_ALIGNMENT) - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); - else - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); -#endif - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 size; - sljit_u8 *inst; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - SLJIT_ASSERT(compiler->args >= 0); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - - SLJIT_ASSERT(compiler->local_size > 0); - -#if !defined(__APPLE__) - if (compiler->options & SLJIT_F64_ALIGNMENT) - EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) - else - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#else - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#endif - - size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + - (compiler->saveds <= 3 ? compiler->saveds : 3); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args > 2) - size += 2; -#endif - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - - if (compiler->saveds > 0 || compiler->scratches > 11) - POP_REG(reg_map[SLJIT_S0]); - if (compiler->saveds > 1 || compiler->scratches > 10) - POP_REG(reg_map[SLJIT_S1]); - if (compiler->saveds > 2 || compiler->scratches > 9) - POP_REG(reg_map[SLJIT_S2]); - POP_REG(reg_map[TMP_REG1]); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args > 2) - RET_I16(sizeof(sljit_sw)); - else - RET(); -#else - RET(); -#endif - - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - /* Size contains the flags as well. */ -static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, /* The register or immediate operand. */ sljit_s32 a, sljit_sw imma, /* The general operand (not immediate). */ @@ -353,8 +51,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 { sljit_u8 *inst; sljit_u8 *buf_ptr; - sljit_s32 flags = size & ~0xf; - sljit_s32 inst_size; + sljit_u8 reg_map_b; + sljit_uw flags = size; + sljit_uw inst_size; /* Both cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); @@ -367,8 +66,6 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); - /* We don't support (%ebp). */ - SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5); size &= 0xf; inst_size = size; @@ -381,7 +78,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 /* Calculate size of b. */ inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { - if ((b & REG_MASK) == SLJIT_UNUSED) + if (!(b & REG_MASK)) inst_size += sizeof(sljit_sw); else if (immb != 0 && !(b & OFFS_REG_MASK)) { /* Immediate operand. */ @@ -390,11 +87,13 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 else inst_size += sizeof(sljit_sw); } + else if (reg_map[b & REG_MASK] == 5) + inst_size += sizeof(sljit_s8); if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) b |= TO_OFFS_REG(SLJIT_SP); - if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) + if (b & OFFS_REG_MASK) inst_size += 1; /* SIB byte. */ } @@ -445,9 +144,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (a & SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) - *buf_ptr = reg_map[a] << 3; + *buf_ptr = U8(reg_map[a] << 3); else - *buf_ptr = a << 3; + *buf_ptr = U8(a << 3); } else { if (a & SLJIT_IMM) { @@ -460,27 +159,30 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 *buf_ptr = 0; } - if (!(b & SLJIT_MEM)) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); - else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { - if (immb != 0) { + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b)); + buf_ptr++; + } else if (b & REG_MASK) { + reg_map_b = reg_map[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) { + if (immb != 0 || reg_map_b == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; else *buf_ptr |= 0x80; } - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) - *buf_ptr++ |= reg_map[b & REG_MASK]; + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_map_b; else { *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3); + *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3)); } - if (immb != 0) { + if (immb != 0 || reg_map_b == 5) { if (immb <= 127 && immb >= -128) - *buf_ptr++ = immb; /* 8 bit displacement. */ + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ else { sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_sw); @@ -489,7 +191,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 } else { *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6); + *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); } } else { @@ -500,9 +202,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (a & SLJIT_IMM) { if (flags & EX86_BYTE_ARG) - *buf_ptr = imma; + *buf_ptr = U8(imma); else if (flags & EX86_HALF_ARG) - sljit_unaligned_store_s16(buf_ptr, imma); + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); else if (!(flags & EX86_SHIFT_INS)) sljit_unaligned_store_sw(buf_ptr, imma); } @@ -510,35 +212,450 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); } +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + + if (type == SLJIT_JUMP) { + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + *code_ptr++ = CALL_i32; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MW; + else + sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset)); + code_ptr += 4; + + return code_ptr; +} + +#define ENTER_R2_USED 0x00001 +#define ENTER_R2_TO_S 0x00002 +#define ENTER_R2_TO_R0 0x00004 +#define ENTER_R1_TO_S 0x00008 +#define ENTER_TMP_TO_R4 0x00010 +#define ENTER_TMP_TO_S 0x00020 + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 word_arg_count, saved_arg_count, float_arg_count; + sljit_s32 size, locals_offset, args_size, types, status; + sljit_u8 *inst; +#ifdef _WIN32 + sljit_s32 r2_offset = -1; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR32 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + + arg_types >>= SLJIT_ARG_SHIFT; + types = arg_types; + word_arg_count = 0; + saved_arg_count = 0; + float_arg_count = 0; + args_size = SSIZE_OF(sw); + status = 0; + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + + if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + saved_arg_count++; + if (saved_arg_count == 4) + status |= ENTER_TMP_TO_S; + } else { + if (word_arg_count == 4) + status |= ENTER_TMP_TO_R4; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count == 3) + status |= ENTER_R2_USED; +#endif + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL)) + break; +#endif + + args_size += SSIZE_OF(sw); + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + args_size -= SSIZE_OF(sw); + compiler->args_size = args_size; + + /* [esp+0] for saving temporaries and function calls. */ + locals_offset = 2 * SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#else + if (scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#endif + + compiler->scratches_offset = locals_offset; + + if (scratches > 3) + locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); + + if (saveds > 3) + locals_offset += (saveds - 3) * SSIZE_OF(sw); + + compiler->locals_offset = locals_offset; + + size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); + FAIL_IF(!inst); + + INC_SIZE((sljit_uw)size); + PUSH_REG(reg_map[TMP_REG1]); + if (saveds > 2 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 10) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 11) + PUSH_REG(reg_map[SLJIT_S0]); + + size *= SSIZE_OF(sw); + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size); + + size += SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + size += args_size; +#endif + + local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size; + compiler->local_size = local_size; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + size -= args_size; +#endif + + word_arg_count = 0; + saved_arg_count = 0; + args_size = size; + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) { + if (word_arg_count == 1) { + if (status & ENTER_R2_USED) { + EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0); + } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + status |= ENTER_R2_TO_S; + saved_arg_count++; + } else + status |= ENTER_R2_TO_R0; + } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + status |= ENTER_R1_TO_S; + saved_arg_count++; + } + break; + } +#endif + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4)); + + if (word_arg_count <= 3) { +#ifdef _WIN32 + if (word_arg_count == 3 && local_size > 4 * 4096) + r2_offset = local_size + args_size; + else +#endif + EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + } + } else { + SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S)); + + if (saved_arg_count <= 3) + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + saved_arg_count++; + } + args_size += SSIZE_OF(sw); + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) { + if (status & ENTER_R2_TO_R0) + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0); + + saved_arg_count = 0; + if (status & ENTER_R2_TO_S) { + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0); + saved_arg_count++; + } + + if (status & ENTER_R1_TO_S) + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0); + } +#endif + + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); + +#ifdef _WIN32 + SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096); + + if (local_size > 4096) { + if (local_size <= 4 * 4096) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + + if (local_size > 2 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12); + + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = LOOP_i8; + inst[1] = (sljit_u8)-16; + local_size &= 0xfff; + } + } + + if (local_size > 0) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + } + + if (r2_offset != -1) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + +#else /* !_WIN32 */ + + SLJIT_ASSERT(local_size > 0); + + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#endif /* _WIN32 */ + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) { + size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 args_size, locals_offset; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + sljit_s32 word_arg_count = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + arg_types >>= SLJIT_ARG_SHIFT; + args_size = 0; + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count >= 2) + args_size += SSIZE_OF(sw); + word_arg_count++; +#else + args_size += SSIZE_OF(sw); +#endif + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + compiler->args_size = args_size; + + /* [esp+0] for saving temporaries and function calls. */ + locals_offset = 2 * SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#else + if (scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#endif + + compiler->scratches_offset = locals_offset; + + if (scratches > 3) + locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); + + if (saveds > 3) + locals_offset += (saveds - 3) * SSIZE_OF(sw); + + compiler->locals_offset = locals_offset; + + saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + saveds += args_size; +#endif + + compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) +{ + sljit_uw size; + sljit_u8 *inst; + + size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3)); + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + + if (compiler->saveds > 0 || compiler->scratches > 11) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 10) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S2]); + POP_REG(reg_map[TMP_REG1]); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_uw size; + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + SLJIT_ASSERT(compiler->args_size >= 0); + SLJIT_ASSERT(compiler->local_size > 0); + + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); + + FAIL_IF(emit_stack_frame_release(compiler)); + + size = 1; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) + size = 3; +#endif + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) { + RET_I16(U8(compiler->args_size)); + return SLJIT_SUCCESS; + } +#endif + + RET(); + return SLJIT_SUCCESS; +} + /* --------------------------------------------------------------------- */ /* Call / return instructions */ /* --------------------------------------------------------------------- */ #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) -static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) { - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; if (word_arg_count > 2) - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (word_arg_count_ptr) @@ -548,12 +665,12 @@ static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word } static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, - sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) + sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) { sljit_u8 *inst; sljit_s32 float_arg_count; - if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) { + if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); @@ -561,41 +678,40 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, } else if (stack_size > 0) { if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); stack_size = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; float_arg_count = 0; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_count++; - FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_count++; FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; if (word_arg_count == 3) { EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0); - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); } else if (word_arg_count == 4) { EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0); - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); } break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } } @@ -605,7 +721,7 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, FAIL_IF(!inst); INC_SIZE(1); - *inst++ = XCHG_EAX_r | reg_map[SLJIT_R2]; + *inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]); } else { inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); @@ -613,7 +729,7 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, INC_SIZE(2); *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; + *inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]); } } @@ -624,77 +740,73 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) { - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (word_arg_count_ptr) *word_arg_count_ptr = word_arg_count; - if (stack_size <= compiler->stack_tmp_size) + if (stack_size <= compiler->scratches_offset) return 0; -#if defined(__APPLE__) - return ((stack_size - compiler->stack_tmp_size + 15) & ~15); -#else - return stack_size - compiler->stack_tmp_size; -#endif + return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf); } static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, - sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count) + sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count) { sljit_s32 float_arg_count = 0; + sljit_u8 *inst; if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); if (stack_size > 0) - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); stack_size = 0; word_arg_count = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - float_arg_count++; - FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f32); - break; + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: float_arg_count++; FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += sizeof(sljit_f64); + stack_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += SSIZE_OF(f32); break; default: word_arg_count++; EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0); - stack_size += sizeof(sljit_sw); + stack_size += SSIZE_OF(sw); break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -707,13 +819,12 @@ static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 single; if (stack_size > 0) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0); - if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) return SLJIT_SUCCESS; - single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32); + single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32); inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); FAIL_IF(!inst); @@ -725,16 +836,399 @@ static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); } +static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, + sljit_s32 *extra_space, sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_sw args_size, prev_args_size, saved_regs_size; + sljit_sw types, word_arg_count, float_arg_count; + sljit_sw stack_size, prev_stack_size, min_size, offset; + sljit_sw word_arg4_offset; + sljit_u8 r2_offset = 0; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL; +#endif + sljit_u8* inst; + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); + + word_arg_count = 0; + float_arg_count = 0; + arg_types >>= SLJIT_ARG_SHIFT; + types = 0; + args_size = 0; + + while (arg_types != 0) { + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); + + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + float_arg_count++; + break; + default: + word_arg_count++; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!fast_call || word_arg_count > 2) + args_size += SSIZE_OF(sw); +#else + args_size += SSIZE_OF(sw); +#endif + break; + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (args_size <= compiler->args_size +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call) +#endif /* SLJIT_X86_32_FASTCALL */ + && 1) { +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + *extra_space = fast_call ? 0 : args_size; + prev_args_size = compiler->args_size; + stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; +#else /* !SLJIT_X86_32_FASTCALL */ + *extra_space = 0; + stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; +#endif /* SLJIT_X86_32_FASTCALL */ + + offset = stack_size + compiler->local_size; + + if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + r2_offset = sizeof(sljit_sw); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(compiler->options & SLJIT_ENTER_CDECL)) { + if (!fast_call) + offset -= SSIZE_OF(sw); + + if (word_arg_count >= 3) { + word_arg4_offset = SSIZE_OF(sw); + + if (word_arg_count + float_arg_count >= 4) { + word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw); + if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64); + } + + /* In cdecl mode, at least one more word value must + * be present on the stack before the return address. */ + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0); + } + + if (fast_call) { + if (args_size < prev_args_size) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0); + } + } else if (prev_args_size > 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + } +#endif /* SLJIT_X86_32_FASTCALL */ + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) { + EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0); + break; + } +#endif + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) + break; +#endif + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + break; + case 4: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); + FAIL_IF(emit_stack_frame_release(compiler)); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args_size < prev_args_size) + BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0); +#endif + + return SLJIT_SUCCESS; + } + + stack_size = args_size + SSIZE_OF(sw); + + if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) { + r2_offset = SSIZE_OF(sw); + stack_size += SSIZE_OF(sw); + } + + if (word_arg_count >= 3) + stack_size += SSIZE_OF(sw); + + prev_args_size = 0; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(compiler->options & SLJIT_ENTER_CDECL)) + prev_args_size = compiler->args_size; +#endif + + prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; + min_size = prev_stack_size + compiler->local_size; + + word_arg4_offset = compiler->scratches_offset; + + if (stack_size > min_size) { + BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0); + if (src == SLJIT_MEM1(SLJIT_SP)) + srcw += stack_size - min_size; + word_arg4_offset += stack_size - min_size; + } + else + stack_size = min_size; + + if (word_arg_count >= 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0); + + if (word_arg_count >= 4) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset); + } + + if (!(src & SLJIT_IMM) && src != SLJIT_R0) { + if (word_arg_count >= 1) { + SLJIT_ASSERT(r2_offset == sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0); + } + EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); + } + + /* Restore saved registers. */ + offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset); + + if (compiler->saveds > 2 || compiler->scratches > 9) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if (compiler->saveds > 1 || compiler->scratches > 10) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + if (compiler->saveds > 0 || compiler->scratches > 11) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset); + } + + /* Copy fourth argument and return address. */ +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) { + offset = stack_size; + *extra_space = 0; + + if (word_arg_count >= 4 && prev_args_size == 0) { + offset -= SSIZE_OF(sw); + inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + + SLJIT_ASSERT(args_size != prev_args_size); + } else { + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + + if (args_size != prev_args_size) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw)); + } + + if (args_size != prev_args_size) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0); + } else { +#endif /* SLJIT_X86_32_FASTCALL */ + offset = stack_size - SSIZE_OF(sw); + *extra_space = args_size; + + if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) { + offset -= SSIZE_OF(sw); + inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + + SLJIT_ASSERT(prev_args_size > 0); + } else { + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } + + if (prev_args_size > 0) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw)); + } + + /* Copy return address. */ + if (prev_args_size > 0) + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + } +#endif /* SLJIT_X86_32_FASTCALL */ + + while (types != 0) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + offset -= SSIZE_OF(f64); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F32: + offset -= SSIZE_OF(f32); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); + float_arg_count--; + break; + default: + switch (word_arg_count) { + case 1: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) { + EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0); + break; + } +#endif + offset -= SSIZE_OF(sw); + if (r2_offset != 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + } else + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); + break; + case 2: +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (fast_call) + break; +#endif + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); + break; + case 3: + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); + break; + } + word_arg_count--; + break; + } + types >>= SLJIT_ARG_SHIFT; + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + /* Skip return address. */ + if (fast_call) + offset -= SSIZE_OF(sw); +#endif + + SLJIT_ASSERT(offset >= 0); + + if (offset == 0) + return SLJIT_SUCCESS; + + BINARY_IMM32(ADD, offset, SLJIT_SP, 0); + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space) +{ + /* Called when stack consumption cannot be reduced to 0. */ + sljit_u8 *inst; + + BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { struct sljit_jump *jump; - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + if (type & SLJIT_CALL_RETURN) { + stack_size = type; + PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + if (stack_size == 0) { + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + return sljit_emit_jump(compiler, type); + } + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size)); + return jump; + } + #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if ((type & 0xff) == SLJIT_CALL) { stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); @@ -772,7 +1266,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { - sljit_s32 stack_size = 0; + sljit_sw stack_size = 0; sljit_s32 word_arg_count; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) sljit_s32 swap_args; @@ -781,6 +1275,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + if (type & SLJIT_CALL_RETURN) { + stack_size = type; + FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw)); + + if (!(src & SLJIT_IMM)) { + src = SLJIT_R0; + srcw = 0; + } + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + if (stack_size == 0) + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + return emit_tail_call_end(compiler, stack_size); + } + #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3); @@ -800,7 +1315,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); - compiler->saveds_offset += stack_size; + compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -809,7 +1324,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi #endif FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - compiler->saveds_offset -= stack_size; + compiler->scratches_offset -= stack_size; compiler->locals_offset -= stack_size; return post_call_with_args(compiler, arg_types, 0); @@ -819,7 +1334,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); - compiler->saveds_offset += stack_size; + compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -828,7 +1343,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi #endif FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - compiler->saveds_offset -= stack_size; + compiler->scratches_offset -= stack_size; compiler->locals_offset -= stack_size; return post_call_with_args(compiler, arg_types, stack_size); @@ -844,10 +1359,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK_EXTRA_REGS(dst, dstw, (void)0); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { /* Unused dest is possible here. */ inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); @@ -895,34 +1406,18 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) { - sljit_s32 size, saved_size; - sljit_s32 has_f64_aligment; + sljit_sw size; /* Don't adjust shadow stack if it isn't enabled. */ - if (!cpu_has_shadow_stack ()) + if (!cpu_has_shadow_stack()) return SLJIT_SUCCESS; - SLJIT_ASSERT(compiler->args >= 0); + SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->local_size > 0); -#if !defined(__APPLE__) - has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT; -#else - has_f64_aligment = 0; -#endif - size = compiler->local_size; - saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw); - if (has_f64_aligment) { - /* mov TMP_REG1, [esp + local_size]. */ - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size); - /* mov TMP_REG1, [TMP_REG1+ saved_size]. */ - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size); - /* Move return address to [esp]. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0); - size = 0; - } else - size += saved_size; + size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); - return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size); + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c index e85b56a61a5..f37df6e1bf0 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_64.c @@ -26,6 +26,10 @@ /* x86 64-bit arch dependent functions. */ +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm) { sljit_u8 *inst; @@ -34,351 +38,15 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); - *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); + *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7)); sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) -{ - sljit_s32 type = jump->flags >> TYPE_SHIFT; - - int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); - - /* The relative jump below specialized for this case. */ - SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); - - if (type < SLJIT_JUMP) { - /* Invert type. */ - *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; - *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); - } - - *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); - *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; - jump->addr = (sljit_uw)code_ptr; - - if (jump->flags & JUMP_LABEL) - jump->flags |= PATCH_MD; - else if (short_addr) - sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); - else - sljit_unaligned_store_sw(code_ptr, jump->u.target); - - code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); - - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]; - - return code_ptr; -} - -static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) -{ - if (max_label > HALFWORD_MAX) { - put_label->addr -= put_label->flags; - put_label->flags = PATCH_MD; - return code_ptr; - } - - if (put_label->flags == 0) { - /* Destination is register. */ - code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); - - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); - - if ((code_ptr[0] & 0x07) != 0) { - code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08); - code_ptr += 2 + sizeof(sljit_s32); - } - else { - code_ptr[0] = code_ptr[1]; - code_ptr += 1 + sizeof(sljit_s32); - } - - put_label->addr = (sljit_uw)code_ptr; - return code_ptr; - } - - code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); - SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); - - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - - if ((code_ptr[1] & 0xf8) == MOV_r_i32) { - code_ptr += 2 + sizeof(sljit_uw); - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - } - - SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); - - code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4); - code_ptr[1] = MOV_rm_i32; - code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3)); - - code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); - put_label->addr = (sljit_uw)code_ptr; - put_label->flags = 0; - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 args, i, tmp, size, saved_register_size; - sljit_u8 *inst; - - CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - /* Emit ENDBR64 at function entry if needed. */ - FAIL_IF(emit_endbranch(compiler)); - - compiler->mode32 = 0; - -#ifdef _WIN64 - /* Two/four register slots for parameters plus space for xmm6 register if needed. */ - if (fscratches >= 6 || fsaveds >= 1) - compiler->locals_offset = 6 * sizeof(sljit_sw); - else - compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); -#endif - - /* Including the return address saved by the call instruction. */ - saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; - for (i = SLJIT_S0; i >= tmp; i--) { - size = reg_map[i] >= 8 ? 2 : 1; - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - INC_SIZE(size); - if (reg_map[i] >= 8) - *inst++ = REX_B; - PUSH_REG(reg_lmap[i]); - } - - for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { - size = reg_map[i] >= 8 ? 2 : 1; - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - INC_SIZE(size); - if (reg_map[i] >= 8) - *inst++ = REX_B; - PUSH_REG(reg_lmap[i]); - } - - args = get_arg_count(arg_types); - - if (args > 0) { - size = args * 3; - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - -#ifndef _WIN64 - if (args > 0) { - inst[0] = REX_W; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; - inst += 3; - } - if (args > 1) { - inst[0] = REX_W | REX_R; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; - inst += 3; - } - if (args > 2) { - inst[0] = REX_W | REX_R; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; - } -#else - if (args > 0) { - inst[0] = REX_W; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; - inst += 3; - } - if (args > 1) { - inst[0] = REX_W; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; - inst += 3; - } - if (args > 2) { - inst[0] = REX_W | REX_B; - inst[1] = MOV_r_rm; - inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; - } -#endif - } - - local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; - compiler->local_size = local_size; - -#ifdef _WIN64 - if (local_size > 0) { - if (local_size <= 4 * 4096) { - if (local_size > 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); - if (local_size > 2 * 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); - if (local_size > 3 * 4096) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); - } - else { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12); - - SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); - - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1)); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - - INC_SIZE(2); - inst[0] = JNE_i8; - inst[1] = (sljit_s8) -19; - } - - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); - } -#endif - - if (local_size > 0) { - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - } - -#ifdef _WIN64 - /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ - if (fscratches >= 6 || fsaveds >= 1) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - sljit_unaligned_store_s32(inst, 0x20247429); - } -#endif - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 saved_register_size; - - CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - -#ifdef _WIN64 - /* Two/four register slots for parameters plus space for xmm6 register if needed. */ - if (fscratches >= 6 || fsaveds >= 1) - compiler->locals_offset = 6 * sizeof(sljit_sw); - else - compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); -#endif - - /* Including the return address saved by the call instruction. */ - saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); - compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 i, tmp, size; - sljit_u8 *inst; - - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - -#ifdef _WIN64 - /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ - if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - sljit_unaligned_store_s32(inst, 0x20247428); - } -#endif - - if (compiler->local_size > 0) { - if (compiler->local_size <= 127) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | ADD | 4; - *inst = compiler->local_size; - } - else { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!inst); - INC_SIZE(7); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_81; - *inst++ = MOD_REG | ADD | 4; - sljit_unaligned_store_s32(inst, compiler->local_size); - } - } - - tmp = compiler->scratches; - for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { - size = reg_map[i] >= 8 ? 2 : 1; - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - INC_SIZE(size); - if (reg_map[i] >= 8) - *inst++ = REX_B; - POP_REG(reg_lmap[i]); - } - - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = tmp; i <= SLJIT_S0; i++) { - size = reg_map[i] >= 8 ? 2 : 1; - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - INC_SIZE(size); - if (reg_map[i] >= 8) - *inst++ = REX_B; - POP_REG(reg_lmap[i]); - } - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - RET(); - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm) { sljit_u8 *inst; - sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32); + sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32); inst = (sljit_u8*)ensure_buf(compiler, 1 + length); FAIL_IF(!inst); @@ -386,11 +54,11 @@ static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sl if (rex) *inst++ = rex; *inst++ = opcode; - sljit_unaligned_store_s32(inst, imm); + sljit_unaligned_store_s32(inst, (sljit_s32)imm); return SLJIT_SUCCESS; } -static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size, +static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size, /* The register or immediate operand. */ sljit_s32 a, sljit_sw imma, /* The general operand (not immediate). */ @@ -399,8 +67,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 sljit_u8 *inst; sljit_u8 *buf_ptr; sljit_u8 rex = 0; - sljit_s32 flags = size & ~0xf; - sljit_s32 inst_size; + sljit_u8 reg_lmap_b; + sljit_uw flags = size; + sljit_uw inst_size; /* The immediate operand must be 32 bit. */ SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); @@ -445,7 +114,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 b |= TO_OFFS_REG(SLJIT_SP); } - if ((b & REG_MASK) == SLJIT_UNUSED) + if (!(b & REG_MASK)) inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ else { if (reg_map[b & REG_MASK] >= 8) @@ -461,7 +130,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 else if (reg_lmap[b & REG_MASK] == 5) inst_size += sizeof(sljit_s8); - if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { + if (b & OFFS_REG_MASK) { inst_size += 1; /* SIB byte. */ if (reg_map[OFFS_REG(b)] >= 8) rex |= REX_X; @@ -533,9 +202,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (a & SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) - *buf_ptr = reg_lmap[a] << 3; + *buf_ptr = U8(reg_lmap[a] << 3); else - *buf_ptr = freg_lmap[a] << 3; + *buf_ptr = U8(freg_lmap[a] << 3); } else { if (a & SLJIT_IMM) { @@ -548,104 +217,490 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 *buf_ptr = 0; } - if (!(b & SLJIT_MEM)) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]); - else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { - if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { + if (!(b & SLJIT_MEM)) { + *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b])); + buf_ptr++; + } else if (b & REG_MASK) { + reg_lmap_b = reg_lmap[b & REG_MASK]; + + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_lmap_b == 5) { + if (immb != 0 || reg_lmap_b == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; else *buf_ptr |= 0x80; } - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) - *buf_ptr++ |= reg_lmap[b & REG_MASK]; + if (!(b & OFFS_REG_MASK)) + *buf_ptr++ |= reg_lmap_b; else { *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); + *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); } - if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { + if (immb != 0 || reg_lmap_b == 5) { if (immb <= 127 && immb >= -128) - *buf_ptr++ = immb; /* 8 bit displacement. */ + *buf_ptr++ = U8(immb); /* 8 bit displacement. */ else { - sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_s32); } } } else { - if (reg_lmap[b & REG_MASK] == 5) - *buf_ptr |= 0x40; *buf_ptr++ |= 0x04; - *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); - if (reg_lmap[b & REG_MASK] == 5) - *buf_ptr++ = 0; + *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); } } else { *buf_ptr++ |= 0x04; *buf_ptr++ = 0x25; - sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_s32); } if (a & SLJIT_IMM) { if (flags & EX86_BYTE_ARG) - *buf_ptr = imma; + *buf_ptr = U8(imma); else if (flags & EX86_HALF_ARG) - sljit_unaligned_store_s16(buf_ptr, imma); + sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma); else if (!(flags & EX86_SHIFT_INS)) - sljit_unaligned_store_s32(buf_ptr, imma); + sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma); } return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); } +/* --------------------------------------------------------------------- */ +/* Enter / return */ +/* --------------------------------------------------------------------- */ + +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) +{ + sljit_uw type = jump->flags >> TYPE_SHIFT; + + int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); + + /* The relative jump below specialized for this case. */ + SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); + + if (type < SLJIT_JUMP) { + /* Invert type. */ + *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10); + *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); + } + + *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); + *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MD; + else if (short_addr) + sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); + else + sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target); + + code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); + + *code_ptr++ = REX_B; + *code_ptr++ = GROUP_FF; + *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); + + return code_ptr; +} + +static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) +{ + if (max_label > HALFWORD_MAX) { + put_label->addr -= put_label->flags; + put_label->flags = PATCH_MD; + return code_ptr; + } + + if (put_label->flags == 0) { + /* Destination is register. */ + code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); + + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); + + if ((code_ptr[0] & 0x07) != 0) { + code_ptr[0] = U8(code_ptr[0] & ~0x08); + code_ptr += 2 + sizeof(sljit_s32); + } + else { + code_ptr[0] = code_ptr[1]; + code_ptr += 1 + sizeof(sljit_s32); + } + + put_label->addr = (sljit_uw)code_ptr; + return code_ptr; + } + + code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); + SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + + if ((code_ptr[1] & 0xf8) == MOV_r_i32) { + code_ptr += 2 + sizeof(sljit_uw); + SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + } + + SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); + + code_ptr[0] = U8(code_ptr[0] & ~0x4); + code_ptr[1] = MOV_rm_i32; + code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); + + code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); + put_label->addr = (sljit_uw)code_ptr; + put_label->flags = 0; + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_uw size; + sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = 0; + sljit_s32 saved_regs_size, tmp, i; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; + sljit_s32 saved_float_regs_offset = 0; + sljit_s32 float_arg_count = 0; +#endif /* _WIN64 */ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + /* Emit ENDBR64 at function entry if needed. */ + FAIL_IF(emit_endbranch(compiler)); + + compiler->mode32 = 0; + + /* Including the return address saved by the call instruction. */ + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0; i > tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + +#ifdef _WIN64 + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_size > 0) { + saved_float_regs_offset = ((local_size + 0xf) & ~0xf); + local_size = saved_float_regs_offset + saved_float_regs_size; + } +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + tmp = 0; +#ifndef _WIN64 + switch (word_arg_count) { + case 0: + tmp = SLJIT_R2; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = TMP_REG1; + break; + default: + tmp = SLJIT_R3; + break; + } +#else /* !_WIN64 */ + switch (word_arg_count + float_arg_count) { + case 0: + tmp = SLJIT_R3; + break; + case 1: + tmp = SLJIT_R1; + break; + case 2: + tmp = SLJIT_R2; + break; + default: + tmp = TMP_REG1; + break; + } +#endif /* _WIN64 */ + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (tmp != SLJIT_R0 + word_arg_count) + EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0); + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0); + saved_arg_count++; + } + word_arg_count++; + } else { +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); + float_arg_count++; + if (float_arg_count != float_arg_count + word_arg_count) + FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32, + float_arg_count, float_arg_count + word_arg_count, 0)); +#endif /* _WIN64 */ + } + arg_types >>= SLJIT_ARG_SHIFT; + } + + local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; + compiler->local_size = local_size; + +#ifdef _WIN64 + if (local_size > 0) { + if (local_size <= 4 * 4096) { + if (local_size > 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + if (local_size > 3 * 4096) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12); + + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + BINARY_IMM32(SUB, 1, TMP_REG1, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + + INC_SIZE(2); + inst[0] = JNE_i8; + inst[1] = (sljit_u8)-21; + local_size &= 0xfff; + } + + if (local_size > 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + } +#endif /* _WIN64 */ + + if (local_size > 0) + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + +#ifdef _WIN64 + if (saved_float_regs_size > 0) { + compiler->mode32 = 1; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_xm_x; + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_xm_x; + saved_float_regs_offset += 16; + } + } +#endif /* _WIN64 */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 saved_regs_size; +#ifdef _WIN64 + sljit_s32 saved_float_regs_size; +#endif /* _WIN64 */ + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + +#ifdef _WIN64 + local_size += SLJIT_LOCALS_OFFSET; + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_size > 0) + local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size; +#else /* !_WIN64 */ + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0); +#endif /* _WIN64 */ + + /* Including the return address saved by the call instruction. */ + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; + return SLJIT_SUCCESS; +} + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) +{ + sljit_uw size; + sljit_s32 i, tmp; + sljit_u8 *inst; +#ifdef _WIN64 + sljit_s32 saved_float_regs_offset; + sljit_s32 fscratches = compiler->fscratches; + sljit_s32 fsaveds = compiler->fsaveds; +#endif /* _WIN64 */ + +#ifdef _WIN64 + saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + + if (saved_float_regs_offset > 0) { + compiler->mode32 = 1; + saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf; + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_x_xm; + saved_float_regs_offset += 16; + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); + *inst++ = GROUP_0F; + *inst = MOVAPS_x_xm; + saved_float_regs_offset += 16; + } + } +#endif /* _WIN64 */ + + if (compiler->local_size > 0) { + if (compiler->local_size <= 127) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | ADD | 4; + *inst = U8(compiler->local_size); + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); + INC_SIZE(7); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | ADD | 4; + sljit_unaligned_store_s32(inst, compiler->local_size); + } + } + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + sljit_u8 *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + return SLJIT_SUCCESS; +} + /* --------------------------------------------------------------------- */ /* Call / return instructions */ /* --------------------------------------------------------------------- */ #ifndef _WIN64 -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) { sljit_s32 src = src_ptr ? (*src_ptr) : 0; sljit_s32 word_arg_count = 0; SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2); - - compiler->mode32 = 0; + SLJIT_ASSERT(!(src & SLJIT_MEM)); /* Remove return value. */ - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) word_arg_count++; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (word_arg_count == 0) return SLJIT_SUCCESS; - if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); - *src_ptr = TMP_REG2; - } - else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2) - *src_ptr = TMP_REG1; - - if (word_arg_count >= 3) + if (word_arg_count >= 3) { + if (src == SLJIT_R2) + *src_ptr = TMP_REG1; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); + } + return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0); } #else -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr) { sljit_s32 src = src_ptr ? (*src_ptr) : 0; sljit_s32 arg_count = 0; @@ -656,16 +711,16 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 }; SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9); + SLJIT_ASSERT(!(src & SLJIT_MEM)); - compiler->mode32 = 0; - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; while (arg_types) { - types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - switch (arg_types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: + switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: + case SLJIT_ARG_TYPE_F32: arg_count++; float_arg_count++; @@ -687,32 +742,26 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - arg_types >>= SLJIT_DEF_SHIFT; + arg_types >>= SLJIT_ARG_SHIFT; } if (!data_trandfer) return SLJIT_SUCCESS; - if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); - *src_ptr = TMP_REG2; - } - while (types) { - switch (types & SLJIT_DEF_MASK) { - case SLJIT_ARG_TYPE_F32: - if (arg_count != float_arg_count) - FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); - arg_count--; - float_arg_count--; - break; + switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: if (arg_count != float_arg_count) FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0)); arg_count--; float_arg_count--; break; + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; default: if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0); @@ -721,7 +770,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t break; } - types >>= SLJIT_DEF_SHIFT; + types >>= SLJIT_ARG_SHIFT; } return SLJIT_SUCCESS; @@ -735,13 +784,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); - PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0)); + compiler->mode32 = 0; + + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_jump(compiler, type); } @@ -752,7 +807,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); - FAIL_IF(call_with_args(compiler, arg_types, &src, srcw)); + compiler->mode32 = 0; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + + FAIL_IF(call_with_args(compiler, arg_types, &src)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -770,10 +843,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { if (reg_map[dst] < 8) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); @@ -850,9 +919,6 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. */ - if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { @@ -903,16 +969,16 @@ static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) sljit_s32 tmp, size; /* Don't adjust shadow stack if it isn't enabled. */ - if (!cpu_has_shadow_stack ()) + if (!cpu_has_shadow_stack()) return SLJIT_SUCCESS; size = compiler->local_size; tmp = compiler->scratches; if (tmp >= SLJIT_FIRST_SAVED_REG) - size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * sizeof(sljit_uw); + size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw); tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; if (SLJIT_S0 >= tmp) - size += (SLJIT_S0 - tmp + 1) * sizeof(sljit_uw); + size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw); - return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size); + return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); } diff --git a/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c b/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c index 515d98aefd1..c7dd9be8fd7 100644 --- a/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c +++ b/thirdparty/pcre2/src/sljit/sljitNativeX86_common.c @@ -65,6 +65,8 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) 15 - R15 */ +#define TMP_FREG (0) + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Last register + 1. */ @@ -77,9 +79,9 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { #define CHECK_EXTRA_REGS(p, w, do) \ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ if (p <= compiler->scratches) \ - w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ + w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ else \ - w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ + w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \ p = SLJIT_MEM1(SLJIT_SP); \ do; \ } @@ -115,11 +117,11 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { /* Args: xmm0-xmm3 */ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 4, 0, 1, 2, 3, 5, 6 + 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; /* low-map. freg_map & 0x7. */ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { - 4, 0, 1, 2, 3, 5, 6 + 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }; #define REX_W 0x48 @@ -143,7 +145,8 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #endif /* SLJIT_CONFIG_X86_32 */ -#define TMP_FREG (0) +#define U8(v) ((sljit_u8)(v)) + /* Size flags for emit_x86_instruction: */ #define EX86_BIN_INS 0x0010 @@ -205,12 +208,15 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define JMP_i32 0xe9 #define JMP_rm (/* GROUP_FF */ 4 << 3) #define LEA_r_m 0x8d +#define LOOP_i8 0xe2 #define MOV_r_rm 0x8b #define MOV_r_i32 0xb8 #define MOV_rm_r 0x89 #define MOV_rm_i32 0xc7 #define MOV_rm8_i8 0xc6 #define MOV_rm8_r8 0x88 +#define MOVAPS_x_xm 0x28 +#define MOVAPS_xm_x 0x29 #define MOVSD_x_xm 0x10 #define MOVSD_xm_x 0x11 #define MOVSXD_r_rm 0x63 @@ -274,14 +280,12 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define MOD_REG 0xc0 #define MOD_DISP8 0x40 -#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) +#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s)) -#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) -#define POP_REG(r) (*inst++ = (POP_r + (r))) -#define RET() (*inst++ = (RET_near)) -#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) -/* r32, r/m32 */ -#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) +#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r))) +#define POP_REG(r) (*inst++ = U8(POP_r + (r))) +#define RET() (*inst++ = RET_near) +#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0) /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads @@ -371,7 +375,7 @@ static void get_cpu_features(void) cpu_has_cmov = (features >> 15) & 0x1; } -static sljit_u8 get_jump_code(sljit_s32 type) +static sljit_u8 get_jump_code(sljit_uw type) { switch (type) { case SLJIT_EQUAL: @@ -383,10 +387,12 @@ static sljit_u8 get_jump_code(sljit_s32 type) return 0x85 /* jne */; case SLJIT_LESS: + case SLJIT_CARRY: case SLJIT_LESS_F64: return 0x82 /* jc */; case SLJIT_GREATER_EQUAL: + case SLJIT_NOT_CARRY: case SLJIT_GREATER_EQUAL_F64: return 0x83 /* jae */; @@ -434,14 +440,14 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { - sljit_s32 type = jump->flags >> TYPE_SHIFT; + sljit_uw type = jump->flags >> TYPE_SHIFT; sljit_s32 short_jump; sljit_uw label_addr; if (jump->flags & JUMP_LABEL) label_addr = (sljit_uw)(code + jump->u.label->size); else - label_addr = jump->u.target - executable_offset; + label_addr = jump->u.target - (sljit_uw)executable_offset; short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; @@ -463,7 +469,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code jump->addr++; } else if (short_jump) { - *code_ptr++ = get_jump_code(type) - 0x10; + *code_ptr++ = U8(get_jump_code(type) - 0x10); jump->addr++; } else { @@ -492,7 +498,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u8 *buf_end; sljit_u8 len; sljit_sw executable_offset; - sljit_sw jump_addr; + sljit_uw jump_addr; struct sljit_label *label; struct sljit_jump *jump; @@ -530,7 +536,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil switch (*buf_ptr) { case 0: label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = code_ptr - code; + label->size = (sljit_uw)(code_ptr - code); label = label->next; break; case 1: @@ -575,11 +581,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - jump_addr = jump->addr + executable_offset; + jump_addr = jump->addr + (sljit_uw)executable_offset; if (jump->flags & PATCH_MB) { SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); - *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); + *(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -600,7 +606,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) else if (jump->flags & PATCH_MD) - sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr); #endif jump = jump->next; @@ -626,7 +632,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; compiler->executable_offset = executable_offset; - compiler->executable_size = code_ptr - code; + compiler->executable_size = (sljit_uw)(code_ptr - code); code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); @@ -682,17 +688,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) -static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#define BINARY_IMM32(op_imm, immw, arg, argw) \ + do { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } while (0) -static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + do { \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + BINARY_IMM32(op_imm, immw, arg, argw); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } \ + } while (0) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) + +#else /* !SLJIT_CONFIG_X86_64 */ + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + BINARY_IMM32(op_imm, immw, arg, argw) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif /* SLJIT_CONFIG_X86_64 */ static sljit_s32 emit_mov(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, @@ -795,7 +824,7 @@ static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void) } static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler, - sljit_s32 src, sljit_sw srcw, sljit_s32 base, sljit_sw disp) + sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__) sljit_u8 *inst, *jz_after_cmp_inst; @@ -821,12 +850,6 @@ static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compile EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0); #endif /* SLJIT_CONFIG_X86_32 */ - if (src == SLJIT_UNUSED) { - /* Return address is on stack. */ - src = SLJIT_MEM1(base); - srcw = disp; - } - /* Compare return address against TMP_REG1. */ FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw)); @@ -861,8 +884,6 @@ static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compile SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_UNUSED_ARG(base); - SLJIT_UNUSED_ARG(disp); #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */ return SLJIT_SUCCESS; } @@ -879,8 +900,6 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, { sljit_u8* inst; - SLJIT_ASSERT(dst != SLJIT_UNUSED); - if (FAST_IS_REG(src)) { inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); FAIL_IF(!inst); @@ -890,14 +909,14 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); #else if (!compiler->mode32) { if (NOT_HALFWORD(srcw)) return emit_load_imm64(compiler, dst, srcw); } else - return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw); #endif } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -938,7 +957,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile { sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - sljit_s32 size; + sljit_uw size; #endif CHECK_ERROR(); @@ -975,7 +994,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile && reg_map[SLJIT_R1] < 7 && reg_map[TMP_REG1] == 2); #endif - compiler->mode32 = op & SLJIT_I32_OP; + compiler->mode32 = op & SLJIT_32; #endif SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); @@ -1084,7 +1103,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \ FAIL_IF(!inst); \ INC_SIZE(1); \ - *inst = (prefix); \ + *inst = U8(prefix); \ } while (0) static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, @@ -1104,7 +1123,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); #else inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); FAIL_IF(!inst); @@ -1134,7 +1153,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else if (FAST_IS_REG(src) && reg_map[src] >= 4) { /* src, dst are registers. */ - SLJIT_ASSERT(SLOW_IS_REG(dst)); + SLJIT_ASSERT(FAST_IS_REG(dst)); if (reg_map[dst] < 4) { if (dst != src) EMIT_MOV(compiler, dst, 0, src, 0); @@ -1193,7 +1212,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, } if (work_r == SLJIT_R0) { - ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); } else { inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); @@ -1206,7 +1225,7 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, *inst = MOV_rm8_r8; if (work_r == SLJIT_R0) { - ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]); } else { inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); @@ -1267,7 +1286,7 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw); #else inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); FAIL_IF(!inst); @@ -1316,9 +1335,6 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, return SLJIT_SUCCESS; } - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src, srcw); inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); @@ -1343,9 +1359,6 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, { sljit_u8* inst; - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src, srcw); inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); @@ -1412,7 +1425,7 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); #else if (cpu_has_cmov) { - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)); + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31)); inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); FAIL_IF(!inst); @@ -1420,9 +1433,9 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, *inst = CMOVE_r_rm; } else - FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31))); + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31))); - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_32) ? 63 : 31, dst_r, 0); #endif FAIL_IF(!inst); @@ -1450,7 +1463,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); CHECK_EXTRA_REGS(src, srcw, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op_flags & SLJIT_I32_OP; + compiler->mode32 = op_flags & SLJIT_32; #endif op = GET_OPCODE(op); @@ -1465,8 +1478,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } - if (op_flags & SLJIT_I32_OP) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op_flags & SLJIT_32) { if (src & SLJIT_MEM) { if (op == SLJIT_MOV_S32) op = SLJIT_MOV_U32; @@ -1475,8 +1488,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (op == SLJIT_MOV_U32) op = SLJIT_MOV_S32; } -#endif } +#endif if (src & SLJIT_IMM) { switch (op) { @@ -1520,8 +1533,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) case SLJIT_MOV_U32: case SLJIT_MOV_S32: + case SLJIT_MOV32: #endif - FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + EMIT_MOV(compiler, dst, dstw, src, srcw); break; case SLJIT_MOV_U8: FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); @@ -1542,6 +1556,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV_S32: FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); break; + case SLJIT_MOV32: + compiler->mode32 = 1; + EMIT_MOV(compiler, dst, dstw, src, srcw); + compiler->mode32 = 0; + break; #endif } @@ -1558,9 +1577,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_not_with_flags(compiler, dst, dstw, src, srcw); return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); - case SLJIT_NEG: - return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); - case SLJIT_CLZ: return emit_clz(compiler, op_flags, dst, dstw, src, srcw); } @@ -1568,36 +1584,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - if (IS_HALFWORD(immw) || compiler->mode32) { \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); \ - } \ - else { \ - FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ - inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ - FAIL_IF(!inst); \ - *inst = (op_mr); \ - } - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) - -#else - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) - -#endif - static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, @@ -1605,23 +1591,10 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; - sljit_u8 op_eax_imm = (op_types >> 24); - sljit_u8 op_rm = (op_types >> 16) & 0xff; - sljit_u8 op_mr = (op_types >> 8) & 0xff; - sljit_u8 op_imm = op_types & 0xff; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - return SLJIT_SUCCESS; - } + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { @@ -1725,23 +1698,10 @@ static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; - sljit_u8 op_eax_imm = (op_types >> 24); - sljit_u8 op_rm = (op_types >> 16) & 0xff; - sljit_u8 op_mr = (op_types >> 8) & 0xff; - sljit_u8 op_imm = op_types & 0xff; - - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - if (src2 & SLJIT_IMM) { - BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); - } - else { - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); - FAIL_IF(!inst); - *inst = op_rm; - } - return SLJIT_SUCCESS; - } + sljit_u8 op_eax_imm = U8(op_types >> 24); + sljit_u8 op_rm = U8((op_types >> 16) & 0xff); + sljit_u8 op_mr = U8((op_types >> 8) & 0xff); + sljit_u8 op_imm = U8(op_types & 0xff); if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { @@ -1810,9 +1770,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; - sljit_s32 dst_r; - - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; /* Register destination. */ if (dst_r == src1 && !(src2 & SLJIT_IMM)) { @@ -1841,7 +1799,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); - *inst = (sljit_s8)src1w; + *inst = U8(src1w); } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { @@ -1884,7 +1842,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); INC_SIZE(1); - *inst = (sljit_s8)src2w; + *inst = U8(src2w); } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { @@ -2167,13 +2125,6 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; return SLJIT_SUCCESS; } - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); - FAIL_IF(!inst); - *inst |= mode; - return SLJIT_SUCCESS; - } if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); @@ -2206,7 +2157,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); } - else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + else if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) { if (src1 != dst) EMIT_MOV(compiler, dst, 0, src1, src1w); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); @@ -2235,7 +2186,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #endif - if (dst != SLJIT_UNUSED) + if (dst != TMP_REG1) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); } @@ -2273,7 +2224,7 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); if (FAST_IS_REG(dst)) - return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0); + return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); return SLJIT_SUCCESS; } @@ -2283,7 +2234,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -2292,11 +2243,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile CHECK_EXTRA_REGS(src1, src1w, (void)0); CHECK_EXTRA_REGS(src2, src2w, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = op & SLJIT_I32_OP; + compiler->mode32 = op & SLJIT_32; #endif - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; + SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op)); switch (GET_OPCODE(op)) { case SLJIT_ADD: @@ -2310,17 +2260,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_cum_binary(compiler, BINARY_OPCODE(ADC), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: + if (src1 == SLJIT_IMM && src1w == 0) + return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); + if (!HAS_FLAGS(op)) { if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; - if (SLOW_IS_REG(dst) && src2 == dst) { + if (FAST_IS_REG(dst) && src2 == dst) { FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w)); return emit_unary(compiler, NEG_rm, dst, 0, dst, 0); } } - if (dst == SLJIT_UNUSED) - return emit_cmp_binary(compiler, src1, src1w, src2, src2w); return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: @@ -2329,8 +2280,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_MUL: return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: - if (dst == SLJIT_UNUSED) - return emit_test_binary(compiler, src1, src1w, src2, src2w); return emit_cum_binary(compiler, BINARY_OPCODE(AND), dst, dstw, src1, src1w, src2, src2w); case SLJIT_OR: @@ -2353,6 +2302,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 opcode = GET_OPCODE(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); + } + + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_32; +#endif + + if (opcode == SLJIT_SUB) { + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + } + return emit_test_binary(compiler, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -2369,7 +2350,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp /* Don't adjust shadow stack if it isn't enabled. */ if (!cpu_has_shadow_stack ()) return SLJIT_SUCCESS; - return adjust_shadow_stack(compiler, src, srcw, SLJIT_UNUSED, 0); + return adjust_shadow_stack(compiler, src, srcw); case SLJIT_PREFETCH_L1: case SLJIT_PREFETCH_L2: case SLJIT_PREFETCH_L3: @@ -2401,7 +2382,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_s32 size) + void *instruction, sljit_u32 size) { sljit_u8 *inst; @@ -2420,13 +2401,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* --------------------------------------------------------------------- */ /* Alignment(3) + 4 * 16 bytes. */ -static sljit_s32 sse2_data[3 + (4 * 4)]; -static sljit_s32 *sse2_buffer; +static sljit_u32 sse2_data[3 + (4 * 4)]; +static sljit_u32 *sse2_buffer; static void init_compiler(void) { /* Align to 16 bytes. */ - sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); + sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf); /* Single precision constants (each constant is 16 byte long). */ sse2_buffer[0] = 0x80000000; @@ -2486,7 +2467,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp compiler->mode32 = 0; #endif - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = CVTTSD2SI_r_xm; @@ -2518,7 +2499,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp srcw = 0; } - inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = CVTSI2SD_x_rm; @@ -2527,7 +2508,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp compiler->mode32 = 1; #endif if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2536,11 +2517,11 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src2, sljit_sw src2w) { if (!FAST_IS_REG(src1)) { - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); src1 = TMP_FREG; } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -2558,11 +2539,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil if (GET_OPCODE(op) == SLJIT_MOV_F64) { if (FAST_IS_REG(dst)) - return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw); + return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw); if (FAST_IS_REG(src)) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src); - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw)); - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); } if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) { @@ -2571,41 +2552,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* We overwrite the high bits of source. From SLJIT point of view, this is not an issue. Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ - FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0)); + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0)); } else { - FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw)); src = TMP_FREG; } - FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0)); + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0)); if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } if (FAST_IS_REG(dst)) { dst_r = dst; if (dst != src) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw)); } switch (GET_OPCODE(op)) { case SLJIT_NEG_F64: - FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8))); + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8))); break; case SLJIT_ABS_F64: - FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12))); + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12))); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2636,37 +2617,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil src2w = src1w; } else if (dst != src2) - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w)); else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } } else { dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); } switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; case SLJIT_SUB_F64: - FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; case SLJIT_MUL_F64: - FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; case SLJIT_DIV_F64: - FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w)); break; } if (dst_r == TMP_FREG) - return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG); + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); return SLJIT_SUCCESS; } @@ -2708,7 +2689,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF_NULL(jump); - set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT))); type &= 0xff; /* Worst case size. */ @@ -2740,8 +2721,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (src == SLJIT_IMM) { jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF_NULL(jump); - set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT)); - jump->u.target = srcw; + set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT))); + jump->u.target = (sljit_uw)srcw; /* Worst case size. */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -2764,7 +2745,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_FF; - *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; + *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm)); } return SLJIT_SUCCESS; } @@ -2790,7 +2771,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co type &= 0xff; /* setcc = jcc + 0x10. */ - cond_set = get_jump_code(type) + 0x10; + cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { @@ -2802,9 +2783,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = GROUP_0F; *inst++ = cond_set; *inst++ = MOD_REG | reg_lmap[TMP_REG1]; - *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); + *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B)); *inst++ = OR_rm8_r8; - *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; + *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]); return SLJIT_SUCCESS; } @@ -2822,7 +2803,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* The movzx instruction does not affect flags. */ *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; - *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; + *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]); if (reg != TMP_REG1) return SLJIT_SUCCESS; @@ -2849,11 +2830,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co /* Set low byte to conditional flag. */ *inst++ = GROUP_0F; *inst++ = cond_set; - *inst++ = MOD_REG | reg_map[dst]; + *inst++ = U8(MOD_REG | reg_map[dst]); *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; - *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; + *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]); return SLJIT_SUCCESS; } @@ -2872,15 +2853,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = GROUP_0F; /* cmovcc = setcc - 0x50. */ - *inst++ = cond_set - 0x50; - *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; + *inst++ = U8(cond_set - 0x50); + *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]); return SLJIT_SUCCESS; } inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 3 + 1); - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); /* Set al to conditional flag. */ *inst++ = GROUP_0F; *inst++ = cond_set; @@ -2888,8 +2869,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; - *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */); + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); return SLJIT_SUCCESS; } @@ -2901,13 +2882,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(!inst); INC_SIZE(1 + 3 + 2 + 1); /* Set low register to conditional flag. */ - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); *inst++ = GROUP_0F; *inst++ = cond_set; *inst++ = MOD_REG | 0 /* eax */; *inst++ = OR_rm8_r8; *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); } else { inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); @@ -2915,14 +2896,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co INC_SIZE(2 + 3 + 2 + 2); /* Set low register to conditional flag. */ *inst++ = XCHG_r_rm; - *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); *inst++ = GROUP_0F; *inst++ = cond_set; *inst++ = MOD_REG | 1 /* ecx */; *inst++ = OR_rm8_r8; *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; *inst++ = XCHG_r_rm; - *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]); } return SLJIT_SUCCESS; } @@ -2931,7 +2912,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 3 + 1); - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); /* Set al to conditional flag. */ *inst++ = GROUP_0F; *inst++ = cond_set; @@ -2941,7 +2922,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = MOVZX_r_rm8; *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; - *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]); if (GET_OPCODE(op) < SLJIT_ADD) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); @@ -2964,7 +2945,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - dst_reg &= ~SLJIT_I32_OP; + dst_reg &= ~SLJIT_32; if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); @@ -2977,8 +2958,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil CHECK_EXTRA_REGS(src, srcw, (void)0); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = dst_reg & SLJIT_I32_OP; - dst_reg &= ~SLJIT_I32_OP; + compiler->mode32 = dst_reg & SLJIT_32; + dst_reg &= ~SLJIT_32; #endif if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { @@ -2990,7 +2971,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; - *inst = get_jump_code(type & 0xff) - 0x40; + *inst = U8(get_jump_code(type & 0xff) - 0x40); return SLJIT_SUCCESS; } @@ -3123,9 +3104,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset); + sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset)); #else - sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target); + sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target); #endif SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1); } diff --git a/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c b/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c index 147175afa68..915411fbed6 100644 --- a/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c +++ b/thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c @@ -66,7 +66,7 @@ /* --------------------------------------------------------------------- */ /* 64 KByte. */ -#define CHUNK_SIZE 0x10000 +#define CHUNK_SIZE (sljit_uw)0x10000 struct chunk_header { void *executable; @@ -194,7 +194,7 @@ static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size) if (fd == -1) return NULL; - if (ftruncate(fd, size)) { + if (ftruncate(fd, (off_t)size)) { close(fd); return NULL; } @@ -281,7 +281,7 @@ struct free_block { #define AS_FREE_BLOCK(base, offset) \ ((struct free_block*)(((sljit_u8*)base) + offset)) #define MEM_START(base) ((void*)((base) + 1)) -#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7) static struct free_block* free_blocks; static sljit_uw allocated_size; diff --git a/thirdparty/pcre2/src/sljit/sljitUtils.c b/thirdparty/pcre2/src/sljit/sljitUtils.c index 9bce714735d..967593b1578 100644 --- a/thirdparty/pcre2/src/sljit/sljitUtils.c +++ b/thirdparty/pcre2/src/sljit/sljitUtils.c @@ -131,12 +131,12 @@ static SLJIT_INLINE int open_dev_zero(void) #ifdef _WIN32 -static SLJIT_INLINE sljit_sw get_page_alignment(void) { +static SLJIT_INLINE sljit_uw get_page_alignment(void) { SYSTEM_INFO si; - static sljit_sw sljit_page_align; + static sljit_uw sljit_page_align = 0; if (!sljit_page_align) { GetSystemInfo(&si); - sljit_page_align = si.dwPageSize - 1; + sljit_page_align = (sljit_uw)si.dwPageSize - 1; } return sljit_page_align; } @@ -145,18 +145,21 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) { #include -static SLJIT_INLINE sljit_sw get_page_alignment(void) { - static sljit_sw sljit_page_align = -1; - if (sljit_page_align < 0) { +static SLJIT_INLINE sljit_uw get_page_alignment(void) { + static sljit_uw sljit_page_align = 0; + + sljit_sw align; + + if (!sljit_page_align) { #ifdef _SC_PAGESIZE - sljit_page_align = sysconf(_SC_PAGESIZE); + align = sysconf(_SC_PAGESIZE); #else - sljit_page_align = getpagesize(); + align = getpagesize(); #endif /* Should never happen. */ - if (sljit_page_align < 0) - sljit_page_align = 4096; - sljit_page_align--; + if (align < 0) + align = 4096; + sljit_page_align = (sljit_uw)align - 1; } return sljit_page_align; } @@ -227,7 +230,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *st SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) { SLJIT_UNUSED_ARG(allocator_data); - munmap((void*)stack->min_start, stack->end - stack->min_start); + munmap((void*)stack->min_start, (size_t)(stack->end - stack->min_start)); SLJIT_FREE(stack, allocator_data); } @@ -237,7 +240,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(slj { struct sljit_stack *stack; void *ptr; - sljit_sw page_align; + sljit_uw page_align; SLJIT_UNUSED_ARG(allocator_data); @@ -295,7 +298,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st #if defined _WIN32 || defined(POSIX_MADV_DONTNEED) sljit_uw aligned_old_start; sljit_uw aligned_new_start; - sljit_sw page_align; + sljit_uw page_align; #endif if ((new_start < stack->min_start) || (new_start >= stack->end))