Upgrade PCRE2 to 10.32

[ChangeLog][Third-Party Code] PCRE2 was updated to version 10.32.

Change-Id: Id3bf7df0003f626cd1135d0508a5a489ff02f1e5
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
This commit is contained in:
Giuseppe D'Angelo 2018-10-01 12:24:08 +02:00
parent 6599c1f758
commit 44eeeb8e81
35 changed files with 4902 additions and 4077 deletions

View File

@ -4,11 +4,11 @@ PCRE2 LICENCE
PCRE2 is a library of functions to support regular expressions whose syntax PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
specified below, with one exemption for certain binary redistributions. The licence, as specified below, with one exemption for certain binary
documentation for PCRE2, supplied in the "doc" directory, is distributed under redistributions. The documentation for PCRE2, supplied in the "doc" directory,
the same terms as the software itself. The data in the testdata directory is is distributed under the same terms as the software itself. The data in the
not copyrighted and is in the public domain. testdata directory is not copyrighted and is in the public domain.
The basic library functions are written in C and are freestanding. Also The basic library functions are written in C and are freestanding. Also
included in the distribution is a just-in-time compiler that can be used to included in the distribution is a just-in-time compiler that can be used to
@ -35,7 +35,7 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT
Written by: Zoltan Herczeg Written by: Zoltan Herczeg
Email local part: hzmester Email local part: hzmester
Emain domain: freemail.hu Email domain: freemail.hu
Copyright(c) 2010-2018 Zoltan Herczeg Copyright(c) 2010-2018 Zoltan Herczeg
All rights reserved. All rights reserved.
@ -46,7 +46,7 @@ STACK-LESS JUST-IN-TIME COMPILER
Written by: Zoltan Herczeg Written by: Zoltan Herczeg
Email local part: hzmester Email local part: hzmester
Emain domain: freemail.hu Email domain: freemail.hu
Copyright(c) 2009-2018 Zoltan Herczeg Copyright(c) 2009-2018 Zoltan Herczeg
All rights reserved. All rights reserved.

View File

@ -7,7 +7,7 @@
"Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.", "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.",
"Homepage": "http://www.pcre.org/", "Homepage": "http://www.pcre.org/",
"Version": "10.31", "Version": "10.32",
"DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.31.tar.bz2", "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.32.tar.bz2",
"License": "BSD 3-clause \"New\" or \"Revised\" License", "License": "BSD 3-clause \"New\" or \"Revised\" License",
"LicenseId": "BSD-3-Clause", "LicenseId": "BSD-3-Clause",
@ -26,7 +26,7 @@ Copyright (c) 2013-2013 Tilera Corporation (jiwang@tilera.com)"
"Path": "src/sljit", "Path": "src/sljit",
"Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.", "Description": "The PCRE library is a set of functions that implement regular expression pattern matching using the same syntax and semantics as Perl 5.",
"Homepage": "http://www.pcre.org/", "Homepage": "http://www.pcre.org/",
"Version": "10.31", "Version": "10.32",
"DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.31.tar.bz2", "DownloadLocation": "https://ftp.pcre.org/pub/pcre/pcre2-10.32.tar.bz2",
"License": "BSD 2-clause \"Simplified\" License", "License": "BSD 2-clause \"Simplified\" License",
"LicenseId": "BSD-2-Clause", "LicenseId": "BSD-2-Clause",

View File

@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be /* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions. #included by applications that call PCRE2 functions.
Copyright (c) 2016-2017 University of Cambridge Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -42,9 +42,15 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */ /* The current PCRE version information. */
#define PCRE2_MAJOR 10 #define PCRE2_MAJOR 10
#define PCRE2_MINOR 31 #define PCRE2_MINOR 32
#define PCRE2_PRERELEASE #define PCRE2_PRERELEASE
#define PCRE2_DATE 2018-02-12 #define PCRE2_DATE 2018-09-10
/* For the benefit of systems without stdint.h, an alternative is to use
inttypes.h. The existence of these headers is checked by configure or CMake. */
#define PCRE2_HAVE_STDINT_H 1
#define PCRE2_HAVE_INTTYPES_H 1
/* When an application links to a PCRE DLL in Windows, the symbols that are /* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate imported have to be identified as such. When building PCRE2, the appropriate
@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */
#define PCRE2_CALL_CONVENTION #define PCRE2_CALL_CONVENTION
#endif #endif
/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and /* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
uint8_t, UCHAR_MAX, etc are defined. */ that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
header, the relevant values must be provided by some other means. */
#include <limits.h> #include <limits.h>
#include <stdlib.h> #include <stdlib.h>
#if PCRE2_HAVE_STDINT_H
#include <stdint.h> #include <stdint.h>
#elif PCRE2_HAVE_INTTYPES_H
#include <inttypes.h>
#endif
/* Allow for C++ users compiling this directly. */ /* Allow for C++ users compiling this directly. */
@ -269,6 +281,7 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156 #define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157 #define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158 #define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
/* Error 159 is obsolete and should now never occur */
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159 #define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
#define PCRE2_ERROR_VERB_UNKNOWN 160 #define PCRE2_ERROR_VERB_UNKNOWN 160
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161 #define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
@ -303,6 +316,8 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 #define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 #define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 #define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
/* "Expected" matching error codes: no match and partial match. */ /* "Expected" matching error codes: no match and partial match. */
@ -387,6 +402,7 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) #define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63) #define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64) #define PCRE2_ERROR_CONVERT_SYNTAX (-64)
#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -505,7 +505,7 @@ Arguments:
utf TRUE in UTF mode utf TRUE in UTF mode
cb compile data block cb compile data block
base_list the data list of the base opcode base_list the data list of the base opcode
base_end the end of the data list base_end the end of the base opcode
rec_limit points to recursion depth counter rec_limit points to recursion depth counter
Returns: TRUE if the auto-possessification is possible Returns: TRUE if the auto-possessification is possible
@ -730,7 +730,7 @@ for(;;)
if ((*xclass_flags & XCL_MAP) == 0) if ((*xclass_flags & XCL_MAP) == 0)
{ {
/* No bits are set for characters < 256. */ /* No bits are set for characters < 256. */
if (list[1] == 0) return TRUE; if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
/* Might be an empty repeat. */ /* Might be an empty repeat. */
continue; continue;
} }
@ -1235,6 +1235,7 @@ for (;;)
#endif #endif
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_SKIP_ARG: case OP_SKIP_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:

View File

@ -2,23 +2,24 @@
* Perl-Compatible Regular Expressions * * Perl-Compatible Regular Expressions *
*************************************************/ *************************************************/
/* This file contains character tables that are used when no external tables /* This file was automatically written by the dftables auxiliary
are passed to PCRE2 by the application that calls it. The tables are used only program. It contains character tables that are used when no external
for characters whose code values are less than 256. tables are passed to PCRE2 by the application that calls it. The tables
are used only for characters whose code values are less than 256. */
This is a default version of the tables that assumes ASCII encoding. A program /* The dftables program (which is distributed with PCRE2) can be used to
called dftables (which is distributed with PCRE2) can be used to build build alternative versions of this file. This is necessary if you are
alternative versions of this file. This is necessary if you are running in an running in an EBCDIC environment, or if you want to default to a different
EBCDIC environment, or if you want to default to a different encoding, for encoding, for example ISO-8859-1. When dftables is run, it creates these
example ISO-8859-1. When dftables is run, it creates these tables in the tables in the current locale. This happens automatically if PCRE2 is
current locale. If PCRE2 is configured with --enable-rebuild-chartables, this configured with --enable-rebuild-chartables. */
happens automatically.
The following #includes are present because without them gcc 4.x may remove the /* The following #include is present because without it gcc 4.x may remove
array definition from the final binary if PCRE2 is built into a static library the array definition from the final binary if PCRE2 is built into a static
and dead code stripping is activated. This leads to link errors. Pulling in the library and dead code stripping is activated. This leads to link errors.
header ensures that the array gets flagged as "someone outside this compilation Pulling in the header ensures that the array gets flagged as "someone
unit might reference this" and so it will always be supplied to the linker. */ outside this compilation unit might reference this" and so it will always
be supplied to the linker. */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
#include "config.h" #include "config.h"
@ -101,7 +102,7 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32 /* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word, classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */ graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, 0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@ -159,25 +160,24 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
0x04 decimal digit 0x04 decimal digit
0x08 hexadecimal digit 0x08 hexadecimal digit
0x10 alphanumeric or '_' 0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/ */
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */ 0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */ 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */ 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */ 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */ 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */ 0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */ 0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Other debugging code can be enabled by these defines. */ /* Other debugging code can be enabled by these defines. */
// #define DEBUG_SHOW_CAPTURES /* #define DEBUG_SHOW_CAPTURES */
// #define DEBUG_SHOW_PARSED /* #define DEBUG_SHOW_PARSED */
/* There are a few things that vary with different code unit sizes. Handle them /* There are a few things that vary with different code unit sizes. Handle them
by defining macros in order to minimize #if usage. */ by defining macros in order to minimize #if usage. */
@ -250,34 +250,35 @@ is present where expected in a conditional group. */
#define META_LOOKBEHINDNOT 0x80250000u /* (?<! */ #define META_LOOKBEHINDNOT 0x80250000u /* (?<! */
/* These must be kept in this order, with consecutive values, and the _ARG /* These must be kept in this order, with consecutive values, and the _ARG
versions of PRUNE, SKIP, and THEN immediately after their non-argument versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
versions. */ versions. */
#define META_MARK 0x80260000u /* (*MARK) */ #define META_MARK 0x80260000u /* (*MARK) */
#define META_ACCEPT 0x80270000u /* (*ACCEPT) */ #define META_ACCEPT 0x80270000u /* (*ACCEPT) */
#define META_COMMIT 0x80280000u /* (*COMMIT) */ #define META_FAIL 0x80280000u /* (*FAIL) */
#define META_FAIL 0x80290000u /* (*FAIL) */ #define META_COMMIT 0x80290000u /* These */
#define META_PRUNE 0x802a0000u /* These pairs must */ #define META_COMMIT_ARG 0x802a0000u /* pairs */
#define META_PRUNE_ARG 0x802b0000u /* be */ #define META_PRUNE 0x802b0000u /* must */
#define META_SKIP 0x802c0000u /* kept */ #define META_PRUNE_ARG 0x802c0000u /* be */
#define META_SKIP_ARG 0x802d0000u /* in */ #define META_SKIP 0x802d0000u /* kept */
#define META_THEN 0x802e0000u /* this */ #define META_SKIP_ARG 0x802e0000u /* in */
#define META_THEN_ARG 0x802f0000u /* order */ #define META_THEN 0x802f0000u /* this */
#define META_THEN_ARG 0x80300000u /* order */
/* These must be kept in groups of adjacent 3 values, and all together. */ /* These must be kept in groups of adjacent 3 values, and all together. */
#define META_ASTERISK 0x80300000u /* * */ #define META_ASTERISK 0x80310000u /* * */
#define META_ASTERISK_PLUS 0x80310000u /* *+ */ #define META_ASTERISK_PLUS 0x80320000u /* *+ */
#define META_ASTERISK_QUERY 0x80320000u /* *? */ #define META_ASTERISK_QUERY 0x80330000u /* *? */
#define META_PLUS 0x80330000u /* + */ #define META_PLUS 0x80340000u /* + */
#define META_PLUS_PLUS 0x80340000u /* ++ */ #define META_PLUS_PLUS 0x80350000u /* ++ */
#define META_PLUS_QUERY 0x80350000u /* +? */ #define META_PLUS_QUERY 0x80360000u /* +? */
#define META_QUERY 0x80360000u /* ? */ #define META_QUERY 0x80370000u /* ? */
#define META_QUERY_PLUS 0x80370000u /* ?+ */ #define META_QUERY_PLUS 0x80380000u /* ?+ */
#define META_QUERY_QUERY 0x80380000u /* ?? */ #define META_QUERY_QUERY 0x80390000u /* ?? */
#define META_MINMAX 0x80390000u /* {n,m} repeat */ #define META_MINMAX 0x803a0000u /* {n,m} repeat */
#define META_MINMAX_PLUS 0x803a0000u /* {n,m}+ repeat */ #define META_MINMAX_PLUS 0x803b0000u /* {n,m}+ repeat */
#define META_MINMAX_QUERY 0x803b0000u /* {n,m}? repeat */ #define META_MINMAX_QUERY 0x803c0000u /* {n,m}? repeat */
#define META_FIRST_QUANTIFIER META_ASTERISK #define META_FIRST_QUANTIFIER META_ASTERISK
#define META_LAST_QUANTIFIER META_MINMAX_QUERY #define META_LAST_QUANTIFIER META_MINMAX_QUERY
@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {
SIZEOFFSET, /* META_LOOKBEHINDNOT */ SIZEOFFSET, /* META_LOOKBEHINDNOT */
1, /* META_MARK - plus the string length */ 1, /* META_MARK - plus the string length */
0, /* META_ACCEPT */ 0, /* META_ACCEPT */
0, /* META_COMMIT */
0, /* META_FAIL */ 0, /* META_FAIL */
0, /* META_COMMIT */
1, /* META_COMMIT_ARG - plus the string length */
0, /* META_PRUNE */ 0, /* META_PRUNE */
1, /* META_PRUNE_ARG - plus the string length */ 1, /* META_PRUNE_ARG - plus the string length */
0, /* META_SKIP */ 0, /* META_SKIP */
@ -510,17 +512,17 @@ static const short int escapes[] = {
-ESC_Z, CHAR_LEFT_SQUARE_BRACKET, -ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
CHAR_GRAVE_ACCENT, ESC_a, CHAR_GRAVE_ACCENT, CHAR_BEL,
-ESC_b, 0, -ESC_b, 0,
-ESC_d, ESC_e, -ESC_d, CHAR_ESC,
ESC_f, 0, CHAR_FF, 0,
-ESC_h, 0, -ESC_h, 0,
0, -ESC_k, 0, -ESC_k,
0, 0, 0, 0,
ESC_n, 0, CHAR_LF, 0,
-ESC_p, 0, -ESC_p, 0,
ESC_r, -ESC_s, CHAR_CR, -ESC_s,
ESC_tee, 0, CHAR_HT, 0,
-ESC_v, -ESC_w, -ESC_v, -ESC_w,
0, 0, 0, 0,
-ESC_z -ESC_z
@ -544,20 +546,20 @@ because it is defined as 'a', which of course picks up the ASCII value. */
#endif #endif
static const short int escapes[] = { static const short int escapes[] = {
/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* 80 */ CHAR_BEL, -ESC_b, 0, -ESC_d, CHAR_ESC, CHAR_FF, 0,
/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, /* 88 */ -ESC_h, 0, 0, '{', 0, 0, 0, 0,
/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p, /* 90 */ 0, 0, -ESC_k, 0, 0, CHAR_LF, 0, -ESC_p,
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, /* 98 */ 0, CHAR_CR, 0, '}', 0, 0, 0, 0,
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, /* A0 */ 0, '~', -ESC_s, CHAR_HT, 0, -ESC_v, -ESC_w, 0,
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, /* A8 */ 0, -ESC_z, 0, 0, 0, '[', 0, 0,
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', /* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, /* C0 */ '{', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G,
/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, /* C8 */ -ESC_H, 0, 0, 0, 0, 0, 0, 0,
/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P, /* D0 */ '}', 0, -ESC_K, 0, 0, -ESC_N, 0, -ESC_P,
/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, /* D8 */ -ESC_Q, -ESC_R, 0, 0, 0, 0, 0, 0,
/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, /* E0 */ '\\', 0, -ESC_S, 0, 0, -ESC_V, -ESC_W, -ESC_X,
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, /* E8 */ 0, -ESC_Z, 0, 0, 0, 0, 0, 0,
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,
/* F8 */ 0, 0 /* F8 */ 0, 0
}; };
@ -586,9 +588,9 @@ static const char verbnames[] =
"\0" /* Empty name is a shorthand for MARK */ "\0" /* Empty name is a shorthand for MARK */
STRING_MARK0 STRING_MARK0
STRING_ACCEPT0 STRING_ACCEPT0
STRING_COMMIT0
STRING_F0 STRING_F0
STRING_FAIL0 STRING_FAIL0
STRING_COMMIT0
STRING_PRUNE0 STRING_PRUNE0
STRING_SKIP0 STRING_SKIP0
STRING_THEN; STRING_THEN;
@ -596,11 +598,11 @@ static const char verbnames[] =
static const verbitem verbs[] = { static const verbitem verbs[] = {
{ 0, META_MARK, +1 }, /* > 0 => must have an argument */ { 0, META_MARK, +1 }, /* > 0 => must have an argument */
{ 4, META_MARK, +1 }, { 4, META_MARK, +1 },
{ 6, META_ACCEPT, -1 }, /* < 0 => must not have an argument */ { 6, META_ACCEPT, -1 }, /* < 0 => Optional argument, convert to pre-MARK */
{ 6, META_COMMIT, -1 },
{ 1, META_FAIL, -1 }, { 1, META_FAIL, -1 },
{ 4, META_FAIL, -1 }, { 4, META_FAIL, -1 },
{ 5, META_PRUNE, 0 }, /* Argument is optional; bump META code if found */ { 6, META_COMMIT, 0 },
{ 5, META_PRUNE, 0 }, /* Optional argument; bump META code if found */
{ 4, META_SKIP, 0 }, { 4, META_SKIP, 0 },
{ 4, META_THEN, 0 } { 4, META_THEN, 0 }
}; };
@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
/* Verb opcodes, indexed by their META code offset from META_MARK. */ /* Verb opcodes, indexed by their META code offset from META_MARK. */
static const uint32_t verbops[] = { static const uint32_t verbops[] = {
OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
OP_SKIP_ARG, OP_THEN, OP_THEN_ARG }; OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
/* Offsets from OP_STAR for case-independent and negative repeat opcodes. */ /* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
ERR91, ERR92}; ERR91, ERR92, ERR93, ERR94 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such /* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -976,8 +978,8 @@ for (;;)
case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break; case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break; case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_FAIL: fprintf(stderr, "META (*FAIL)"); break; case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break; case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break; case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
case META_THEN: fprintf(stderr, "META (*THEN)"); break; case META_THEN: fprintf(stderr, "META (*THEN)"); break;
@ -1067,6 +1069,10 @@ for (;;)
fprintf(stderr, "META (*MARK:"); fprintf(stderr, "META (*MARK:");
goto SHOWARG; goto SHOWARG;
case META_COMMIT_ARG:
fprintf(stderr, "META (*COMMIT:");
goto SHOWARG;
case META_PRUNE_ARG: case META_PRUNE_ARG:
fprintf(stderr, "META (*PRUNE:"); fprintf(stderr, "META (*PRUNE:");
goto SHOWARG; goto SHOWARG;
@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
escape = -i; /* Else return a special escape */ escape = -i; /* Else return a special escape */
if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X)) if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */
/* Perl supports \N{name} for character names and \N{U+dddd} for numerical
Unicode code points, as well as plain \N for "not newline". PCRE does not
support \N{name}. However, it does support quantification such as \N{2,3},
so if \N{ is not followed by U+dddd we check for a quantifier. */
if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{
PCRE2_SPTR p = ptr + 1;
/* \N{U+ can be handled by the \x{ code. However, this construction is
not valid in EBCDIC environments because it specifies a Unicode
character, not a codepoint in the local code. For example \N{U+0041}
must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
casing semantics for the entire pattern, so allow it only in UTF (i.e.
Unicode) mode. */
if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
{
#ifdef EBCDIC
*errorcodeptr = ERR93;
#else
if (utf)
{
ptr = p + 1;
escape = 0; /* Not a fancy escape after all */
goto COME_FROM_NU;
}
else *errorcodeptr = ERR93;
#endif
}
/* Give an error if what follows is not a quantifier, but don't override
an error set by the quantifier reader (e.g. number overflow). */
else
{
if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
*errorcodeptr == 0)
*errorcodeptr = ERR37;
}
}
} }
} }
@ -1462,6 +1510,7 @@ else
/* A number of Perl escapes are not handled by PCRE. We give an explicit /* A number of Perl escapes are not handled by PCRE. We give an explicit
error. */ error. */
case CHAR_F:
case CHAR_l: case CHAR_l:
case CHAR_L: case CHAR_L:
*errorcodeptr = ERR37; *errorcodeptr = ERR37;
@ -1719,6 +1768,9 @@ else
{ {
if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET) if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
{ {
#ifndef EBCDIC
COME_FROM_NU:
#endif
if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
{ {
*errorcodeptr = ERR78; *errorcodeptr = ERR78;
@ -1852,19 +1904,6 @@ else
} }
} }
/* Perl supports \N{name} for character names, as well as plain \N for "not
newline". PCRE does not support \N{name}. However, it does support
quantification such as \N{2,3}. */
if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
ptrend - ptr > 2)
{
PCRE2_SPTR p = ptr + 1;
if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
*errorcodeptr == 0)
*errorcodeptr = ERR37;
}
/* Set the pointer to the next character before returning. */ /* Set the pointer to the next character before returning. */
*ptrptr = ptr; *ptrptr = ptr;
@ -2251,11 +2290,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u #define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u #define NSF_CONDASSERT 0x0002u
/* Of the options that are changeable within the pattern, these are tracked /* Options that are changeable within the pattern must be tracked during
during parsing. The rest are used from META_OPTIONS items when compiling. */ parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
but all must be tracked so that META_OPTIONS items set the correct values for
the main compiling phase. */
#define PARSE_TRACKED_OPTIONS \ #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
(PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE) PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values /* States used for analyzing ranges in character classes. The two OK values
must be last. */ must be last. */
@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern; uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end; uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t meta_quantifier = 0; uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
uint16_t nest_depth = 0; uint16_t nest_depth = 0;
int after_manual_callout = 0; int after_manual_callout = 0;
int expect_cond_assert = 0; int expect_cond_assert = 0;
@ -2434,11 +2477,17 @@ while (ptr < ptrend)
/* EITHER: not both options set */ /* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
/* OR: character > 255 */ #ifdef SUPPORT_UNICODE
c > 255 || /* OR: character > 255 AND not Unicode Pattern White Space */
/* OR: not a # comment or white space */ (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0) #endif
)) /* OR: not a # comment or isspace() white space */
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
#ifdef SUPPORT_UNICODE
/* and not CHAR_NEL when Unicode is supported */
&& c != CHAR_NEL
#endif
)))
{ {
PCRE2_SIZE verbnamelength; PCRE2_SIZE verbnamelength;
@ -2461,6 +2510,16 @@ while (ptr < ptrend)
goto FAILED; goto FAILED;
} }
*verblengthptr = (uint32_t)verbnamelength; *verblengthptr = (uint32_t)verbnamelength;
/* If this name was on a verb such as (*ACCEPT) which does not continue,
a (*MARK) was generated for the name. We now add the original verb as the
next item. */
if (add_after_mark != 0)
{
*parsed_pattern++ = add_after_mark;
add_after_mark = 0;
}
break; break;
case CHAR_BACKSLASH: case CHAR_BACKSLASH:
@ -2510,11 +2569,18 @@ while (ptr < ptrend)
/* Skip over whitespace and # comments in extended mode. Note that c is a /* Skip over whitespace and # comments in extended mode. Note that c is a
character, not a code unit, so we must not use MAX_255 to test its size character, not a code unit, so we must not use MAX_255 to test its size
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
whitespace characters are those designated as "Pattern White Space" by
Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0) if ((options & PCRE2_EXTENDED) != 0)
{ {
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
#ifdef SUPPORT_UNICODE
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
#endif
if (c == CHAR_NUMBER_SIGN) if (c == CHAR_NUMBER_SIGN)
{ {
while (ptr < ptrend) while (ptr < ptrend)
@ -3206,7 +3272,6 @@ while (ptr < ptrend)
tempptr = ptr; tempptr = ptr;
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
options, TRUE, cb); options, TRUE, cb);
if (errorcode != 0) if (errorcode != 0)
{ {
CLASS_ESCAPE_FAILED: CLASS_ESCAPE_FAILED:
@ -3454,13 +3519,25 @@ while (ptr < ptrend)
if (*ptr++ == CHAR_COLON) /* Skip past : or ) */ if (*ptr++ == CHAR_COLON) /* Skip past : or ) */
{ {
if (verbs[i].has_arg < 0) /* Argument is forbidden */ /* Some optional arguments can be treated as a preceding (*MARK) */
if (verbs[i].has_arg < 0)
{ {
errorcode = ERR59; add_after_mark = verbs[i].meta;
goto FAILED; *parsed_pattern++ = META_MARK;
} }
/* The remaining verbs with arguments (except *MARK) need a different
opcode. */
else
{
*parsed_pattern++ = verbs[i].meta + *parsed_pattern++ = verbs[i].meta +
((verbs[i].meta != META_MARK)? 0x00010000u:0); ((verbs[i].meta != META_MARK)? 0x00010000u:0);
}
/* Set up for reading the name in the main loop. */
verblengthptr = parsed_pattern++; verblengthptr = parsed_pattern++;
verbnamestart = ptr; verbnamestart = ptr;
inverbname = TRUE; inverbname = TRUE;
@ -3521,17 +3598,39 @@ while (ptr < ptrend)
else else
{ {
BOOL hyphenok = TRUE;
uint32_t oldoptions = options;
top_nest->reset_group = 0; top_nest->reset_group = 0;
top_nest->max_group = 0; top_nest->max_group = 0;
set = unset = 0; set = unset = 0;
optset = &set; optset = &set;
/* ^ at the start unsets imnsx and disables the subsequent use of - */
if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
{
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
hyphenok = FALSE;
ptr++;
}
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS && while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
*ptr != CHAR_COLON) *ptr != CHAR_COLON)
{ {
switch (*ptr++) switch (*ptr++)
{ {
case CHAR_MINUS: optset = &unset; break; case CHAR_MINUS:
if (!hyphenok)
{
errorcode = ERR94;
ptr--; /* Correct the offset */
goto FAILED;
}
optset = &unset;
hyphenok = FALSE;
break;
case CHAR_J: /* Record that it changed in the external options */ case CHAR_J: /* Record that it changed in the external options */
*optset |= PCRE2_DUPNAMES; *optset |= PCRE2_DUPNAMES;
@ -3591,7 +3690,7 @@ while (ptr < ptrend)
/* If nothing changed, no need to record. */ /* If nothing changed, no need to record. */
if (set != 0 || unset != 0) if (options != oldoptions)
{ {
*parsed_pattern++ = META_OPTIONS; *parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options; *parsed_pattern++ = options;
@ -3896,9 +3995,8 @@ while (ptr < ptrend)
if (*ptr == CHAR_DOT) if (*ptr == CHAR_DOT)
{ {
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode)) minor = (*ptr++ - CHAR_0) * 10;
goto FAILED; if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (minor < 10) minor *= 10;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION; goto BAD_VERSION_CONDITION;
} }
@ -4261,11 +4359,11 @@ goto FAILED;
/************************************************* /*************************************************
* Find first significant op code * * Find first significant opcode *
*************************************************/ *************************************************/
/* This is called by several functions that scan a compiled expression looking /* This is called by several functions that scan a compiled expression looking
for a fixed first character, or an anchoring op code etc. It skips over things for a fixed first character, or an anchoring opcode etc. It skips over things
that do not influence this. For some calls, it makes sense to skip negative that do not influence this. For some calls, it makes sense to skip negative
forward and all backward assertions, and also the \b assertion; for others it forward and all backward assertions, and also the \b assertion; for others it
does not. does not.
@ -5472,7 +5570,7 @@ for (;; pptr++)
set xclass = TRUE. Then, in the pre-compile phase, accumulate the length set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
of the extra data and reset the pointer. This is so that very large of the extra data and reset the pointer. This is so that very large
classes that contain a zillion wide characters or Unicode property tests classes that contain a zillion wide characters or Unicode property tests
do not overwrite the work space (which is on the stack). */ do not overwrite the workspace (which is on the stack). */
if (class_uchardata > class_uchardata_base) if (class_uchardata > class_uchardata_base)
{ {
@ -5563,7 +5661,7 @@ for (;; pptr++)
if (class_has_8bitchar > 0) if (class_has_8bitchar > 0)
{ {
*code++ |= XCL_MAP; *code++ |= XCL_MAP;
memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
CU2BYTES(class_uchardata - code)); CU2BYTES(class_uchardata - code));
if (negate_class && !xclass_has_prop) if (negate_class && !xclass_has_prop)
for (i = 0; i < 32; i++) classbits[i] = ~classbits[i]; for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
@ -5655,6 +5753,7 @@ for (;; pptr++)
cb->had_pruneorskip = TRUE; cb->had_pruneorskip = TRUE;
/* Fall through */ /* Fall through */
case META_MARK: case META_MARK:
case META_COMMIT_ARG:
VERB_ARG: VERB_ARG:
*code++ = verbops[(meta - META_MARK) >> 16]; *code++ = verbops[(meta - META_MARK) >> 16];
/* The length is in characters. */ /* The length is in characters. */
@ -6509,7 +6608,7 @@ for (;; pptr++)
/* Wrap the recursion call in OP_BRA brackets. */ /* Wrap the recursion call in OP_BRA brackets. */
memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
op_previous = *previous = OP_BRA; op_previous = *previous = OP_BRA;
PUT(previous, 1, 2 + 2*LINK_SIZE); PUT(previous, 1, 2 + 2*LINK_SIZE);
previous[2 + 2*LINK_SIZE] = OP_KET; previous[2 + 2*LINK_SIZE] = OP_KET;
@ -6589,7 +6688,7 @@ for (;; pptr++)
if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED) if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
{ {
memmove(previous + 1, previous, CU2BYTES(len)); (void)memmove(previous + 1, previous, CU2BYTES(len));
code++; code++;
if (repeat_max == 0) if (repeat_max == 0)
{ {
@ -6610,7 +6709,7 @@ for (;; pptr++)
else else
{ {
int linkoffset; int linkoffset;
memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
code += 2 + LINK_SIZE; code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA; *previous++ = OP_BRA;
@ -6811,7 +6910,7 @@ for (;; pptr++)
if (*bracode == OP_COND || *bracode == OP_SCOND) if (*bracode == OP_COND || *bracode == OP_SCOND)
{ {
int nlen = (int)(code - bracode); int nlen = (int)(code - bracode);
memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE;
*bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
@ -7082,7 +7181,7 @@ for (;; pptr++)
else else
{ {
memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE; len += 1 + LINK_SIZE;
tempcode[0] = OP_ONCE; tempcode[0] = OP_ONCE;
@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the
beginning. We accumulate in a local variable to save frequent testing of beginning. We accumulate in a local variable to save frequent testing of
lengthptr for NULL. We cannot do this by looking at the value of 'code' at the lengthptr for NULL. We cannot do this by looking at the value of 'code' at the
start and end of each alternative, because compiled items are discarded during start and end of each alternative, because compiled items are discarded during
the pre-compile phase so that the work space is not exceeded. */ the pre-compile phase so that the workspace is not exceeded. */
length = 2 + 2*LINK_SIZE + skipunits; length = 2 + 2*LINK_SIZE + skipunits;
@ -7622,7 +7721,7 @@ for (;;)
{ {
if (cb->open_caps->flag) if (cb->open_caps->flag)
{ {
memmove(start_bracket + 1 + LINK_SIZE, start_bracket, (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket)); CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE; *start_bracket = OP_ONCE;
code += 1 + LINK_SIZE; code += 1 + LINK_SIZE;
@ -7765,10 +7864,11 @@ do {
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
} }
/* Condition */ /* Condition. If there is no second branch, it can't be anchored. */
else if (op == OP_COND) else if (op == OP_COND || op == OP_SCOND)
{ {
if (scode[GET(scode,1)] != OP_ALT) return FALSE;
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
return FALSE; return FALSE;
} }
@ -8003,6 +8103,7 @@ for (;;)
break; break;
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_SKIP_ARG: case OP_SKIP_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)
if (crc < 0) if (crc < 0)
{ {
memmove(slot + cb->name_entry_size, slot, (void)memmove(slot + cb->name_entry_size, slot,
CU2BYTES((tablecount - i) * cb->name_entry_size)); CU2BYTES((tablecount - i) * cb->name_entry_size));
break; break;
} }
@ -8311,6 +8412,7 @@ for (;; pptr++)
break; break;
case META_MARK: /* Add the length of the name. */ case META_MARK: /* Add the length of the name. */
case META_COMMIT_ARG:
case META_PRUNE_ARG: case META_PRUNE_ARG:
case META_SKIP_ARG: case META_SKIP_ARG:
case META_THEN_ARG: case META_THEN_ARG:
@ -8501,6 +8603,7 @@ for (;; pptr++)
goto EXIT; goto EXIT;
case META_MARK: case META_MARK:
case META_COMMIT_ARG:
case META_PRUNE_ARG: case META_PRUNE_ARG:
case META_SKIP_ARG: case META_SKIP_ARG:
case META_THEN_ARG: case META_THEN_ARG:
@ -8572,6 +8675,32 @@ for (;; pptr++)
case META_LOOKAHEADNOT: case META_LOOKAHEADNOT:
pptr = parsed_skip(pptr + 1, PSKIP_KET); pptr = parsed_skip(pptr + 1, PSKIP_KET);
if (pptr == NULL) goto PARSED_SKIP_FAILED; if (pptr == NULL) goto PARSED_SKIP_FAILED;
/* Also ignore any qualifiers that follow a lookahead assertion. */
switch (pptr[1])
{
case META_ASTERISK:
case META_ASTERISK_PLUS:
case META_ASTERISK_QUERY:
case META_PLUS:
case META_PLUS_PLUS:
case META_PLUS_QUERY:
case META_QUERY:
case META_QUERY_PLUS:
case META_QUERY_QUERY:
pptr++;
break;
case META_MINMAX:
case META_MINMAX_PLUS:
case META_MINMAX_QUERY:
pptr += 3;
break;
default:
break;
}
break; break;
/* Lookbehinds can be ignored, but must themselves be checked. */ /* Lookbehinds can be ignored, but must themselves be checked. */
@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
break; break;
case META_MARK: case META_MARK:
case META_COMMIT_ARG:
case META_PRUNE_ARG: case META_PRUNE_ARG:
case META_SKIP_ARG: case META_SKIP_ARG:
case META_THEN_ARG: case META_THEN_ARG:

View File

@ -181,7 +181,8 @@ static const uint8_t coptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */ 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
}; };
@ -254,7 +255,8 @@ static const uint8_t poptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */ 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ 0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */ 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
}; };
@ -292,6 +294,35 @@ typedef struct stateblock {
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int)) #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
local working space and output vectors that were created on the stack. This has
caused issues for some patterns, especially in small-stack environments such as
Windows. A new scheme is now in use which sets up a vector on the stack, but if
this is too small, heap memory is used, up to the heap_limit. The main
parameters are all numbers of ints because the workspace is a vector of ints.
The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
defined in pcre2_internal.h so as to be available to pcre2test when it is
finding the minimum heap requirement for a match. */
#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int)) /* Ints per PCRE2_SIZE ovector entry */
#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector, in ints */
#define RWS_RSIZE 1000 /* Work size for recursion, in ints */
#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion: 1000 entries, in ints */
#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases: 2 entries, in ints */
/* This structure is at the start of each workspace block. Blocks form a singly
linked chain through "next". Both "size" and "free" are counts of ints; "size"
covers the whole block including this anchor, and the unused space is the final
"free" ints of the block. */
typedef struct RWS_anchor {
struct RWS_anchor *next; /* Next block in the chain, or NULL */
unsigned int size; /* Number of ints */
unsigned int free; /* Number of ints */
} RWS_anchor;
#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
/************************************************* /*************************************************
* Process a callout * * Process a callout *
@ -353,6 +384,61 @@ return (mb->callout)(cb, mb->callout_data);
/*************************************************
*        Expand local workspace memory           *
*************************************************/

/* This function is called when internal_dfa_match() is about to be called
recursively and there is insufficient working space left in the current
workspace block. If there's an existing next block, use it; otherwise get a new
block unless the heap limit is reached.

A new block is normally twice the size of the current one, but is cut down so
that the total taken from the heap does not exceed mb->heap_limit. If what is
left is too small to hold one recursion's workspace plus the requested ovector
and the block anchor, PCRE2_ERROR_HEAPLIMIT is returned. In all successful
cases the chosen block is marked as entirely free (apart from its anchor).

Arguments:
  rwsptr     pointer to block pointer (updated)
  ovecsize   space needed for an ovector (number of ints)
  mb         the match block

Returns:     0   rwsptr has been updated
            !0   an error code (PCRE2_ERROR_HEAPLIMIT or PCRE2_ERROR_NOMEMORY)
*/

static int
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
{
RWS_anchor *rws = *rwsptr;
RWS_anchor *block;

if (rws->next != NULL)
  {
  block = rws->next;
  }

/* All sizes are in units of sizeof(int), except for mb->heap_limit, which is
in kibibytes. The limit is converted to ints using 64-bit arithmetic because
the product does not fit in an unsigned int for limits of 16 GiB or more;
truncating it would silently impose a much smaller (possibly zero) limit. */

else
  {
  const unsigned int uint_max = ~0u;
  const uint64_t limit = (uint64_t)mb->heap_limit * (1024/sizeof(int));
  const uint64_t used = (uint64_t)mb->heap_used;
  const uint64_t left = (limit > used)? limit - used : 0;
  unsigned int heapleft = (left > uint_max)? uint_max : (unsigned int)left;

  /* Double the block size, saturating instead of wrapping round. */

  unsigned int newsize = (rws->size > uint_max/2)? uint_max : rws->size * 2;

  if (newsize > heapleft) newsize = heapleft;
  if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
    return PCRE2_ERROR_HEAPLIMIT;

  /* Guard the byte count passed to malloc() against overflow where size_t is
  no wider than unsigned int. */

  if (newsize > (size_t)(-1)/sizeof(int)) return PCRE2_ERROR_NOMEMORY;

  block = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
  if (block == NULL) return PCRE2_ERROR_NOMEMORY;
  mb->heap_used += newsize;
  block->next = NULL;
  block->size = newsize;
  rws->next = block;
  }

block->free = block->size - RWS_ANCHOR_SIZE;
*rwsptr = block;
return 0;
}
/************************************************* /*************************************************
* Match a Regular Expression - DFA engine * * Match a Regular Expression - DFA engine *
*************************************************/ *************************************************/
@ -431,7 +517,8 @@ internal_dfa_match(
uint32_t offsetcount, uint32_t offsetcount,
int *workspace, int *workspace,
int wscount, int wscount,
uint32_t rlevel) uint32_t rlevel,
int *RWS)
{ {
stateblock *active_states, *new_states, *temp_states; stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state; stateblock *next_active_state, *next_new_state;
@ -788,7 +875,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0; match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2; count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets, if (count > 0) (void)memmove(offsets + 2, offsets,
(size_t)count * sizeof(PCRE2_SIZE)); (size_t)count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2) if (offsetcount >= 2)
{ {
@ -2587,10 +2674,22 @@ for (;;)
case OP_ASSERTBACK: case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NOT:
{ {
PCRE2_SPTR endasscode = code + GET(code, 1);
PCRE2_SIZE local_offsets[2];
int rc; int rc;
int local_workspace[1000]; int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SPTR endasscode = code + GET(code, 1);
RWS_anchor *rws = (RWS_anchor *)RWS;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@ -2600,10 +2699,13 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ RWS_RSIZE, /* size of same */
rlevel); /* function recursion level */ rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
@ -2615,8 +2717,6 @@ for (;;)
case OP_COND: case OP_COND:
case OP_SCOND: case OP_SCOND:
{ {
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
int codelink = (int)GET(code, 1); int codelink = (int)GET(code, 1);
PCRE2_UCHAR condcode; PCRE2_UCHAR condcode;
@ -2673,8 +2773,22 @@ for (;;)
else else
{ {
int rc; int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SPTR asscode = code + LINK_SIZE + 1; PCRE2_SPTR asscode = code + LINK_SIZE + 1;
PCRE2_SPTR endasscode = asscode + GET(asscode, 1); PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
RWS_anchor *rws = (RWS_anchor *)RWS;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@ -2684,10 +2798,13 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ RWS_RSIZE, /* size of same */
rlevel); /* function recursion level */ rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc; if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) == if ((rc >= 0) ==
@ -2702,13 +2819,25 @@ for (;;)
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
case OP_RECURSE: case OP_RECURSE:
{ {
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
RWS_anchor *rws = (RWS_anchor *)RWS;
dfa_recursion_info *ri; dfa_recursion_info *ri;
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
PCRE2_SPTR callpat = start_code + GET(code, 1); PCRE2_SPTR callpat = start_code + GET(code, 1);
uint32_t recno = (callpat == mb->start_code)? 0 : uint32_t recno = (callpat == mb->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE); GET2(callpat, 1 + LINK_SIZE);
int rc;
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
/* Check for repeating a recursion without advancing the subject /* Check for repeating a recursion without advancing the subject
pointer. This should catch convoluted mutual recursions. (Some simple pointer. This should catch convoluted mutual recursions. (Some simple
@ -2732,11 +2861,13 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ RWS_RSIZE, /* size of same */
rlevel); /* function recursion level */ rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
mb->recursive = new_recursive.prevrec; /* Done this recursion */ mb->recursive = new_recursive.prevrec; /* Done this recursion */
/* Ran out of internal offsets */ /* Ran out of internal offsets */
@ -2782,10 +2913,25 @@ for (;;)
case OP_SCBRAPOS: case OP_SCBRAPOS:
case OP_BRAPOSZERO: case OP_BRAPOSZERO:
{ {
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SIZE charcount, matched_count; PCRE2_SIZE charcount, matched_count;
PCRE2_SPTR local_ptr = ptr; PCRE2_SPTR local_ptr = ptr;
RWS_anchor *rws = (RWS_anchor *)RWS;
BOOL allow_zero; BOOL allow_zero;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
if (codevalue == OP_BRAPOSZERO) if (codevalue == OP_BRAPOSZERO)
{ {
allow_zero = TRUE; allow_zero = TRUE;
@ -2798,19 +2944,17 @@ for (;;)
for (matched_count = 0;; matched_count++) for (matched_count = 0;; matched_count++)
{ {
PCRE2_SIZE local_offsets[2]; rc = internal_dfa_match(
int local_workspace[1000];
int rc = internal_dfa_match(
mb, /* fixed match data */ mb, /* fixed match data */
code, /* this subexpression's code */ code, /* this subexpression's code */
local_ptr, /* where we currently are */ local_ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ RWS_RSIZE, /* size of same */
rlevel); /* function recursion level */ rlevel, /* function recursion level */
RWS); /* recursion workspace */
/* Failed to match */ /* Failed to match */
@ -2827,6 +2971,8 @@ for (;;)
local_ptr += charcount; /* Advance temporary position ptr */ local_ptr += charcount; /* Advance temporary position ptr */
} }
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
/* At this point we have matched the subpattern matched_count /* At this point we have matched the subpattern matched_count
times, and local_ptr is pointing to the character after the end of the times, and local_ptr is pointing to the character after the end of the
last match. */ last match. */
@ -2869,19 +3015,35 @@ for (;;)
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
case OP_ONCE: case OP_ONCE:
{ {
PCRE2_SIZE local_offsets[2]; int rc;
int local_workspace[1000]; int *local_workspace;
PCRE2_SIZE *local_offsets;
RWS_anchor *rws = (RWS_anchor *)RWS;
int rc = internal_dfa_match( if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
rc = internal_dfa_match(
mb, /* fixed match data */ mb, /* fixed match data */
code, /* this subexpression's code */ code, /* this subexpression's code */
ptr, /* where we currently are */ ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */ (PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */ RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ RWS_RSIZE, /* size of same */
rlevel); /* function recursion level */ rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc >= 0) if (rc >= 0)
{ {
@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount) pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
{ {
int rc;
const pcre2_real_code *re = (const pcre2_real_code *)code; const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match; PCRE2_SPTR start_match;
@ -3071,9 +3234,9 @@ PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR req_cu_ptr; PCRE2_SPTR req_cu_ptr;
BOOL utf, anchored, startline, firstline; BOOL utf, anchored, startline, firstline;
BOOL has_first_cu = FALSE; BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE; BOOL has_req_cu = FALSE;
PCRE2_UCHAR first_cu = 0; PCRE2_UCHAR first_cu = 0;
PCRE2_UCHAR first_cu2 = 0; PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0; PCRE2_UCHAR req_cu = 0;
@ -3088,6 +3251,17 @@ pcre2_callout_block cb;
dfa_match_block actual_match_block; dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block; dfa_match_block *mb = &actual_match_block;
/* Set up a starting block of memory for use during recursive calls to
internal_dfa_match(). By putting this on the stack, it minimizes resource use
in the case when it is not needed. If this is too small, more memory is
obtained from the heap. At the start of each block is an anchor structure. */
int base_recursion_workspace[RWS_BASE_SIZE];
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
rws->next = NULL;
rws->size = RWS_BASE_SIZE;
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */ subject string. */
@ -3184,6 +3358,7 @@ if (mcontext == NULL)
mb->memctl = re->memctl; mb->memctl = re->memctl;
mb->match_limit = PRIV(default_match_context).match_limit; mb->match_limit = PRIV(default_match_context).match_limit;
mb->match_limit_depth = PRIV(default_match_context).depth_limit; mb->match_limit_depth = PRIV(default_match_context).depth_limit;
mb->heap_limit = PRIV(default_match_context).heap_limit;
} }
else else
{ {
@ -3198,6 +3373,7 @@ else
mb->memctl = mcontext->memctl; mb->memctl = mcontext->memctl;
mb->match_limit = mcontext->match_limit; mb->match_limit = mcontext->match_limit;
mb->match_limit_depth = mcontext->depth_limit; mb->match_limit_depth = mcontext->depth_limit;
mb->heap_limit = mcontext->heap_limit;
} }
if (mb->match_limit > re->limit_match) if (mb->match_limit > re->limit_match)
@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match)
if (mb->match_limit_depth > re->limit_depth) if (mb->match_limit_depth > re->limit_depth)
mb->match_limit_depth = re->limit_depth; mb->match_limit_depth = re->limit_depth;
if (mb->heap_limit > re->limit_heap)
mb->heap_limit = re->limit_heap;
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size; re->name_count * re->name_entry_size;
mb->tables = re->tables; mb->tables = re->tables;
@ -3215,6 +3394,7 @@ mb->start_offset = start_offset;
mb->moptions = options; mb->moptions = options;
mb->poptions = re->overall_options; mb->poptions = re->overall_options;
mb->match_call_count = 0; mb->match_call_count = 0;
mb->heap_used = 0;
/* Process the \R and newline settings. */ /* Process the \R and newline settings. */
@ -3351,8 +3531,6 @@ a match. */
for (;;) for (;;)
{ {
int rc;
/* ----------------- Start of match optimizations ---------------- */ /* ----------------- Start of match optimizations ---------------- */
/* There are some optimizations that avoid running the match if a known /* There are some optimizations that avoid running the match if a known
@ -3544,7 +3722,7 @@ for (;;)
in characters, we treat it as code units to avoid spending too much time in characters, we treat it as code units to avoid spending too much time
in this optimization. */ in this optimization. */
if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH; if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
/* If req_cu is set, we know that that code unit must appear in the /* If req_cu is set, we know that that code unit must appear in the
subject for the match to succeed. If the first code unit is set, req_cu subject for the match to succeed. If the first code unit is set, req_cu
@ -3621,7 +3799,8 @@ for (;;)
(uint32_t)match_data->oveccount * 2, /* actual size of same */ (uint32_t)match_data->oveccount * 2, /* actual size of same */
workspace, /* workspace vector */ workspace, /* workspace vector */
(int)wscount, /* size of same */ (int)wscount, /* size of same */
0); /* function recurse level */ 0, /* function recurse level */
base_recursion_workspace); /* initial workspace for recursion */
/* Anything other than "no match" means we are done, always; otherwise, carry /* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */ on only if not anchored. */
@ -3637,7 +3816,7 @@ for (;;)
match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject); match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc; match_data->rc = rc;
return rc; goto EXIT;
} }
/* Advance to the next subject character unless we are at the end of a line /* Advance to the next subject character unless we are at the end of a line
@ -3668,8 +3847,18 @@ for (;;)
} /* "Bumpalong" loop */ } /* "Bumpalong" loop */
NOMATCH_EXIT:
rc = PCRE2_ERROR_NOMATCH;
return PCRE2_ERROR_NOMATCH; EXIT:
while (rws->next != NULL)
{
RWS_anchor *next = rws->next;
rws->next = next->next;
mb->memctl.free(next, mb->memctl.memory_data);
}
return rc;
} }
/* End of pcre2_dfa_match.c */ /* End of pcre2_dfa_match.c */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -107,7 +107,7 @@ static const unsigned char compile_error_texts[] =
/* 35 */ /* 35 */
"lookbehind is too complicated\0" "lookbehind is too complicated\0"
"\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
"PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
"number after (?C is greater than 255\0" "number after (?C is greater than 255\0"
"closing parenthesis for (?C expected\0" "closing parenthesis for (?C expected\0"
/* 40 */ /* 40 */
@ -133,7 +133,8 @@ static const unsigned char compile_error_texts[] =
"internal error: unknown newline setting\0" "internal error: unknown newline setting\0"
"\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
"(?R (recursive pattern call) must be followed by a closing parenthesis\0" "(?R (recursive pattern call) must be followed by a closing parenthesis\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
"obsolete error (should not occur)\0" /* Was the above */
/* 60 */ /* 60 */
"(*VERB) not recognized or malformed\0" "(*VERB) not recognized or malformed\0"
"group number is too big\0" "group number is too big\0"
@ -160,7 +161,7 @@ static const unsigned char compile_error_texts[] =
"using UCP is disabled by the application\0" "using UCP is disabled by the application\0"
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0" "character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0" "digits missing in \\x{} or \\o{} or \\N{U+}\0"
"syntax error or number too big in (?(VERSION condition\0" "syntax error or number too big in (?(VERSION condition\0"
/* 80 */ /* 80 */
"internal error: unknown opcode in auto_possessify()\0" "internal error: unknown opcode in auto_possessify()\0"
@ -178,6 +179,8 @@ static const unsigned char compile_error_texts[] =
"internal error: bad code value in parsed_skip()\0" "internal error: bad code value in parsed_skip()\0"
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
"invalid option bits with PCRE2_LITERAL\0" "invalid option bits with PCRE2_LITERAL\0"
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
"invalid hyphen in option setting\0"
; ;
/* Match-time and UTF error texts are in the same format. */ /* Match-time and UTF error texts are in the same format. */
@ -255,11 +258,13 @@ static const unsigned char match_error_texts[] =
"expected closing curly bracket in replacement string\0" "expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0" "bad substitution in replacement string\0"
/* 60 */ /* 60 */
"match with end before start is not supported\0" "match with end before start or start moved backwards is not supported\0"
"too many replacements (more than INT_MAX)\0" "too many replacements (more than INT_MAX)\0"
"bad serialized data\0" "bad serialized data\0"
"heap limit exceeded\0" "heap limit exceeded\0"
"invalid syntax\0" "invalid syntax\0"
/* 65 */
"internal error - duplicate substitution match\0"
; ;

View File

@ -129,11 +129,11 @@ while (eptr < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */ if ((ricount & 1) != 0) break; /* Grapheme break required */
} }
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
any number of Extend before a following E_Modifier. */ allows any number of them before a following Extended_Pictographic. */
if (rgb != ucp_gbExtend || if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
(lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ)) lgb != ucp_gbExtended_Pictographic)
lgb = rgb; lgb = rgb;
eptr += len; eptr += len;

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -131,6 +131,7 @@ for (;;)
break; break;
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_SKIP_ARG: case OP_SKIP_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -165,6 +165,16 @@ by "configure". */
#define INT64_OR_DOUBLE double #define INT64_OR_DOUBLE double
#endif #endif
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
#define PRIV(name) _pcre2_##name
#endif
/* When compiling for use with the Virtual Pascal compiler, these functions /* When compiling for use with the Virtual Pascal compiler, these functions
need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
option on the command line. */ option on the command line. */
@ -178,50 +188,15 @@ option on the command line. */
#define memset(s,c,n) _memset(s,c,n) #define memset(s,c,n) _memset(s,c,n)
#else /* VPCOMPAT */ #else /* VPCOMPAT */
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), /* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY a macro that calls an emulating function. */
is set. Otherwise, include an emulating function for those systems that have
neither (there some non-Unix environments where this is the case). */
#ifndef HAVE_MEMMOVE #ifndef HAVE_MEMMOVE
#undef memmove /* some systems may have a macro */ #undef memmove /* Some systems may have a macro */
#ifdef HAVE_BCOPY #define memmove(a, b, c) PRIV(memmove)(a, b, c)
#define memmove(a, b, c) bcopy(b, a, c)
#else /* HAVE_BCOPY */
static void *
pcre2_memmove(void *d, const void *s, size_t n)
{
size_t i;
unsigned char *dest = (unsigned char *)d;
const unsigned char *src = (const unsigned char *)s;
if (dest > src)
{
dest += n;
src += n;
for (i = 0; i < n; ++i) *(--dest) = *(--src);
return (void *)dest;
}
else
{
for (i = 0; i < n; ++i) *dest++ = *src++;
return (void *)(dest - n);
}
}
#define memmove(a, b, c) pcre2_memmove(a, b, c)
#endif /* not HAVE_BCOPY */
#endif /* not HAVE_MEMMOVE */ #endif /* not HAVE_MEMMOVE */
#endif /* not VPCOMPAT */ #endif /* not VPCOMPAT */
/* External (in the C sense) functions and tables that are private to the
libraries are always referenced using the PRIV macro. This makes it possible
for pcre2test.c to include some of the source files from the libraries using a
different PRIV definition to avoid name clashes. It also makes it clear in the
code that a non-static object is being referenced. */
#ifndef PRIV
#define PRIV(name) _pcre2_##name
#endif
/* This is an unsigned int value that no UTF character can ever have, as /* This is an unsigned int value that no UTF character can ever have, as
Unicode doesn't go beyond 0x0010ffff. */ Unicode doesn't go beyond 0x0010ffff. */
@ -247,12 +222,17 @@ not rely on this. */
pcre2_match() is allocated on the system stack, of this size (bytes). The size pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
on the number of capturing parentheses) so 20K handles quite a few frames. A on the number of capturing parentheses) so 20KiB handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */ maximum size of this can be limited. */
#define START_FRAMES_SIZE 20480 #define START_FRAMES_SIZE 20480
/* Similarly, for DFA matching, an initial internal workspace vector is
allocated on the stack. */
#define DFA_START_RWS_SIZE 30720
/* Define the default BSR convention. */ /* Define the default BSR convention. */
#ifdef BSR_ANYCRLF #ifdef BSR_ANYCRLF
@ -585,14 +565,15 @@ these tables. */
#define cbit_cntrl 288 /* [:cntrl:] */ #define cbit_cntrl 288 /* [:cntrl:] */
#define cbit_length 320 /* Length of the cbits table */ #define cbit_length 320 /* Length of the cbits table */
/* Bit definitions for entries in the ctypes table. */ /* Bit definitions for entries in the ctypes table. Do not change these values
without checking pcre2_jit_compile.c, which has an assertion to ensure that
ctype_word has the value 16. */
#define ctype_space 0x01 #define ctype_space 0x01
#define ctype_letter 0x02 #define ctype_letter 0x02
#define ctype_digit 0x04 #define ctype_digit 0x04
#define ctype_xdigit 0x08 #define ctype_xdigit 0x08 /* not actually used any more */
#define ctype_word 0x10 /* alphanumeric or '_' */ #define ctype_word 0x10 /* alphanumeric or '_' */
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
/* Offsets of the various tables from the base tables pointer, and /* Offsets of the various tables from the base tables pointer, and
total length of the tables. */ total length of the tables. */
@ -1267,36 +1248,6 @@ contain characters with values greater than 255. */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ #define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ #define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* Escape items that are just an encoding of a particular data value. These
appear in the escapes[] table in pcre2_compile.c as positive numbers. */
#ifndef ESC_a
#define ESC_a CHAR_BEL
#endif
#ifndef ESC_e
#define ESC_e CHAR_ESC
#endif
#ifndef ESC_f
#define ESC_f CHAR_FF
#endif
#ifndef ESC_n
#define ESC_n CHAR_LF
#endif
#ifndef ESC_r
#define ESC_r CHAR_CR
#endif
/* We can't officially use ESC_t because it is a POSIX reserved identifier
(presumably because of all the others like size_t). */
#ifndef ESC_tee
#define ESC_tee CHAR_HT
#endif
/* These are escaped items that aren't just an encoding of a particular data /* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns 0 value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. In the escapes[] table in pcre2_compile.c their values for a data character. In the escapes[] table in pcre2_compile.c their values
@ -1578,23 +1529,26 @@ enum {
OP_THEN, /* 155 */ OP_THEN, /* 155 */
OP_THEN_ARG, /* 156 same, but with argument */ OP_THEN_ARG, /* 156 same, but with argument */
OP_COMMIT, /* 157 */ OP_COMMIT, /* 157 */
OP_COMMIT_ARG, /* 158 same, but with argument */
/* These are forced failure and success verbs */ /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
without the need for a special opcode. */
OP_FAIL, /* 158 */ OP_FAIL, /* 159 */
OP_ACCEPT, /* 159 */ OP_ACCEPT, /* 160 */
OP_ASSERT_ACCEPT, /* 160 Used inside assertions */ OP_ASSERT_ACCEPT, /* 161 Used inside assertions */
OP_CLOSE, /* 161 Used before OP_ACCEPT to close open captures */ OP_CLOSE, /* 162 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */ /* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 162 */ OP_SKIPZERO, /* 163 */
/* This is used to identify a DEFINE group during compilation so that it can /* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */ compilation finishes. */
OP_DEFINE, /* 163 */ OP_DEFINE, /* 164 */
/* This is not an opcode, but is used to check that tables indexed by opcode /* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been are the correct length, in order to catch updating errors - there have been
@ -1650,7 +1604,7 @@ some cases doesn't actually use these names at all). */
"Cond false", "Cond true", \ "Cond false", "Cond true", \
"Brazero", "Braminzero", "Braposzero", \ "Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \ "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \ "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL", \
"*ACCEPT", "*ASSERT_ACCEPT", \ "*ACCEPT", "*ASSERT_ACCEPT", \
"Close", "Skip zero", "Define" "Close", "Skip zero", "Define"
@ -1742,7 +1696,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \
1, 3, /* THEN, THEN_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1, 3, /* COMMIT, COMMIT_ARG */ \
1, 1, 1, /* FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */ 1 /* DEFINE */
@ -1896,7 +1851,7 @@ extern const ucd_record PRIV(ucd_records)[];
#if PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 32
extern const ucd_record PRIV(dummy_ucd_record)[]; extern const ucd_record PRIV(dummy_ucd_record)[];
#endif #endif
extern const uint8_t PRIV(ucd_stage1)[]; extern const uint16_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[]; extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[]; extern const uint32_t PRIV(ucp_gbtable)[];
extern const uint32_t PRIV(ucp_gentype)[]; extern const uint32_t PRIV(ucp_gentype)[];
@ -1976,6 +1931,14 @@ extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
uint32_t *, BOOL); uint32_t *, BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
/* This function is needed only when memmove() is not available. */
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
#define _pcre2_memmove PCRE2_SUFFIX(_pcre2_memmove)
extern void * _pcre2_memmove(void *, const void *, size_t);
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH */ #endif /* PCRE2_CODE_UNIT_WIDTH */
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */ #endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */

View File

@ -793,11 +793,23 @@ typedef struct heapframe {
uint8_t return_id; /* Where to go on in internal "return" */ uint8_t return_id; /* Where to go on in internal "return" */
uint8_t op; /* Processing opcode */ uint8_t op; /* Processing opcode */
/* At this point, the structure is 16-bit aligned. On most architectures
the alignment requirement for a pointer will ensure that the eptr field below
is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
that is 16-bit aligned. We must therefore ensure that what comes between here
and eptr is an odd multiple of 16 bits so as to get back into 32-bit
alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
fudges in the other cases. In the 32-bit case the padding comes first so that
the occu field itself is 32-bit aligned. Without the padding, this structure
is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR occu[6]; /* Used for other case code units */ PCRE2_UCHAR occu[6]; /* Used for other case code units */
#elif PCRE2_CODE_UNIT_WIDTH == 16 #elif PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR occu[2]; /* Used for other case code units */ PCRE2_UCHAR occu[2]; /* Used for other case code units */
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
#else #else
uint8_t unused[2]; /* Ensure 32-bit alignment (see above) */
PCRE2_UCHAR occu[1]; /* Used for other case code units */ PCRE2_UCHAR occu[1]; /* Used for other case code units */
#endif #endif
@ -818,6 +830,9 @@ typedef struct heapframe {
PCRE2_SIZE ovector[131072]; /* Must be last in the structure */ PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
} heapframe; } heapframe;
/* This typedef is a check that the size of the heapframe structure is a
multiple of PCRE2_SIZE. See various comments above. */
typedef char check_heapframe_size[ typedef char check_heapframe_size[
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)]; ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
@ -881,6 +896,8 @@ typedef struct dfa_match_block {
PCRE2_SPTR last_used_ptr; /* Latest consulted character */ PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */ const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */ PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE heap_limit; /* As it says */
PCRE2_SIZE heap_used; /* As it says */
uint32_t match_limit; /* As it says */ uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */ uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of calls of internal function */ uint32_t match_call_count; /* Number of calls of internal function */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -839,6 +839,7 @@ switch(*cc)
#endif #endif
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_SKIP_ARG: case OP_SKIP_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
@ -939,6 +940,7 @@ while (cc < ccend)
common->control_head_ptr = 1; common->control_head_ptr = 1;
/* Fall through. */ /* Fall through. */
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_MARK: case OP_MARK:
if (common->mark_ptr == 0) if (common->mark_ptr == 0)
@ -1553,6 +1555,7 @@ while (cc < ccend)
break; break;
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0); SLJIT_ASSERT(common->mark_ptr != 0);
@ -1733,6 +1736,7 @@ while (cc < ccend)
break; break;
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0); SLJIT_ASSERT(common->mark_ptr != 0);
@ -2041,6 +2045,7 @@ while (cc < ccend)
break; break;
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0); SLJIT_ASSERT(common->mark_ptr != 0);
@ -2428,6 +2433,7 @@ while (cc < ccend)
break; break;
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0); SLJIT_ASSERT(common->mark_ptr != 0);
@ -3666,7 +3672,8 @@ if (!common->utf)
#endif #endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@ -5894,6 +5901,8 @@ for (i = 0; i < 32; i++)
} }
} }
if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
i = 0; i = 0;
j = 0; j = 0;
@ -6627,7 +6636,8 @@ if (needstype || needsscript)
#endif #endif
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@ -7254,10 +7264,11 @@ while (cc < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */ if ((ricount & 1) != 0) break; /* Grapheme break required */
} }
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
any number of Extend before a following E_Modifier. */ allows any number of them before a following Extended_Pictographic. */
if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ)) if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
lgb != ucp_gbExtended_Pictographic)
lgb = rgb; lgb = rgb;
prevcc = cc; prevcc = cc;
@ -7309,10 +7320,11 @@ while (cc < end_subject)
if ((ricount & 1) != 0) break; /* Grapheme break required */ if ((ricount & 1) != 0) break; /* Grapheme break required */
} }
/* If Extend follows E_Base[_GAZ] do not update lgb; this allows /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
any number of Extend before a following E_Modifier. */ allows any number of them before a following Extended_Pictographic. */
if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ)) if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
lgb != ucp_gbExtended_Pictographic)
lgb = rgb; lgb = rgb;
cc++; cc++;
@ -10346,7 +10358,8 @@ backtrack_common *backtrack;
PCRE2_UCHAR opcode = *cc; PCRE2_UCHAR opcode = *cc;
PCRE2_SPTR ccend = cc + 1; PCRE2_SPTR ccend = cc + 1;
if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
ccend += 2 + cc[1]; ccend += 2 + cc[1];
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@ -10358,7 +10371,7 @@ if (opcode == OP_SKIP)
return ccend; return ccend;
} }
if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
{ {
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
@ -10677,6 +10690,7 @@ while (cc < ccend)
case OP_THEN: case OP_THEN:
case OP_THEN_ARG: case OP_THEN_ARG:
case OP_COMMIT: case OP_COMMIT:
case OP_COMMIT_ARG:
cc = compile_control_verb_matchingpath(common, cc, parent); cc = compile_control_verb_matchingpath(common, cc, parent);
break; break;
@ -11751,6 +11765,7 @@ while (current)
break; break;
case OP_COMMIT: case OP_COMMIT:
case OP_COMMIT_ARG:
if (!common->local_quit_available) if (!common->local_quit_available)
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
if (common->quit_label == NULL) if (common->quit_label == NULL)

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -141,13 +141,6 @@ for (i = 0; i < 256; i++)
if (isdigit(i)) x += ctype_digit; if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit; if (isxdigit(i)) x += ctype_xdigit;
if (isalnum(i) || i == '_') x += ctype_word; if (isalnum(i) || i == '_') x += ctype_word;
/* Note: strchr includes the terminating zero in the characters it considers.
In this instance, that is ok because we want binary zero to be flagged as a
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x; *p++ = x;
} }

View File

@ -43,11 +43,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h" #include "config.h"
#endif #endif
/* These defines enables debugging code */ /* These defines enable debugging code */
//#define DEBUG_FRAMES_DISPLAY /* #define DEBUG_FRAMES_DISPLAY */
//#define DEBUG_SHOW_OPS /* #define DEBUG_SHOW_OPS */
//#define DEBUG_SHOW_RMATCH /* #define DEBUG_SHOW_RMATCH */
#ifdef DEBUG_FRAME_DISPLAY #ifdef DEBUG_FRAME_DISPLAY
#include <stdarg.h> #include <stdarg.h>
@ -149,7 +149,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
RM31, RM32, RM33, RM34, RM35 }; RM31, RM32, RM33, RM34, RM35, RM36 };
#ifdef SUPPORT_WIDE_CHARS #ifdef SUPPORT_WIDE_CHARS
enum { RM100=100, RM101 }; enum { RM100=100, RM101 };
@ -770,7 +770,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */ /* ===================================================================== */
/* Real or forced end of the pattern, assertion, or recursion. In an /* Real or forced end of the pattern, assertion, or recursion. In an
assertion ACCEPT, update the last used pointer and remember the current assertion ACCEPT, update the last used pointer and remember the current
frame so that the captures can be fished out of it. */ frame so that the captures and mark can be fished out of it. */
case OP_ASSERT_ACCEPT: case OP_ASSERT_ACCEPT:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
@ -1776,7 +1776,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */ /* ===================================================================== */
/* Match a bit-mapped character class, possibly repeatedly. These op codes /* Match a bit-mapped character class, possibly repeatedly. These opcodes
are used when all the characters in the class have values in the range are used when all the characters in the class have values in the range
0-255, and either the matching is caseful, or the characters are in the 0-255, and either the matching is caseful, or the characters are in the
range 0-127 when UTF processing is enabled. The only difference between range 0-127 when UTF processing is enabled. The only difference between
@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */ if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
go too far. */
for (;;) for (;;)
{ {
RMATCH(Fecode, RM201); RMATCH(Fecode, RM201);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
BACKCHAR(Feptr); BACKCHAR(Feptr);
} }
} }
@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */ if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
go too far. */
for(;;) for(;;)
{ {
RMATCH(Fecode, RM101); RMATCH(Fecode, RM101);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Feptr-- == Lstart_eptr) break; /* Tried at original position */ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (utf) BACKCHAR(Feptr); if (utf) BACKCHAR(Feptr);
#endif #endif
@ -2456,7 +2464,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */ /* ===================================================================== */
/* Match a single character type repeatedly. Note that the property type /* Match a single character type repeatedly. Note that the property type
does not need to be in a stack frame as it not used within an RMATCH() does not need to be in a stack frame as it is not used within an RMATCH()
loop. */ loop. */
#define Lstart_eptr F->temp_sptr[0] #define Lstart_eptr F->temp_sptr[0]
@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */ if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a /* After \C in UTF mode, Lstart_eptr might be in the middle of a
Unicode character. Use <= pp to ensure backtracking doesn't go too far. Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
*/ go too far. */
for(;;) for(;;)
{ {
@ -5111,7 +5119,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Positive assertions are like other groups except that PCRE doesn't allow /* Positive assertions are like other groups except that PCRE doesn't allow
the effect of (*THEN) to escape beyond an assertion; it is therefore the effect of (*THEN) to escape beyond an assertion; it is therefore
treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
captures retained. Any other return is an error. */ captures and mark retained. Any other return is an error. */
#define Lframe_type F->temp_32[0] #define Lframe_type F->temp_32[0]
@ -5128,6 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
(char *)assert_accept_frame + offsetof(heapframe, ovector), (char *)assert_accept_frame + offsetof(heapframe, ovector),
assert_accept_frame->offset_top * sizeof(PCRE2_SIZE)); assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
Foffset_top = assert_accept_frame->offset_top; Foffset_top = assert_accept_frame->offset_top;
Fmark = assert_accept_frame->mark;
break; break;
} }
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
@ -5416,7 +5425,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
Feptr -= number; Feptr -= number;
} }
/* Save the earliest consulted character, then skip to next op code */ /* Save the earliest consulted character, then skip to next opcode */
if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr; if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
Fecode += 1 + LINK_SIZE; Fecode += 1 + LINK_SIZE;
@ -5501,7 +5510,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
frame so that it points to the final branch. */ frame so that it points to the final branch. */
case OP_ONCE: case OP_ONCE:
Fback_frame = ((char *)F - (char *)P) + frame_size; Fback_frame = ((char *)F - (char *)P);
for (;;) for (;;)
{ {
uint32_t y = GET(P->ecode,1); uint32_t y = GET(P->ecode,1);
@ -5829,6 +5838,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
mb->verb_current_recurse = Fcurrent_recurse; mb->verb_current_recurse = Fcurrent_recurse;
RRETURN(MATCH_COMMIT); RRETURN(MATCH_COMMIT);
case OP_COMMIT_ARG:
Fmark = mb->nomatch_mark = Fecode + 2;
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
mb->verb_current_recurse = Fcurrent_recurse;
RRETURN(MATCH_COMMIT);
case OP_PRUNE: case OP_PRUNE:
RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14); RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -5921,7 +5937,7 @@ in rrc. */
RETURN_SWITCH: RETURN_SWITCH:
if (Frdepth == 0) return rrc; /* Exit from the top level */ if (Frdepth == 0) return rrc; /* Exit from the top level */
F = (heapframe *)((char *)F - Fback_frame); /* Back track */ F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */ mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
#ifdef DEBUG_SHOW_RMATCH #ifdef DEBUG_SHOW_RMATCH
@ -5934,7 +5950,7 @@ switch (Freturn_id)
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
LBL(33) LBL(34) LBL(35) LBL(33) LBL(34) LBL(35) LBL(36)
#ifdef SUPPORT_WIDE_CHARS #ifdef SUPPORT_WIDE_CHARS
LBL(100) LBL(101) LBL(100) LBL(101)
@ -6275,7 +6291,7 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
/* If a pattern has very many capturing parentheses, the frame size may be very /* If a pattern has very many capturing parentheses, the frame size may be very
large. Ensure that there are at least 10 available frames by getting an initial large. Ensure that there are at least 10 available frames by getting an initial
vector on the heap if necessary, except when the heap limit prevents this. Get vector on the heap if necessary, except when the heap limit prevents this. Get
fewer if possible. (The heap limit is in kilobytes.) */ fewer if possible. (The heap limit is in kibibytes.) */
if (frame_size <= START_FRAMES_SIZE/10) if (frame_size <= START_FRAMES_SIZE/10)
{ {

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -390,6 +390,7 @@ while (TRUE)
#endif #endif
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_SKIP_ARG: case OP_SKIP_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -127,7 +127,25 @@ dst_bytes += tables_length;
for (i = 0; i < number_of_codes; i++) for (i = 0; i < number_of_codes; i++)
{ {
re = (const pcre2_real_code *)(codes[i]); re = (const pcre2_real_code *)(codes[i]);
memcpy(dst_bytes, (char *)re, re->blocksize); (void)memcpy(dst_bytes, (char *)re, re->blocksize);
/* Certain fields in the compiled code block are re-set during
deserialization. In order to ensure that the serialized data stream is always
the same for the same pattern, set them to zero here. We can't assume the
copy of the pattern is correctly aligned for accessing the fields as part of
a structure. Note the use of sizeof(void *) in the second of these, to
specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
pointer to uint8_t), gcc gives a warning because the first argument is also a
pointer to uint8_t. Casting the first argument to (void *) can stop this, but
it didn't stop Coverity giving the same complaint. */
(void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
sizeof(pcre2_memctl));
(void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
sizeof(void *));
(void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
sizeof(void *));
dst_bytes += re->blocksize; dst_bytes += re->blocksize;
} }

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge New API code Copyright (c) 2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -50,6 +50,42 @@ functions work only on 8-bit data. */
#include "pcre2_internal.h" #include "pcre2_internal.h"
/*************************************************
* Emulated memmove() for systems without it *
*************************************************/
/* This function can make use of bcopy() if it is available. Otherwise do it by
steam, as there are some non-Unix environments that lack both memmove() and
bcopy(). */
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
void *
PRIV(memmove)(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
size_t i;
unsigned char *dest = (unsigned char *)d;
const unsigned char *src = (const unsigned char *)s;
if (dest > src)
{
dest += n;
src += n;
for (i = 0; i < n; ++i) *(--dest) = *(--src);
return (void *)dest;
}
else
{
for (i = 0; i < n; ++i) *dest++ = *src++;
return (void *)(dest - n);
}
#endif /* not HAVE_BCOPY */
}
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */
/************************************************* /*************************************************
* Compare two zero-terminated PCRE2 strings * * Compare two zero-terminated PCRE2 strings *
*************************************************/ *************************************************/

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -707,6 +707,7 @@ for (;;)
/* Skip these, but we need to add in the name length. */ /* Skip these, but we need to add in the name length. */
case OP_MARK: case OP_MARK:
case OP_COMMIT_ARG:
case OP_PRUNE_ARG: case OP_PRUNE_ARG:
case OP_SKIP_ARG: case OP_SKIP_ARG:
case OP_THEN_ARG: case OP_THEN_ARG:
@ -956,6 +957,7 @@ do
case OP_CIRCM: case OP_CIRCM:
case OP_CLOSE: case OP_CLOSE:
case OP_COMMIT: case OP_COMMIT:
case OP_COMMIT_ARG:
case OP_COND: case OP_COND:
case OP_CREF: case OP_CREF:
case OP_FALSE: case OP_FALSE:
@ -1274,7 +1276,7 @@ do
break; break;
/* Single character types set the bits and stop. Note that if PCRE2_UCP /* Single character types set the bits and stop. Note that if PCRE2_UCP
is set, we do not see these op codes because \d etc are converted to is set, we do not see these opcodes because \d etc are converted to
properties. Therefore, these apply in the case when only characters less properties. Therefore, these apply in the case when only characters less
than 256 are recognized to match the types. */ than 256 are recognized to match the types. */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -238,10 +238,12 @@ PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0; PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector; PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[3];
buff_offset = 0; buff_offset = 0;
lengthleft = buff_length = *blength; lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET; *blength = PCRE2_UNSET;
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
/* Partial matching is not valid. */ /* Partial matching is not valid. */
@ -361,14 +363,34 @@ do
} }
/* Handle a successful match. Matches that use \K to end before they start /* Handle a successful match. Matches that use \K to end before they start
are not supported. */ or start before the current point in the subject are not supported. */
if (ovector[1] < ovector[0]) if (ovector[1] < ovector[0] || ovector[0] < start_offset)
{ {
rc = PCRE2_ERROR_BADSUBSPATTERN; rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT; goto EXIT;
} }
/* Check for the same match as previous. This is legitimate after matching an
empty string that starts after the initial match offset. We have tried again
at the match point in case the pattern is one like /(?<=\G.)/ which can never
match at its starting point, so running the match achieves the bumpalong. If
we do get the same (null) match at the original match point, it isn't such a
pattern, so we now do the empty string magic. In all other cases, a repeat
match should never occur. */
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
{
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
{
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
ovecsave[2] = start_offset;
continue; /* Back to the top of the loop */
}
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
goto EXIT;
}
/* Count substitutions with a paranoid check for integer overflow; surely no /* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */ real call to this function would ever hit this! */
@ -799,13 +821,18 @@ do
} /* End handling a literal code unit */ } /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */ } /* End of loop for scanning the replacement. */
/* The replacement has been copied to the output. Update the start offset to /* The replacement has been copied to the output. Save the details of this
point to the rest of the subject string. If we matched an empty string, match. See above for how this data is used. If we matched an empty string, do
do the magic for global matches. */ the magic for global matches. Finally, update the start offset to point to
the rest of the subject string. */
start_offset = ovector[1]; ovecsave[0] = ovector[0];
goptions = (ovector[0] != ovector[1])? 0 : ovecsave[1] = ovector[1];
ovecsave[2] = start_offset;
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
start_offset = ovector[1];
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */ /* Copy the rest of the subject. */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2017 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = {
/* This table encodes the rules for finding the end of an extended grapheme /* This table encodes the rules for finding the end of an extended grapheme
cluster. Every code point has a grapheme break property which is one of the cluster. Every code point has a grapheme break property which is one of the
ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
the properties of two adjacent code points. The left property selects a word 10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
from the table, and the right property selects a bit from that word like this: code points. The left property selects a word from the table, and the right
property selects a bit from that word like this:
PRIV(ucp_gbtable)[left-property] & (1 << right-property) PRIV(ucp_gbtable)[left-property] & (1 << right-property)
@ -166,19 +167,17 @@ are implementing).
6. Do not break after Prepend characters. 6. Do not break after Prepend characters.
7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed 7. Do not break within emoji modifier sequences or emoji zwj sequences. That
by E_Modifier). Extend characters are allowed before the modifier; this is, do not break between characters with the Extended_Pictographic property.
cannot be represented in this table, the code has to deal with it. Extend and ZWJ characters are allowed between the characters; this cannot be
represented in this table, the code has to deal with it.
8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj or 8. Do not break within emoji flag sequences. That is, do not break between
E_Base_GAZ).
9. Do not break within emoji flag sequences. That is, do not break between
regional indicator (RI) symbols if there are an odd number of RI characters regional indicator (RI) symbols if there are an odd number of RI characters
before the break point. This table encodes "join RI characters"; the code before the break point. This table encodes "join RI characters"; the code
has to deal with checking for previous adjoining RIs. has to deal with checking for previous adjoining RIs.
10. Otherwise, break everywhere. 9. Otherwise, break everywhere.
*/ */
#define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ) #define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
@ -191,10 +190,7 @@ const uint32_t PRIV(ucp_gbtable)[] = {
ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */ ESZ|(1<<ucp_gbPrepend)| /* 4 Prepend */
(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)| (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
(1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)| (1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
(1<<ucp_gbRegionalIndicator)| (1<<ucp_gbRegionalIndicator),
(1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
(1<<ucp_gbE_Base_GAZ)|
(1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
ESZ, /* 5 SpacingMark */ ESZ, /* 5 SpacingMark */
ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */ ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)| /* 6 L */
(1<<ucp_gbLVT), (1<<ucp_gbLVT),
@ -204,11 +200,8 @@ const uint32_t PRIV(ucp_gbtable)[] = {
ESZ|(1<<ucp_gbT), /* 10 LVT */ ESZ|(1<<ucp_gbT), /* 10 LVT */
(1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */ (1<<ucp_gbRegionalIndicator), /* 11 RegionalIndicator */
ESZ, /* 12 Other */ ESZ, /* 12 Other */
ESZ|(1<<ucp_gbE_Modifier), /* 13 E_Base */ ESZ, /* 13 ZWJ */
ESZ, /* 14 E_Modifier */ ESZ|(1<<ucp_gbExtended_Pictographic) /* 14 Extended Pictographic */
ESZ|(1<<ucp_gbE_Modifier), /* 15 E_Base_GAZ */
ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ), /* 16 ZWJ */
ESZ /* 12 Glue_After_Zwj */
}; };
#undef ESZ #undef ESZ
@ -282,6 +275,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" #define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0" #define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
@ -292,9 +286,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0" #define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0" #define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" #define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" #define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
#define STRING_Han0 STR_H STR_a STR_n "\0" #define STRING_Han0 STR_H STR_a STR_n "\0"
#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0" #define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0" #define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0" #define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0" #define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
@ -330,6 +326,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_M0 STR_M "\0" #define STRING_M0 STR_M "\0"
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0" #define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0" #define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
@ -337,6 +334,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0" #define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
#define STRING_Mc0 STR_M STR_c "\0" #define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0" #define STRING_Me0 STR_M STR_e "\0"
#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0" #define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0" #define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
@ -364,6 +362,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0" #define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0" #define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
@ -397,6 +396,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sk0 STR_S STR_k "\0" #define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0" #define STRING_Sm0 STR_S STR_m "\0"
#define STRING_So0 STR_S STR_o "\0" #define STRING_So0 STR_S STR_o "\0"
#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0" #define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0" #define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0" #define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
@ -469,6 +469,7 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0 STRING_Cyrillic0
STRING_Deseret0 STRING_Deseret0
STRING_Devanagari0 STRING_Devanagari0
STRING_Dogra0
STRING_Duployan0 STRING_Duployan0
STRING_Egyptian_Hieroglyphs0 STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0 STRING_Elbasan0
@ -479,9 +480,11 @@ const char PRIV(utt_names)[] =
STRING_Grantha0 STRING_Grantha0
STRING_Greek0 STRING_Greek0
STRING_Gujarati0 STRING_Gujarati0
STRING_Gunjala_Gondi0
STRING_Gurmukhi0 STRING_Gurmukhi0
STRING_Han0 STRING_Han0
STRING_Hangul0 STRING_Hangul0
STRING_Hanifi_Rohingya0
STRING_Hanunoo0 STRING_Hanunoo0
STRING_Hatran0 STRING_Hatran0
STRING_Hebrew0 STRING_Hebrew0
@ -517,6 +520,7 @@ const char PRIV(utt_names)[] =
STRING_Lydian0 STRING_Lydian0
STRING_M0 STRING_M0
STRING_Mahajani0 STRING_Mahajani0
STRING_Makasar0
STRING_Malayalam0 STRING_Malayalam0
STRING_Mandaic0 STRING_Mandaic0
STRING_Manichaean0 STRING_Manichaean0
@ -524,6 +528,7 @@ const char PRIV(utt_names)[] =
STRING_Masaram_Gondi0 STRING_Masaram_Gondi0
STRING_Mc0 STRING_Mc0
STRING_Me0 STRING_Me0
STRING_Medefaidrin0
STRING_Meetei_Mayek0 STRING_Meetei_Mayek0
STRING_Mende_Kikakui0 STRING_Mende_Kikakui0
STRING_Meroitic_Cursive0 STRING_Meroitic_Cursive0
@ -551,6 +556,7 @@ const char PRIV(utt_names)[] =
STRING_Old_North_Arabian0 STRING_Old_North_Arabian0
STRING_Old_Permic0 STRING_Old_Permic0
STRING_Old_Persian0 STRING_Old_Persian0
STRING_Old_Sogdian0
STRING_Old_South_Arabian0 STRING_Old_South_Arabian0
STRING_Old_Turkic0 STRING_Old_Turkic0
STRING_Oriya0 STRING_Oriya0
@ -584,6 +590,7 @@ const char PRIV(utt_names)[] =
STRING_Sk0 STRING_Sk0
STRING_Sm0 STRING_Sm0
STRING_So0 STRING_So0
STRING_Sogdian0
STRING_Sora_Sompeng0 STRING_Sora_Sompeng0
STRING_Soyombo0 STRING_Soyombo0
STRING_Sundanese0 STRING_Sundanese0
@ -656,154 +663,161 @@ const ucp_type_table PRIV(utt)[] = {
{ 265, PT_SC, ucp_Cyrillic }, { 265, PT_SC, ucp_Cyrillic },
{ 274, PT_SC, ucp_Deseret }, { 274, PT_SC, ucp_Deseret },
{ 282, PT_SC, ucp_Devanagari }, { 282, PT_SC, ucp_Devanagari },
{ 293, PT_SC, ucp_Duployan }, { 293, PT_SC, ucp_Dogra },
{ 302, PT_SC, ucp_Egyptian_Hieroglyphs }, { 299, PT_SC, ucp_Duployan },
{ 323, PT_SC, ucp_Elbasan }, { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 331, PT_SC, ucp_Ethiopic }, { 329, PT_SC, ucp_Elbasan },
{ 340, PT_SC, ucp_Georgian }, { 337, PT_SC, ucp_Ethiopic },
{ 349, PT_SC, ucp_Glagolitic }, { 346, PT_SC, ucp_Georgian },
{ 360, PT_SC, ucp_Gothic }, { 355, PT_SC, ucp_Glagolitic },
{ 367, PT_SC, ucp_Grantha }, { 366, PT_SC, ucp_Gothic },
{ 375, PT_SC, ucp_Greek }, { 373, PT_SC, ucp_Grantha },
{ 381, PT_SC, ucp_Gujarati }, { 381, PT_SC, ucp_Greek },
{ 390, PT_SC, ucp_Gurmukhi }, { 387, PT_SC, ucp_Gujarati },
{ 399, PT_SC, ucp_Han }, { 396, PT_SC, ucp_Gunjala_Gondi },
{ 403, PT_SC, ucp_Hangul }, { 410, PT_SC, ucp_Gurmukhi },
{ 410, PT_SC, ucp_Hanunoo }, { 419, PT_SC, ucp_Han },
{ 418, PT_SC, ucp_Hatran }, { 423, PT_SC, ucp_Hangul },
{ 425, PT_SC, ucp_Hebrew }, { 430, PT_SC, ucp_Hanifi_Rohingya },
{ 432, PT_SC, ucp_Hiragana }, { 446, PT_SC, ucp_Hanunoo },
{ 441, PT_SC, ucp_Imperial_Aramaic }, { 454, PT_SC, ucp_Hatran },
{ 458, PT_SC, ucp_Inherited }, { 461, PT_SC, ucp_Hebrew },
{ 468, PT_SC, ucp_Inscriptional_Pahlavi }, { 468, PT_SC, ucp_Hiragana },
{ 490, PT_SC, ucp_Inscriptional_Parthian }, { 477, PT_SC, ucp_Imperial_Aramaic },
{ 513, PT_SC, ucp_Javanese }, { 494, PT_SC, ucp_Inherited },
{ 522, PT_SC, ucp_Kaithi }, { 504, PT_SC, ucp_Inscriptional_Pahlavi },
{ 529, PT_SC, ucp_Kannada }, { 526, PT_SC, ucp_Inscriptional_Parthian },
{ 537, PT_SC, ucp_Katakana }, { 549, PT_SC, ucp_Javanese },
{ 546, PT_SC, ucp_Kayah_Li }, { 558, PT_SC, ucp_Kaithi },
{ 555, PT_SC, ucp_Kharoshthi }, { 565, PT_SC, ucp_Kannada },
{ 566, PT_SC, ucp_Khmer }, { 573, PT_SC, ucp_Katakana },
{ 572, PT_SC, ucp_Khojki }, { 582, PT_SC, ucp_Kayah_Li },
{ 579, PT_SC, ucp_Khudawadi }, { 591, PT_SC, ucp_Kharoshthi },
{ 589, PT_GC, ucp_L }, { 602, PT_SC, ucp_Khmer },
{ 591, PT_LAMP, 0 }, { 608, PT_SC, ucp_Khojki },
{ 594, PT_SC, ucp_Lao }, { 615, PT_SC, ucp_Khudawadi },
{ 598, PT_SC, ucp_Latin }, { 625, PT_GC, ucp_L },
{ 604, PT_SC, ucp_Lepcha }, { 627, PT_LAMP, 0 },
{ 611, PT_SC, ucp_Limbu }, { 630, PT_SC, ucp_Lao },
{ 617, PT_SC, ucp_Linear_A }, { 634, PT_SC, ucp_Latin },
{ 626, PT_SC, ucp_Linear_B }, { 640, PT_SC, ucp_Lepcha },
{ 635, PT_SC, ucp_Lisu }, { 647, PT_SC, ucp_Limbu },
{ 640, PT_PC, ucp_Ll }, { 653, PT_SC, ucp_Linear_A },
{ 643, PT_PC, ucp_Lm }, { 662, PT_SC, ucp_Linear_B },
{ 646, PT_PC, ucp_Lo }, { 671, PT_SC, ucp_Lisu },
{ 649, PT_PC, ucp_Lt }, { 676, PT_PC, ucp_Ll },
{ 652, PT_PC, ucp_Lu }, { 679, PT_PC, ucp_Lm },
{ 655, PT_SC, ucp_Lycian }, { 682, PT_PC, ucp_Lo },
{ 662, PT_SC, ucp_Lydian }, { 685, PT_PC, ucp_Lt },
{ 669, PT_GC, ucp_M }, { 688, PT_PC, ucp_Lu },
{ 671, PT_SC, ucp_Mahajani }, { 691, PT_SC, ucp_Lycian },
{ 680, PT_SC, ucp_Malayalam }, { 698, PT_SC, ucp_Lydian },
{ 690, PT_SC, ucp_Mandaic }, { 705, PT_GC, ucp_M },
{ 698, PT_SC, ucp_Manichaean }, { 707, PT_SC, ucp_Mahajani },
{ 709, PT_SC, ucp_Marchen }, { 716, PT_SC, ucp_Makasar },
{ 717, PT_SC, ucp_Masaram_Gondi }, { 724, PT_SC, ucp_Malayalam },
{ 731, PT_PC, ucp_Mc }, { 734, PT_SC, ucp_Mandaic },
{ 734, PT_PC, ucp_Me }, { 742, PT_SC, ucp_Manichaean },
{ 737, PT_SC, ucp_Meetei_Mayek }, { 753, PT_SC, ucp_Marchen },
{ 750, PT_SC, ucp_Mende_Kikakui }, { 761, PT_SC, ucp_Masaram_Gondi },
{ 764, PT_SC, ucp_Meroitic_Cursive }, { 775, PT_PC, ucp_Mc },
{ 781, PT_SC, ucp_Meroitic_Hieroglyphs }, { 778, PT_PC, ucp_Me },
{ 802, PT_SC, ucp_Miao }, { 781, PT_SC, ucp_Medefaidrin },
{ 807, PT_PC, ucp_Mn }, { 793, PT_SC, ucp_Meetei_Mayek },
{ 810, PT_SC, ucp_Modi }, { 806, PT_SC, ucp_Mende_Kikakui },
{ 815, PT_SC, ucp_Mongolian }, { 820, PT_SC, ucp_Meroitic_Cursive },
{ 825, PT_SC, ucp_Mro }, { 837, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 829, PT_SC, ucp_Multani }, { 858, PT_SC, ucp_Miao },
{ 837, PT_SC, ucp_Myanmar }, { 863, PT_PC, ucp_Mn },
{ 845, PT_GC, ucp_N }, { 866, PT_SC, ucp_Modi },
{ 847, PT_SC, ucp_Nabataean }, { 871, PT_SC, ucp_Mongolian },
{ 857, PT_PC, ucp_Nd }, { 881, PT_SC, ucp_Mro },
{ 860, PT_SC, ucp_New_Tai_Lue }, { 885, PT_SC, ucp_Multani },
{ 872, PT_SC, ucp_Newa }, { 893, PT_SC, ucp_Myanmar },
{ 877, PT_SC, ucp_Nko }, { 901, PT_GC, ucp_N },
{ 881, PT_PC, ucp_Nl }, { 903, PT_SC, ucp_Nabataean },
{ 884, PT_PC, ucp_No }, { 913, PT_PC, ucp_Nd },
{ 887, PT_SC, ucp_Nushu }, { 916, PT_SC, ucp_New_Tai_Lue },
{ 893, PT_SC, ucp_Ogham }, { 928, PT_SC, ucp_Newa },
{ 899, PT_SC, ucp_Ol_Chiki }, { 933, PT_SC, ucp_Nko },
{ 908, PT_SC, ucp_Old_Hungarian }, { 937, PT_PC, ucp_Nl },
{ 922, PT_SC, ucp_Old_Italic }, { 940, PT_PC, ucp_No },
{ 933, PT_SC, ucp_Old_North_Arabian }, { 943, PT_SC, ucp_Nushu },
{ 951, PT_SC, ucp_Old_Permic }, { 949, PT_SC, ucp_Ogham },
{ 962, PT_SC, ucp_Old_Persian }, { 955, PT_SC, ucp_Ol_Chiki },
{ 974, PT_SC, ucp_Old_South_Arabian }, { 964, PT_SC, ucp_Old_Hungarian },
{ 992, PT_SC, ucp_Old_Turkic }, { 978, PT_SC, ucp_Old_Italic },
{ 1003, PT_SC, ucp_Oriya }, { 989, PT_SC, ucp_Old_North_Arabian },
{ 1009, PT_SC, ucp_Osage }, { 1007, PT_SC, ucp_Old_Permic },
{ 1015, PT_SC, ucp_Osmanya }, { 1018, PT_SC, ucp_Old_Persian },
{ 1023, PT_GC, ucp_P }, { 1030, PT_SC, ucp_Old_Sogdian },
{ 1025, PT_SC, ucp_Pahawh_Hmong }, { 1042, PT_SC, ucp_Old_South_Arabian },
{ 1038, PT_SC, ucp_Palmyrene }, { 1060, PT_SC, ucp_Old_Turkic },
{ 1048, PT_SC, ucp_Pau_Cin_Hau }, { 1071, PT_SC, ucp_Oriya },
{ 1060, PT_PC, ucp_Pc }, { 1077, PT_SC, ucp_Osage },
{ 1063, PT_PC, ucp_Pd }, { 1083, PT_SC, ucp_Osmanya },
{ 1066, PT_PC, ucp_Pe }, { 1091, PT_GC, ucp_P },
{ 1069, PT_PC, ucp_Pf }, { 1093, PT_SC, ucp_Pahawh_Hmong },
{ 1072, PT_SC, ucp_Phags_Pa }, { 1106, PT_SC, ucp_Palmyrene },
{ 1081, PT_SC, ucp_Phoenician }, { 1116, PT_SC, ucp_Pau_Cin_Hau },
{ 1092, PT_PC, ucp_Pi }, { 1128, PT_PC, ucp_Pc },
{ 1095, PT_PC, ucp_Po }, { 1131, PT_PC, ucp_Pd },
{ 1098, PT_PC, ucp_Ps }, { 1134, PT_PC, ucp_Pe },
{ 1101, PT_SC, ucp_Psalter_Pahlavi }, { 1137, PT_PC, ucp_Pf },
{ 1117, PT_SC, ucp_Rejang }, { 1140, PT_SC, ucp_Phags_Pa },
{ 1124, PT_SC, ucp_Runic }, { 1149, PT_SC, ucp_Phoenician },
{ 1130, PT_GC, ucp_S }, { 1160, PT_PC, ucp_Pi },
{ 1132, PT_SC, ucp_Samaritan }, { 1163, PT_PC, ucp_Po },
{ 1142, PT_SC, ucp_Saurashtra }, { 1166, PT_PC, ucp_Ps },
{ 1153, PT_PC, ucp_Sc }, { 1169, PT_SC, ucp_Psalter_Pahlavi },
{ 1156, PT_SC, ucp_Sharada }, { 1185, PT_SC, ucp_Rejang },
{ 1164, PT_SC, ucp_Shavian }, { 1192, PT_SC, ucp_Runic },
{ 1172, PT_SC, ucp_Siddham }, { 1198, PT_GC, ucp_S },
{ 1180, PT_SC, ucp_SignWriting }, { 1200, PT_SC, ucp_Samaritan },
{ 1192, PT_SC, ucp_Sinhala }, { 1210, PT_SC, ucp_Saurashtra },
{ 1200, PT_PC, ucp_Sk }, { 1221, PT_PC, ucp_Sc },
{ 1203, PT_PC, ucp_Sm }, { 1224, PT_SC, ucp_Sharada },
{ 1206, PT_PC, ucp_So }, { 1232, PT_SC, ucp_Shavian },
{ 1209, PT_SC, ucp_Sora_Sompeng }, { 1240, PT_SC, ucp_Siddham },
{ 1222, PT_SC, ucp_Soyombo }, { 1248, PT_SC, ucp_SignWriting },
{ 1230, PT_SC, ucp_Sundanese }, { 1260, PT_SC, ucp_Sinhala },
{ 1240, PT_SC, ucp_Syloti_Nagri }, { 1268, PT_PC, ucp_Sk },
{ 1253, PT_SC, ucp_Syriac }, { 1271, PT_PC, ucp_Sm },
{ 1260, PT_SC, ucp_Tagalog }, { 1274, PT_PC, ucp_So },
{ 1268, PT_SC, ucp_Tagbanwa }, { 1277, PT_SC, ucp_Sogdian },
{ 1277, PT_SC, ucp_Tai_Le }, { 1285, PT_SC, ucp_Sora_Sompeng },
{ 1284, PT_SC, ucp_Tai_Tham }, { 1298, PT_SC, ucp_Soyombo },
{ 1293, PT_SC, ucp_Tai_Viet }, { 1306, PT_SC, ucp_Sundanese },
{ 1302, PT_SC, ucp_Takri }, { 1316, PT_SC, ucp_Syloti_Nagri },
{ 1308, PT_SC, ucp_Tamil }, { 1329, PT_SC, ucp_Syriac },
{ 1314, PT_SC, ucp_Tangut }, { 1336, PT_SC, ucp_Tagalog },
{ 1321, PT_SC, ucp_Telugu }, { 1344, PT_SC, ucp_Tagbanwa },
{ 1328, PT_SC, ucp_Thaana }, { 1353, PT_SC, ucp_Tai_Le },
{ 1335, PT_SC, ucp_Thai }, { 1360, PT_SC, ucp_Tai_Tham },
{ 1340, PT_SC, ucp_Tibetan }, { 1369, PT_SC, ucp_Tai_Viet },
{ 1348, PT_SC, ucp_Tifinagh }, { 1378, PT_SC, ucp_Takri },
{ 1357, PT_SC, ucp_Tirhuta }, { 1384, PT_SC, ucp_Tamil },
{ 1365, PT_SC, ucp_Ugaritic }, { 1390, PT_SC, ucp_Tangut },
{ 1374, PT_SC, ucp_Vai }, { 1397, PT_SC, ucp_Telugu },
{ 1378, PT_SC, ucp_Warang_Citi }, { 1404, PT_SC, ucp_Thaana },
{ 1390, PT_ALNUM, 0 }, { 1411, PT_SC, ucp_Thai },
{ 1394, PT_PXSPACE, 0 }, { 1416, PT_SC, ucp_Tibetan },
{ 1398, PT_SPACE, 0 }, { 1424, PT_SC, ucp_Tifinagh },
{ 1402, PT_UCNC, 0 }, { 1433, PT_SC, ucp_Tirhuta },
{ 1406, PT_WORD, 0 }, { 1441, PT_SC, ucp_Ugaritic },
{ 1410, PT_SC, ucp_Yi }, { 1450, PT_SC, ucp_Vai },
{ 1413, PT_GC, ucp_Z }, { 1454, PT_SC, ucp_Warang_Citi },
{ 1415, PT_SC, ucp_Zanabazar_Square }, { 1466, PT_ALNUM, 0 },
{ 1432, PT_PC, ucp_Zl }, { 1470, PT_PXSPACE, 0 },
{ 1435, PT_PC, ucp_Zp }, { 1474, PT_SPACE, 0 },
{ 1438, PT_PC, ucp_Zs } { 1478, PT_UCNC, 0 },
{ 1482, PT_WORD, 0 },
{ 1486, PT_SC, ucp_Yi },
{ 1489, PT_GC, ucp_Z },
{ 1491, PT_SC, ucp_Zanabazar_Square },
{ 1508, PT_PC, ucp_Zl },
{ 1511, PT_PC, ucp_Zp },
{ 1514, PT_PC, ucp_Zs }
}; };
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016 University of Cambridge New API code Copyright (c) 2016-2018 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -100,7 +100,8 @@ enum {
ucp_Zs /* Space separator */ ucp_Zs /* Space separator */
}; };
/* These are grapheme break properties. */ /* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. */
enum { enum {
ucp_gbCR, /* 0 */ ucp_gbCR, /* 0 */
@ -116,11 +117,8 @@ enum {
ucp_gbLVT, /* 10 Hangul syllable type LVT */ ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */ ucp_gbRegionalIndicator, /* 11 */
ucp_gbOther, /* 12 */ ucp_gbOther, /* 12 */
ucp_gbE_Base, /* 13 */ ucp_gbZWJ, /* 13 */
ucp_gbE_Modifier, /* 14 */ ucp_gbExtended_Pictographic /* 14 */
ucp_gbE_Base_GAZ, /* 15 */
ucp_gbZWJ, /* 16 */
ucp_gbGlue_After_Zwj /* 17 */
}; };
/* These are the script identifications. */ /* These are the script identifications. */
@ -274,7 +272,15 @@ enum {
ucp_Masaram_Gondi, ucp_Masaram_Gondi,
ucp_Nushu, ucp_Nushu,
ucp_Soyombo, ucp_Soyombo,
ucp_Zanabazar_Square ucp_Zanabazar_Square,
/* New for Unicode 11.0.0 */
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
ucp_Sogdian
}; };
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

View File

@ -66,7 +66,7 @@
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
Other macros: Other macros:
SLJIT_FUNC : calling convention attribute for both calling JIT form C and C calling back from JIT SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
*/ */
@ -147,17 +147,23 @@
#define SLJIT_CONFIG_UNSUPPORTED 1 #define SLJIT_CONFIG_UNSUPPORTED 1
#endif #endif
#else /* !_WIN32 */ #else /* _WIN32 */
#if defined(_M_X64) || defined(__x86_64__) #if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1 #define SLJIT_CONFIG_X86_64 1
#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
#define SLJIT_CONFIG_ARM_THUMB2 1
#elif (defined(_M_ARM) && _M_ARM >= 7)
#define SLJIT_CONFIG_ARM_V7 1
#elif defined(_ARM_) #elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1 #define SLJIT_CONFIG_ARM_V5 1
#elif defined(_M_ARM64) || defined(__aarch64__)
#define SLJIT_CONFIG_ARM_64 1
#else #else
#define SLJIT_CONFIG_X86_32 1 #define SLJIT_CONFIG_X86_32 1
#endif #endif
#endif /* !WIN32 */ #endif /* !_WIN32 */
#endif /* SLJIT_CONFIG_AUTO */ #endif /* SLJIT_CONFIG_AUTO */
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
@ -324,6 +330,11 @@
sparc_cache_flush((from), (to)) sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif defined _WIN32
#define SLJIT_CACHE_FLUSH(from, to) \
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
#else #else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */ /* Calls __ARM_NR_cacheflush on ARM-Linux. */
@ -371,12 +382,18 @@ typedef int sljit_sw;
#define SLJIT_64BIT_ARCHITECTURE 1 #define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3 #define SLJIT_WORD_SHIFT 3
#ifdef _WIN32 #ifdef _WIN32
#ifdef __GNUC__
/* These types do not require windows.h */
typedef unsigned long long sljit_uw;
typedef long long sljit_sw;
#else
typedef unsigned __int64 sljit_uw; typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw; typedef __int64 sljit_sw;
#else #endif
#else /* !_WIN32 */
typedef unsigned long int sljit_uw; typedef unsigned long int sljit_uw;
typedef long int sljit_sw; typedef long int sljit_sw;
#endif #endif /* _WIN32 */
#endif #endif
typedef sljit_uw sljit_p; typedef sljit_uw sljit_p;
@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw)) #define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)

View File

@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
void *retval; void *retval;
#ifdef MAP_ANON #ifdef MAP_ANON
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
int flags = MAP_PRIVATE | MAP_ANON;
#ifdef MAP_JIT
flags |= MAP_JIT;
#endif
retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
#else #else
if (dev_zero < 0) { if (dev_zero < 0) {
if (open_dev_zero()) if (open_dev_zero())

View File

@ -26,6 +26,13 @@
#include "sljitLir.h" #include "sljitLir.h"
#ifdef _WIN32
/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
#include <windows.h>
#endif /* _WIN32 */
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
/* These libraries are needed for the macros below. */ /* These libraries are needed for the macros below. */
@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
#endif #endif
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{ {

View File

@ -138,7 +138,7 @@ of sljitConfigInternal.h */
be specified as scratch registers and the fifth one as saved register be specified as scratch registers and the fifth one as saved register
on the CPU above and any user code which requires four scratch on the CPU above and any user code which requires four scratch
registers can run unmodified. The SLJIT compiler automatically saves registers can run unmodified. The SLJIT compiler automatically saves
the content of the two extra scrath register on the stack. Scratch the content of the two extra scratch register on the stack. Scratch
registers can also be preserved by saving their value on the stack registers can also be preserved by saving their value on the stack
but this needs to be done manually. but this needs to be done manually.
@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
register can hold any 32 or 64 bit value, and it is converted to a 32 bit register can hold any 32 or 64 bit value, and it is converted to a 32 bit
compatible format first. This conversion is free (no instructions are compatible format first. This conversion is free (no instructions are
emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension). value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
Note: memory addressing always uses 64 bit values on 64 bit systems so Note: memory addressing always uses 64 bit values on 64 bit systems so
@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
*/ */
#define SLJIT_F32_OP SLJIT_I32_OP #define SLJIT_F32_OP SLJIT_I32_OP
/* Many CPUs (x86, ARM, PPC) has status flags which can be set according /* Many CPUs (x86, ARM, PPC) have status flags which can be set according
to the result of an operation. Other CPUs (MIPS) does not have status to the result of an operation. Other CPUs (MIPS) do not have status
flags, and results must be stored in registers. To cover both architecture flags, and results must be stored in registers. To cover both architecture
types efficiently only two flags are defined by SLJIT: types efficiently only two flags are defined by SLJIT:
@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Using these flags can reduce the number of emitted instructions. E.g. a Using these flags can reduce the number of emitted instructions. E.g. a
fast loop can be implemented by decreasing a counter register and set the fast loop can be implemented by decreasing a counter register and set the
zero flag to jump back if the counter register is not reached zero. zero flag to jump back if the counter register has not reached zero.
Motivation: although CPUs can set a large number of flags, usually their Motivation: although CPUs can set a large number of flags, usually their
values are ignored or only one of them is used. Emulating a large number values are ignored or only one of them is used. Emulating a large number
of flags on systems without flag register is complicated so SLJIT of flags on systems without flag register is complicated so SLJIT
instructions must specify the flag they want to use and only that flag instructions must specify the flag they want to use and only that flag
will be emulated. The last arithmetic instruction can be repeated if will be emulated. The last arithmetic instruction can be repeated if
multiple flags needs to be checked. multiple flags need to be checked.
*/ */
/* Set Zero status flag. */ /* Set Zero status flag. */
@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
/* Starting index of opcodes for sljit_emit_op1. */ /* Starting index of opcodes for sljit_emit_op1. */
#define SLJIT_OP1_BASE 32 #define SLJIT_OP1_BASE 32
/* The MOV instruction transfer data from source to destination. /* The MOV instruction transfers data from source to destination.
MOV instruction suffixes: MOV instruction suffixes:
@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_FAST_CALL 25 #define SLJIT_FAST_CALL 25
/* Called function must be declared with the SLJIT_FUNC attribute. */ /* Called function must be declared with the SLJIT_FUNC attribute. */
#define SLJIT_CALL 26 #define SLJIT_CALL 26
/* Called function must be decalred with cdecl attribute. /* Called function must be declared with cdecl attribute.
This is the default attribute for C functions. */ This is the default attribute for C functions. */
#define SLJIT_CALL_CDECL 27 #define SLJIT_CALL_CDECL 27
@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
/* Set the destination address of the jump to this label. */ /* Set the destination address of the jump to this label. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
/* Emit an indirect jump or fast call. Both direct and indirect form /* Emit an indirect jump or fast call.
Direct form: set src to SLJIT_IMM() and srcw to the address Direct form: set src to SLJIT_IMM() and srcw to the address
Indirect form: any other valid addressing mode Indirect form: any other valid addressing mode
type must be between SLJIT_JUMP and SLJIT_FAST_CALL type must be between SLJIT_JUMP and SLJIT_FAST_CALL
@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
#define SLJIT_MEM_POST 0x1000 #define SLJIT_MEM_POST 0x1000
/* Emit a single memory load or store with update instruction. When the /* Emit a single memory load or store with update instruction. When the
requested instruction from is not supported by the CPU, it returns requested instruction form is not supported by the CPU, it returns
with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
allows specializing tight loops based on the supported instruction allows specializing tight loops based on the supported instruction
forms (see SLJIT_MEM_SUPP flag). forms (see SLJIT_MEM_SUPP flag).

View File

@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4) #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* r18 - platform register, currently not used */ /* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
}; };
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ADC 0x9a000000 #define ADC 0x9a000000
#define ADD 0x8b000000 #define ADD 0x8b000000
#define ADDE 0x8b200000
#define ADDI 0x91000000 #define ADDI 0x91000000
#define AND 0x8a000000 #define AND 0x8a000000
#define ANDI 0x92000000 #define ANDI 0x92000000
@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUB 0x1e603800 #define FSUB 0x1e603800
#define LDRI 0xf9400000 #define LDRI 0xf9400000
#define LDP 0xa9400000 #define LDP 0xa9400000
#define LDP_PST 0xa8c00000 #define LDP_PRE 0xa9c00000
#define LDR_PRE 0xf8400c00
#define LSLV 0x9ac02000 #define LSLV 0x9ac02000
#define LSRV 0x9ac02400 #define LSRV 0x9ac02400
#define MADD 0x9b000000 #define MADD 0x9b000000
@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; if (saved_regs_size & 0x8)
local_size = (local_size + 15) & ~0xf;
compiler->local_size = local_size;
if (local_size <= (63 * sizeof(sljit_sw))) {
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
offs = (local_size - saved_regs_size) << (15 - 3);
} else {
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw); saved_regs_size += sizeof(sljit_sw);
}
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; local_size = (local_size + 15) & ~0xf;
if (saved_regs_size > 0) compiler->local_size = local_size + saved_regs_size;
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
} FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
#ifdef _WIN32
if (local_size >= 4096)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
else if (local_size > 256)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
#endif
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1; prev = -1;
offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) { for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
SLJIT_ASSERT(prev == -1); if (prev != -1)
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
if (compiler->local_size > (63 * sizeof(sljit_sw))) {
/* The local_size is already adjusted by the saved registers. */ FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
}
args = get_arg_count(arg_types); args = get_arg_count(arg_types);
@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3) if (args >= 3)
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
#ifdef _WIN32
if (local_size >= 4096) {
if (local_size < 4 * 4096) {
/* No need for a loop. */
if (local_size >= 2 * 4096) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
local_size -= 4096;
}
if (local_size >= 2 * 4096) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
local_size -= 4096;
}
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
local_size -= 4096;
}
else {
FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
local_size &= 0xfff;
}
if (local_size > 256) {
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
}
else if (local_size > 0)
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
}
else if (local_size > 256) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
}
else if (local_size > 0)
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
#else /* !_WIN32 */
/* The local_size does not include saved registers size. */
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size != 0)
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
#endif /* _WIN32 */
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 saved_regs_size;
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
local_size = (local_size + 15) & ~0xf; if (saved_regs_size & 0x8)
compiler->local_size = local_size; saved_regs_size += sizeof(sljit_sw);
compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
local_size = compiler->local_size; saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
if (saved_regs_size & 0x8)
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
if (local_size <= (63 * sizeof(sljit_sw)))
offs = (local_size - saved_regs_size) << (15 - 3);
else {
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
| RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw); saved_regs_size += sizeof(sljit_sw);
local_size = compiler->local_size - saved_regs_size;
/* Load LR as early as possible. */
if (local_size == 0)
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
else if (local_size < 63 * sizeof(sljit_sw)) {
FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | (local_size << (15 - 3))));
} }
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; else {
if (local_size > 0xfff) { if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff; local_size &= 0xfff;
} }
if (local_size) if (local_size)
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
} }
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1; prev = -1;
offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) { for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
SLJIT_ASSERT(prev == -1); if (prev != -1)
FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
if (compiler->local_size <= (63 * sizeof(sljit_sw))) { /* These two can be executed in parallel. */
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); return push_inst(compiler, RET | RN(TMP_LR));
} else if (saved_regs_size > 0) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
}
FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
return SLJIT_SUCCESS;
} }
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
} }
/* Emits code that computes (SLJIT_SP + offset) and places the result in dst.
   dst/dstw describe the destination operand: a register, or a memory operand
   when (dst & SLJIT_MEM) is set. offset is a signed byte offset that is added
   to the stack pointer.
   Returns SLJIT_SUCCESS on the fall-through path, or the result of
   emit_op_mem() when the destination is in memory. CHECK_ERROR, CHECK and
   FAIL_IF are defined elsewhere; presumably they return early with an error
   code - confirm against their definitions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
sljit_s32 dst_reg;
sljit_ins ins;
CHECK_ERROR();
CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
/* The offset is applied directly to SP below, with no locals base added,
   which is only valid when the locals base is zero. */
SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
/* Compute into dst itself when it is a plain register, otherwise into
   TMP_REG1 and store the result at the end. */
dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
/* Offsets whose magnitude fits in 24 bits are handled with at most two
   immediate add/sub instructions. */
if (offset <= 0xffffff && offset >= -0xffffff) {
ins = ADDI;
/* Negative offsets are emitted as a subtraction of the magnitude. */
if (offset < 0) {
offset = -offset;
ins = SUBI;
}
/* A magnitude of at most 12 bits fits in a single instruction. */
if (offset <= 0xfff)
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
else {
/* Upper 12 bits first: the immediate field starts at bit 10, and bit 22
   presumably selects the "immediate shifted left by 12" form of the
   A64 ADD/SUB immediate encoding - confirm against the ISA reference. */
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
offset &= 0xfff;
/* Then the lower 12 bits, applied to the partial result in dst_reg. */
if (offset != 0)
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
}
}
else {
/* Large offsets: load the signed offset into dst_reg, then add SP to it. */
FAIL_IF(load_immediate (compiler, dst_reg, offset));
/* Add extended register form. */
/* NOTE(review): (0x3 << 13) looks like the extend "option" field; presumably
   this form is used so that SP can be the base operand - confirm. */
FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
}
/* Memory destination: store the address computed in dst_reg to dst/dstw. */
if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{ {
struct sljit_const *const_; struct sljit_const *const_;

View File

@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ASRSI 0x1000 #define ASRSI 0x1000
#define ASR_W 0xfa40f000 #define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020 #define ASR_WI 0xea4f0020
#define BCC 0xd000
#define BICI 0xf0200000 #define BICI 0xf0200000
#define BKPT 0xbe00 #define BKPT 0xbe00
#define BLX 0x4780 #define BLX 0x4780
@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define EORS 0x4040 #define EORS 0x4040
#define EOR_W 0xea800000 #define EOR_W 0xea800000
#define IT 0xbf00 #define IT 0xbf00
#define LDRI 0xf8500800
#define LSLS 0x4080 #define LSLS 0x4080
#define LSLSI 0x0000 #define LSLSI 0x0000
#define LSL_W 0xfa00f000 #define LSL_W 0xfa00f000
@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SBCI 0xf1600000 #define SBCI 0xf1600000
#define SBCS 0x4180 #define SBCS 0x4180
#define SBC_W 0xeb600000 #define SBC_W 0xeb600000
#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000 #define SMULL 0xfb800000
#define STR_SP 0x9000 #define STR_SP 0x9000
#define SUBS 0x1a00 #define SUBS 0x1a00
@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SXTH 0xb200 #define SXTH 0xb200
#define SXTH_W 0xfa0ff080 #define SXTH_W 0xfa0ff080
#define TST 0x4200 #define TST 0x4200
#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000 #define UMULL 0xfba00000
#define UXTB 0xb2c0 #define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080 #define UXTB_W 0xfa5ff080
@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
/* Really complex instruction form for branches. */ /* Really complex instruction form for branches. */
s = (diff >> 23) & 0x1; s = (diff >> 23) & 0x1;
j1 = (~(diff >> 21) ^ s) & 0x1; j1 = (~(diff >> 22) ^ s) & 0x1;
j2 = (~(diff >> 22) ^ s) & 0x1; j2 = (~(diff >> 21) ^ s) & 0x1;
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
{ {
sljit_uw tmp; sljit_uw tmp;
/* MOVS cannot be used since it destroys flags. */
if (imm >= 0x10000) { if (imm >= 0x10000) {
tmp = get_imm(imm); tmp = get_imm(imm);
if (tmp != INVALID_IMM) if (tmp != INVALID_IMM)
@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{ {
sljit_s32 args, size, i, tmp; sljit_s32 args, size, i, tmp;
sljit_ins push = 0; sljit_ins push = 0;
#ifdef _WIN32
sljit_uw imm;
#endif
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
local_size = ((size + local_size + 7) & ~7) - size; local_size = ((size + local_size + 7) & ~7) - size;
compiler->local_size = local_size; compiler->local_size = local_size;
#ifdef _WIN32
if (local_size >= 256) {
if (local_size > 4096)
imm = get_imm(4096);
else
imm = get_imm(local_size & ~0xff);
SLJIT_ASSERT(imm != INVALID_IMM);
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
}
#else
if (local_size > 0) { if (local_size > 0) {
if (local_size <= (127 << 2)) if (local_size <= (127 << 2))
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
else else
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
} }
#endif
args = get_arg_count(arg_types); args = get_arg_count(arg_types);
@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3) if (args >= 3)
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
#ifdef _WIN32
if (local_size >= 256) {
if (local_size > 4096) {
imm = get_imm(4096);
SLJIT_ASSERT(imm != INVALID_IMM);
if (local_size < 4 * 4096) {
if (local_size > 2 * 4096) {
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
local_size -= 4096;
}
if (local_size > 2 * 4096) {
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
local_size -= 4096;
}
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
local_size -= 4096;
SLJIT_ASSERT(local_size > 0);
}
else {
FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
local_size &= 0xfff;
if (local_size != 0)
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
}
if (local_size >= 256) {
imm = get_imm(local_size & ~0xff);
SLJIT_ASSERT(imm != INVALID_IMM);
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
}
}
local_size &= 0xff;
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
}
else if (local_size > 0)
FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
#endif
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */ /* Operators */
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#if defined(__GNUC__) #ifdef _WIN32
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator); extern int __aeabi_idivmod(int numerator, int denominator);
#else #else
@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
} }
#endif #endif
#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{ {
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
sljit_sw saved_reg_list[3]; sljit_sw saved_reg_list[3];
sljit_sw saved_reg_count; sljit_sw saved_reg_count;
#endif
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op)); CHECK(check_sljit_emit_op0(compiler, op));
@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 12)
| (reg_map[SLJIT_R0] << 16) | (reg_map[SLJIT_R0] << 16)
| reg_map[SLJIT_R1]); | reg_map[SLJIT_R1]);
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW: case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW: case SLJIT_DIV_UW:
@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
} }
} }
#if defined(__GNUC__) #ifdef _WIN32
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
#elif defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else #else
@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
} }
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
} }
return SLJIT_SUCCESS; return SLJIT_SUCCESS;

View File

@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins ins = NOP; sljit_ins ins = NOP;
sljit_u8 offsets[4]; sljit_u8 offsets[4];
SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12); SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT; arg_types >>= SLJIT_DEF_SHIFT;
@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
else if (arg_count != word_arg_count) else if (arg_count != word_arg_count)
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2)); ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
else if (arg_count == 1) else if (arg_count == 1)
ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3); ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--; arg_count--;
word_arg_count--; word_arg_count--;

View File

@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins prev_ins = NOP; sljit_ins prev_ins = NOP;
sljit_ins ins = NOP; sljit_ins ins = NOP;
SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12); SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT; arg_types >>= SLJIT_DEF_SHIFT;
@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
if (arg_count != word_arg_count) if (arg_count != word_arg_count)
ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
else if (arg_count == 1) else if (arg_count == 1)
ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3); ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--; arg_count--;
word_arg_count--; word_arg_count--;
break; break;

View File

@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins;
#define RETURN_ADDR_REG 31 #define RETURN_ADDR_REG 31
/* Flags are kept in volatile registers. */ /* Flags are kept in volatile registers. */
#define EQUAL_FLAG 31 #define EQUAL_FLAG 3
#define OTHER_FLAG 1 #define OTHER_FLAG 1
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
}; };
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
/* Frequent case. */ /* Frequent case. */
FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
base = S(SLJIT_SP); base = S(SLJIT_SP);
offs = local_size - (sljit_sw)sizeof(sljit_sw);
} }
else { else {
FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size));
FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
base = S(TMP_REG2); base = S(TMP_REG2);
local_size = 0; local_size = 0;
offs = -(sljit_sw)sizeof(sljit_sw);
} }
offs = local_size - (sljit_sw)(sizeof(sljit_sw));
FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
tmp_ar = reg_ar; tmp_ar = reg_ar;
delay_slot = reg_ar; delay_slot = reg_ar;
} else { }
else {
tmp_ar = DR(TMP_REG1); tmp_ar = DR(TMP_REG1);
delay_slot = MOVABLE_INS; delay_slot = MOVABLE_INS;
} }
@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
{ {
sljit_s32 tmp_ar, base, delay_slot;
if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
return compiler->error; return compiler->error;
compiler->cache_arg = 0;
compiler->cache_argw = 0; if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); tmp_ar = reg_ar;
delay_slot = reg_ar;
}
else {
tmp_ar = DR(TMP_REG1);
delay_slot = MOVABLE_INS;
}
base = arg & REG_MASK;
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
argw &= 0x3;
if (SLJIT_UNLIKELY(argw)) {
FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar));
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
}
else
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar));
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
FAIL_IF(load_immediate(compiler, tmp_ar, argw));
if (base != 0)
FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
} }
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)

View File

@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (args > 0) { if (args > 0) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
inst += 2;
} }
if (args > 1) { if (args > 1) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
inst += 2;
} }
if (args > 2) { if (args > 2) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
*inst++ = 0x24; inst[2] = 0x24;
*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
} }
#else #else
if (args > 0) { if (args > 0) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 2; inst[2] = sizeof(sljit_sw) * 2;
inst += 3;
} }
if (args > 1) { if (args > 1) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 3; inst[2] = sizeof(sljit_sw) * 3;
inst += 3;
} }
if (args > 2) { if (args > 2) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 4; inst[2] = sizeof(sljit_sw) * 4;
} }
#endif #endif
@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size; compiler->local_size = local_size;
#ifdef _WIN32 #ifdef _WIN32
if (local_size > 1024) { if (local_size > 0) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (local_size <= 4 * 4096) {
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); if (local_size > 4096)
#else EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
/* Space for a single argument. This amount is excluded when the stack is allocated below. */ if (local_size > 2 * 4096)
local_size -= sizeof(sljit_sw); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); if (local_size > 3 * 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
}
else {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw))); SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
#endif FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
inst[0] = JNE_i8;
inst[1] = (sljit_s8) -16;
}
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
} }
#endif #endif

View File

@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
compiler->mode32 = 0;
#ifdef _WIN64 #ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */ /* Two/four register slots for parameters plus space for xmm6 register if needed. */
if (fscratches >= 6 || fsaveds >= 1) if (fscratches >= 6 || fsaveds >= 1)
@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#ifndef _WIN64 #ifndef _WIN64
if (args > 0) { if (args > 0) {
*inst++ = REX_W; inst[0] = REX_W;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
inst += 3;
} }
if (args > 1) { if (args > 1) {
*inst++ = REX_W | REX_R; inst[0] = REX_W | REX_R;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
inst += 3;
} }
if (args > 2) { if (args > 2) {
*inst++ = REX_W | REX_R; inst[0] = REX_W | REX_R;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
} }
#else #else
if (args > 0) { if (args > 0) {
*inst++ = REX_W; inst[0] = REX_W;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
inst += 3;
} }
if (args > 1) { if (args > 1) {
*inst++ = REX_W; inst[0] = REX_W;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
inst += 3;
} }
if (args > 2) { if (args > 2) {
*inst++ = REX_W | REX_B; inst[0] = REX_W | REX_B;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
} }
#endif #endif
} }
@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size; compiler->local_size = local_size;
#ifdef _WIN64 #ifdef _WIN64
if (local_size > 1024) { if (local_size > 0) {
/* Allocate stack for the callback, which grows the stack. */ if (local_size <= 4 * 4096) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32))); if (local_size > 4096)
FAIL_IF(!inst); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
INC_SIZE(4 + (3 + sizeof(sljit_s32))); if (local_size > 2 * 4096)
*inst++ = REX_W; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
*inst++ = GROUP_BINARY_83; if (local_size > 3 * 4096)
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
/* Allocated size for registers must be divisible by 8. */
SLJIT_ASSERT(!(saved_register_size & 0x7));
/* Aligned to 16 byte. */
if (saved_register_size & 0x8) {
*inst++ = 5 * sizeof(sljit_sw);
local_size -= 5 * sizeof(sljit_sw);
} else {
*inst++ = 4 * sizeof(sljit_sw);
local_size -= 4 * sizeof(sljit_sw);
} }
/* Second instruction */ else {
SLJIT_ASSERT(reg_map[SLJIT_R0] < 8); EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
*inst++ = REX_W; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
*inst++ = MOV_rm_i32;
*inst++ = MOD_REG | reg_lmap[SLJIT_R0]; SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
compiler->skip_checks = 1; SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
#endif FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
inst[0] = JNE_i8;
inst[1] = (sljit_s8) -19;
}
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
} }
#endif #endif
if (local_size > 0) { if (local_size > 0) {
if (local_size <= 127) { FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
FAIL_IF(!inst);
INC_SIZE(4);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
*inst++ = local_size;
}
else {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
FAIL_IF(!inst);
INC_SIZE(7);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_81;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
sljit_unaligned_store_s32(inst, local_size);
inst += sizeof(sljit_s32);
}
} }
#ifdef _WIN64 #ifdef _WIN64

View File

@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
#ifdef _WIN32
#include <malloc.h>
/* Allocates local_size bytes with alloca() and writes a zero through the
   returned pointer; local_size is the requested stack space in bytes. */
static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
{
/* Workaround for calling the internal _chkstk() function on Windows.
That function touches every 4k page that belongs to the requested stack
space, whose size is passed in local_size. This is necessary on Windows,
where the stack can only grow in 4k steps. The call merely burns CPU
cycles when the stack is already large enough, but that cannot be known
in advance, so it must always be made. This is arguably a bad design in
general, even if it has its reasons. */
*(volatile sljit_s32*)alloca(local_size) = 0;
}
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c" #include "sljitNativeX86_32.c"
#else #else