Upgrade the PCRE bundle to 8.34

New upstream version, changelogs at:
http://pcre.org/news.txt
http://pcre.org/changelog.txt

Qt still requires 8.30.

Change-Id: I76794a3079601b07c469b952367f71f794079edc
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
This commit is contained in:
Giuseppe D'Angelo 2013-12-16 11:11:47 +01:00 committed by The Qt Project
parent 2975aa39e6
commit 492d922207
35 changed files with 6630 additions and 3680 deletions

View File

@ -8,7 +8,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
All rights reserved
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2010-2012 Zoltan Herczeg
Copyright(c) 2010-2013 Zoltan Herczeg
All rights reserved.
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2009-2012 Zoltan Herczeg
Copyright(c) 2009-2013 Zoltan Herczeg
All rights reserved.

View File

@ -24,7 +24,7 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service,
Cambridge, England.
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
All rights reserved.
@ -35,7 +35,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2010-2012 Zoltan Herczeg
Copyright(c) 2010-2013 Zoltan Herczeg
All rights reserved.
@ -46,7 +46,7 @@ Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2009-2012 Zoltan Herczeg
Copyright(c) 2009-2013 Zoltan Herczeg
All rights reserved.

View File

@ -8,6 +8,7 @@
#define MAX_NAME_COUNT 10000
#define MAX_NAME_SIZE 32
#define NEWLINE 10
#define PARENS_NEST_LIMIT 250
#define POSIX_MALLOC_THRESHOLD 10
#define SUPPORT_UCP

View File

@ -1,18 +0,0 @@
Index: sljit/sljitConfigInternal.h
===================================================================
--- sljit/sljitConfigInternal.h (revision 1339)
+++ sljit/sljitConfigInternal.h (working copy)
@@ -221,6 +221,13 @@
#define SLJIT_CACHE_FLUSH(from, to) \
sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
+#elif defined __ANDROID__
+
+/* Android lacks __clear_cache; instead, cacheflush should be used. */
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+ cacheflush((long)(from), (long)(to), 0)
+
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */

View File

@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
#define PCRE_MINOR 32
#define PCRE_MINOR 34
#define PCRE_PRERELEASE
#define PCRE_DATE 2012-11-30
#define PCRE_DATE 2013-12-15
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@ -96,11 +96,14 @@ extern "C" {
#endif
/* Public options. Some are compile-time only, some are run-time only, and some
are both, so we keep them all distinct. However, almost all the bits in the
options word are now used. In the long run, we may have to re-use some of the
compile-time only bits for runtime options, or vice versa. Any of the
compile-time options may be inspected during studying (and therefore JIT
compiling).
are both. Most of the compile-time options are saved with the compiled regex so
that they can be inspected during studying (and therefore JIT compiling). Note
that pcre_study() has its own set of options. Originally, all the options
defined here used distinct bits. However, almost all the bits in a 32-bit word
are now used, so in order to conserve them, option bits that were previously
only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
also be used for compile-time options that affect only compiling and are not
relevant for studying or JIT compiling.
Some options for pcre_compile() change its behaviour but do not affect the
behaviour of the execution functions. Other options are passed through to the
@ -142,8 +145,15 @@ with J. */
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
#define PCRE_DFA_SHORTEST 0x00010000 /* D */
#define PCRE_DFA_RESTART 0x00020000 /* D */
/* This pair use the same bit. */
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
/* This pair use the same bit. */
#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
#define PCRE_DUPNAMES 0x00080000 /* C1 */
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
@ -199,6 +209,7 @@ with J. */
#define PCRE_ERROR_DFA_BADRESTART (-30)
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
#define PCRE_ERROR_UNSET (-33)
/* Specific error codes for UTF-8 validity checks */
@ -224,7 +235,7 @@ with J. */
#define PCRE_UTF8_ERR19 19
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
#define PCRE_UTF8_ERR22 22
#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
/* Specific error codes for UTF-16 validity checks */
@ -232,13 +243,13 @@ with J. */
#define PCRE_UTF16_ERR1 1
#define PCRE_UTF16_ERR2 2
#define PCRE_UTF16_ERR3 3
#define PCRE_UTF16_ERR4 4
#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
/* Specific error codes for UTF-32 validity checks */
#define PCRE_UTF32_ERR0 0
#define PCRE_UTF32_ERR1 1
#define PCRE_UTF32_ERR2 2
#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
#define PCRE_UTF32_ERR3 3
/* Request types for pcre_fullinfo() */
@ -263,10 +274,13 @@ with J. */
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
#define PCRE_INFO_FIRSTCHARACTER 19
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_FIRSTCHARACTER 19
#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_REQUIREDCHAR 21
#define PCRE_INFO_REQUIREDCHARFLAGS 22
#define PCRE_INFO_REQUIREDCHARFLAGS 22
#define PCRE_INFO_MATCHLIMIT 23
#define PCRE_INFO_RECURSIONLIMIT 24
#define PCRE_INFO_MATCH_EMPTY 25
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
@ -284,6 +298,7 @@ compatible. */
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
#define PCRE_CONFIG_UTF32 12
#define PCRE_CONFIG_PARENS_LIMIT 13
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
@ -645,6 +660,9 @@ PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
pcre16_jit_callback, void *);
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
pcre32_jit_callback, void *);
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -69,7 +69,7 @@ PCRE_UTF16_ERR0 No error
PCRE_UTF16_ERR1 Missing low surrogate at the end of the string
PCRE_UTF16_ERR2 Invalid low surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
PCRE_UTF16_ERR4 Non-character
PCRE_UTF16_ERR4 Unused (was non-character)
Arguments:
string points to the string
@ -100,19 +100,10 @@ for (p = string; length-- > 0; p++)
if ((c & 0xf800) != 0xd800)
{
/* Normal UTF-16 code point. Neither high nor low surrogate. */
/* Check for non-characters */
if ((c & 0xfffeu) == 0xfffeu || (c >= 0xfdd0u && c <= 0xfdefu))
{
*erroroffset = p - string;
return PCRE_UTF16_ERR4;
}
}
else if ((c & 0x0400) == 0)
{
/* High surrogate. */
/* Must be a followed by a low surrogate. */
/* High surrogate. Must be followed by a low surrogate. */
if (length == 0)
{
*erroroffset = p - string;
@ -125,16 +116,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = p - string;
return PCRE_UTF16_ERR2;
}
else
{
/* Valid surrogate, but check for non-characters */
c = (((c & 0x3ffu) << 10) | (*p & 0x3ffu)) + 0x10000u;
if ((c & 0xfffeu) == 0xfffeu)
{
*erroroffset = p - string;
return PCRE_UTF16_ERR4;
}
}
}
else
{

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -126,14 +126,15 @@ if (re->magic_number == MAGIC_NUMBER)
}
if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
if ((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
re->magic_number = MAGIC_NUMBER;
re->size = swap_uint32(re->size);
re->options = swap_uint32(re->options);
re->flags = swap_uint16(re->flags);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->flags = swap_uint32(re->flags);
re->limit_match = swap_uint32(re->limit_match);
re->limit_recursion = swap_uint32(re->limit_recursion);
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
re->first_char = swap_uint16(re->first_char);
re->req_char = swap_uint16(re->req_char);
@ -141,15 +142,15 @@ re->req_char = swap_uint16(re->req_char);
re->first_char = swap_uint32(re->first_char);
re->req_char = swap_uint32(re->req_char);
#endif
re->max_lookbehind = swap_uint16(re->max_lookbehind);
re->top_bracket = swap_uint16(re->top_bracket);
re->top_backref = swap_uint16(re->top_backref);
re->name_table_offset = swap_uint16(re->name_table_offset);
re->name_entry_size = swap_uint16(re->name_entry_size);
re->name_count = swap_uint16(re->name_count);
re->ref_count = swap_uint16(re->ref_count);
re->tables = tables;
#ifdef COMPILE_PCRE32
re->dummy1 = swap_uint16(re->dummy1);
re->dummy2 = swap_uint16(re->dummy2);
#endif
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
{

View File

@ -163,7 +163,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */

File diff suppressed because it is too large Load Diff

View File

@ -161,6 +161,10 @@ switch (what)
*((int *)where) = POSIX_MALLOC_THRESHOLD;
break;
case PCRE_CONFIG_PARENS_LIMIT:
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned long int *)where) = MATCH_LIMIT;
break;

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -120,7 +120,7 @@ static const pcre_uint8 coptable[] = {
0, 0, /* \P, \p */
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
0, /* \X */
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
1, /* Char */
1, /* Chari */
1, /* not */
@ -151,11 +151,14 @@ static const pcre_uint8 coptable[] = {
/* Character class & ref repeats */
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
0, 0, /* CRRANGE, CRMINRANGE */
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
0, /* CLASS */
0, /* NCLASS */
0, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
0, /* DNREF */
0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@ -171,8 +174,8 @@ static const pcre_uint8 coptable[] = {
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, NCREF */
0, 0, /* RREF, NRREF */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@ -194,7 +197,7 @@ static const pcre_uint8 poptable[] = {
1, 1, /* \P, \p */
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
1, /* \X */
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
1, /* Char */
1, /* Chari */
1, /* not */
@ -220,11 +223,14 @@ static const pcre_uint8 poptable[] = {
/* Character class & ref repeats */
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
1, 1, /* CRRANGE, CRMINRANGE */
1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
1, /* CLASS */
1, /* NCLASS */
1, /* XCLASS - variable length */
0, /* REF */
0, /* REFI */
0, /* DNREF */
0, /* DNREFI */
0, /* RECURSE */
0, /* CALLOUT */
0, /* Alt */
@ -240,8 +246,8 @@ static const pcre_uint8 poptable[] = {
0, 0, /* ONCE, ONCE_NC */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
0, 0, /* CREF, NCREF */
0, 0, /* RREF, NRREF */
0, 0, /* CREF, DNCREF */
0, 0, /* RREF, DNRREF */
0, /* DEF */
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
@ -636,7 +642,7 @@ for (;;)
const pcre_uchar *code;
int state_offset = current_state->offset;
int codevalue, rrc;
unsigned int count;
int count;
#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@ -1094,15 +1100,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@ -1120,6 +1134,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1249,7 +1269,7 @@ for (;;)
(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
else
{ ADD_NEW(state_offset, count); }
@ -1283,7 +1303,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@ -1338,15 +1358,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@ -1364,6 +1392,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1576,15 +1610,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@ -1602,6 +1644,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1705,7 +1753,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);
ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
break;
default:
@ -1749,7 +1797,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + count), 0, 0);
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
@ -1790,7 +1838,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW_DATA(-(state_offset + count), 0, 0);
ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}
}
break;
@ -1839,15 +1887,23 @@ for (;;)
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;
case PT_SPACE: /* Perl space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
OK = TRUE;
break;
default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
}
break;
case PT_WORD:
@ -1865,6 +1921,12 @@ for (;;)
}
break;
case PT_UCNC:
OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
c >= 0xe000;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1879,7 +1941,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
else
{ ADD_NEW(state_offset, count); }
@ -1918,7 +1980,7 @@ for (;;)
}
if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
reset_could_continue = TRUE;
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
@ -1960,7 +2022,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else
{ ADD_NEW_DATA(-state_offset, count, ncount); }
@ -2000,7 +2062,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
@ -2037,7 +2099,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else
{ ADD_NEW_DATA(-state_offset, count, 0); }
@ -2407,7 +2469,7 @@ for (;;)
}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@ -2456,7 +2518,7 @@ for (;;)
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= GET2(code, 1))
if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else
{ ADD_NEW(state_offset, count); }
@ -2509,31 +2571,65 @@ for (;;)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRPOSSTAR:
ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass) { ADD_NEW(state_offset, 0); }
if (isinclass)
{
if (*ecode == OP_CRPOSSTAR)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW(state_offset, 0);
}
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
if (isinclass) { count++; ADD_NEW(state_offset, count); }
if (isinclass)
{
if (count > 0 && *ecode == OP_CRPOSPLUS)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
count++;
ADD_NEW(state_offset, count);
}
break;
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSQUERY:
ADD_ACTIVE(next_state_offset + 1, 0);
if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
if (isinclass)
{
if (*ecode == OP_CRPOSQUERY)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
ADD_NEW(next_state_offset + 1, 0);
}
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
count = current_state->count; /* Already matched */
if (count >= GET2(ecode, 1))
if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)
{
unsigned int max = GET2(ecode, 1 + IMM2_SIZE);
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (*ecode == OP_CRPOSRANGE)
{
active_count--; /* Remove non-match possibility */
next_active_state--;
}
if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else
@ -2633,9 +2729,11 @@ for (;;)
condcode = code[LINK_SIZE+1];
/* Back reference conditions are not supported */
/* Back reference conditions and duplicate named recursion conditions
are not supported */
if (condcode == OP_CREF || condcode == OP_NCREF)
if (condcode == OP_CREF || condcode == OP_DNCREF ||
condcode == OP_DNRREF)
return PCRE_ERROR_DFA_UCOND;
/* The DEFINE condition is always false */
@ -2647,7 +2745,7 @@ for (;;)
which means "test if in any recursion". We can't test for specifically
recursed groups. */
else if (condcode == OP_RREF || condcode == OP_NRREF)
else if (condcode == OP_RREF)
{
int value = GET2(code, LINK_SIZE + 2);
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
@ -3023,15 +3121,7 @@ for (;;)
ptr > md->start_used_ptr) /* Inspected non-empty string */
)
)
{
if (offsetcount >= 2)
{
offsets[0] = (int)(md->start_used_ptr - start_subject);
offsets[1] = (int)(end_subject - start_subject);
}
match_count = PCRE_ERROR_PARTIAL;
}
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));
@ -3545,7 +3635,17 @@ for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
if (rc != PCRE_ERROR_NOMATCH || anchored)
{
if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
{
offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
if (offsetcount > 2)
offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
}
return rc;
}
/* Advance to the next subject character unless we are at the end of a line
and firstline is set. */

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -222,6 +222,20 @@ switch (what)
*((int *)where) = re->max_lookbehind;
break;
case PCRE_INFO_MATCHLIMIT:
if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_match;
break;
case PCRE_INFO_RECURSIONLIMIT:
if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
*((pcre_uint32 *)where) = re->limit_recursion;
break;
case PCRE_INFO_MATCH_EMPTY:
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
break;
default: return PCRE_ERROR_BADOPTION;
}

View File

@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -194,23 +194,31 @@ preprocessor time in standard C environments. */
typedef unsigned char pcre_uint8;
#if USHRT_MAX == 65535
typedef unsigned short pcre_uint16;
typedef short pcre_int16;
typedef unsigned short pcre_uint16;
typedef short pcre_int16;
#define PCRE_UINT16_MAX USHRT_MAX
#define PCRE_INT16_MAX SHRT_MAX
#elif UINT_MAX == 65535
typedef unsigned int pcre_uint16;
typedef int pcre_int16;
typedef unsigned int pcre_uint16;
typedef int pcre_int16;
#define PCRE_UINT16_MAX UINT_MAX
#define PCRE_INT16_MAX INT_MAX
#else
# error Cannot determine a type for 16-bit unsigned integers
#error Cannot determine a type for 16-bit integers
#endif
#if UINT_MAX == 4294967295
typedef unsigned int pcre_uint32;
typedef int pcre_int32;
#elif ULONG_MAX == 4294967295
typedef unsigned long int pcre_uint32;
typedef long int pcre_int32;
#if UINT_MAX == 4294967295U
typedef unsigned int pcre_uint32;
typedef int pcre_int32;
#define PCRE_UINT32_MAX UINT_MAX
#define PCRE_INT32_MAX INT_MAX
#elif ULONG_MAX == 4294967295UL
typedef unsigned long int pcre_uint32;
typedef long int pcre_int32;
#define PCRE_UINT32_MAX ULONG_MAX
#define PCRE_INT32_MAX LONG_MAX
#else
# error Cannot determine a type for 32-bit unsigned integers
#error Cannot determine a type for 32-bit integers
#endif
/* When checking for integer overflow in pcre_compile(), we need to handle
@ -1121,23 +1129,27 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
/* Private flags containing information about the compiled regex. They used to
live at the top end of the options word, but that got almost full, so now they
are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
the restrictions on partial matching have been lifted. It remains for backwards
live at the top end of the options word, but that got almost full, so they were
moved to a 16-bit flags word - which got almost full, so now they are in a
32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */
#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */
#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */
#define PCRE_FIRSTSET 0x0010 /* first_char is set */
#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
#define PCRE_REQCHSET 0x0040 /* req_byte is set */
#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */
#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */
#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */
#define PCRE_JCHANGED 0x0400 /* j option used in regex */
#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */
#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */
#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */
#define PCRE_FIRSTSET 0x00000010 /* first_char is set */
#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */
#define PCRE_REQCHSET 0x00000040 /* req_byte is set */
#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */
#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */
#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */
#define PCRE_JCHANGED 0x00000400 /* j option used in regex */
#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
#define PCRE_MLSET 0x00002000 /* match limit set by regex */
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
@ -1162,9 +1174,10 @@ time, run time, or study time, respectively. */
#define PUBLIC_COMPILE_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \
PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
@ -1520,20 +1533,25 @@ a positive value. */
#define STRING_xdigit "xdigit"
#define STRING_DEFINE "DEFINE"
#define STRING_WEIRD_STARTWORD "[:<:]]"
#define STRING_WEIRD_ENDWORD "[:>:]]"
#define STRING_CR_RIGHTPAR "CR)"
#define STRING_LF_RIGHTPAR "LF)"
#define STRING_CRLF_RIGHTPAR "CRLF)"
#define STRING_ANY_RIGHTPAR "ANY)"
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
#define STRING_UTF8_RIGHTPAR "UTF8)"
#define STRING_UTF16_RIGHTPAR "UTF16)"
#define STRING_UTF32_RIGHTPAR "UTF32)"
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_CR_RIGHTPAR "CR)"
#define STRING_LF_RIGHTPAR "LF)"
#define STRING_CRLF_RIGHTPAR "CRLF)"
#define STRING_ANY_RIGHTPAR "ANY)"
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
#define STRING_UTF8_RIGHTPAR "UTF8)"
#define STRING_UTF16_RIGHTPAR "UTF16)"
#define STRING_UTF32_RIGHTPAR "UTF32)"
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */
@ -1781,20 +1799,25 @@ only. */
#define STRING_xdigit STR_x STR_d STR_i STR_g STR_i STR_t
#define STRING_DEFINE STR_D STR_E STR_F STR_I STR_N STR_E
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */
@ -1835,6 +1858,18 @@ only. */
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
#define PT_WORD 8 /* Word - L plus N plus underscore */
#define PT_CLIST 9 /* Pseudo-property: match character list */
#define PT_UCNC 10 /* Universal Character nameable character */
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
/* The following special properties are used only in XCLASS items, when POSIX
classes are specified and PCRE_UCP is set - in other words, for Unicode
handling of these classes. They are not available via the \p or \P escapes like
those in the above list, and so they do not take part in the autopossessifying
table. */
#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
@ -1849,9 +1884,9 @@ contain characters with values greater than 255. */
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
/* These are escaped items that aren't just an encoding of a particular data
value such as \n. They must have non-zero values, as check_escape() returns
0 for a data character. Also, they must appear in the same order as in the opcode
definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
value such as \n. They must have non-zero values, as check_escape() returns 0
for a data character. Also, they must appear in the same order as in the
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
non-DOTALL mode, "." behaves like \N.
@ -1874,12 +1909,31 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
ESC_E, ESC_Q, ESC_g, ESC_k,
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
OP_EOD must correspond in order to the list of escapes immediately above.
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
that follow must also be updated to match. There are also tables called
"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
/********************** Opcode definitions ******************/
/****** NOTE NOTE NOTE ******
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
order to the list of escapes immediately above. Furthermore, values up to
OP_DOLLM must not be changed without adjusting the table called autoposstab in
pcre_compile.c
Whenever this list is updated, the two macro definitions that follow must be
updated to match. The possessification table called "opcode_possessify" in
pcre_compile.c must also be updated, and also the tables called "coptable"
and "poptable" in pcre_dfa_exec.c.
****** NOTE NOTE NOTE ******/
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
are used in a table for deciding whether a repeated character type can be
auto-possessified. */
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
enum {
OP_END, /* 0 End of pattern */
@ -1912,10 +1966,15 @@ enum {
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
OP_EOD, /* 24 End of data (\z) */
OP_CIRC, /* 25 Start of line - not multiline */
OP_CIRCM, /* 26 Start of line - multiline */
OP_DOLL, /* 27 End of line - not multiline */
OP_DOLLM, /* 28 End of line - multiline */
/* Line end assertions */
OP_DOLL, /* 25 End of line - not multiline */
OP_DOLLM, /* 26 End of line - multiline */
OP_CIRC, /* 27 Start of line - not multiline */
OP_CIRCM, /* 28 Start of line - multiline */
/* Single characters; caseful must precede the caseless ones */
OP_CHAR, /* 29 Match one character, casefully */
OP_CHARI, /* 30 Match one character, caselessly */
OP_NOT, /* 31 Match one character, not the given one, casefully */
@ -1924,7 +1983,7 @@ enum {
/* The following sets of 13 opcodes must always be kept in step because
the offset from the first one is used to generate the others. */
/**** Single characters, caseful, must precede the caseless ones ****/
/* Repeated characters; caseful must precede the caseless ones */
OP_STAR, /* 33 The maximizing and minimizing versions of */
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
@ -1942,7 +2001,7 @@ enum {
OP_POSQUERY, /* 44 Posesssified query, caseful */
OP_POSUPTO, /* 45 Possessified upto, caseful */
/**** Single characters, caseless, must follow the caseful ones */
/* Repeated characters; caseless must follow the caseful ones */
OP_STARI, /* 46 */
OP_MINSTARI, /* 47 */
@ -1960,8 +2019,8 @@ enum {
OP_POSQUERYI, /* 57 Posesssified query, caseless */
OP_POSUPTOI, /* 58 Possessified upto, caseless */
/**** The negated ones must follow the non-negated ones, and match them ****/
/**** Negated single character, caseful; must precede the caseless ones ****/
/* The negated ones must follow the non-negated ones, and match them */
/* Negated repeated character, caseful; must precede the caseless ones */
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
@ -1979,7 +2038,7 @@ enum {
OP_NOTPOSQUERY, /* 70 */
OP_NOTPOSUPTO, /* 71 */
/**** Negated single character, caseless; must follow the caseful ones ****/
/* Negated repeated character, caseless; must follow the caseful ones */
OP_NOTSTARI, /* 72 */
OP_NOTMINSTARI, /* 73 */
@ -1997,7 +2056,7 @@ enum {
OP_NOTPOSQUERYI, /* 83 */
OP_NOTPOSUPTOI, /* 84 */
/**** Character types ****/
/* Character types */
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
@ -2028,89 +2087,96 @@ enum {
OP_CRRANGE, /* 104 These are different to the three sets above. */
OP_CRMINRANGE, /* 105 */
OP_CRPOSSTAR, /* 106 Possessified versions */
OP_CRPOSPLUS, /* 107 */
OP_CRPOSQUERY, /* 108 */
OP_CRPOSRANGE, /* 109 */
/* End of quantifier opcodes */
OP_CLASS, /* 106 Match a character class, chars < 256 only */
OP_NCLASS, /* 107 Same, but the bitmap was created from a negative
OP_CLASS, /* 110 Match a character class, chars < 256 only */
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
class - the difference is relevant only when a
character > 255 is encountered. */
OP_XCLASS, /* 108 Extended class for handling > 255 chars within the
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
class. This does both positive and negative. */
OP_REF, /* 109 Match a back reference, casefully */
OP_REFI, /* 110 Match a back reference, caselessly */
OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */
OP_CALLOUT, /* 112 Call out to external function if provided */
OP_REF, /* 113 Match a back reference, casefully */
OP_REFI, /* 114 Match a back reference, caselessly */
OP_DNREF, /* 115 Match a duplicate name backref, casefully */
OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
OP_CALLOUT, /* 118 Call out to external function if provided */
OP_ALT, /* 113 Start of alternation */
OP_KET, /* 114 End of group that doesn't have an unbounded repeat */
OP_KETRMAX, /* 115 These two must remain together and in this */
OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */
OP_KETRPOS, /* 117 Possessive unlimited repeat. */
OP_ALT, /* 119 Start of alternation */
OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
OP_KETRMAX, /* 121 These two must remain together and in this */
OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */
OP_KETRPOS, /* 123 Possessive unlimited repeat. */
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
asserts must remain in order. */
OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */
OP_ASSERT, /* 119 Positive lookahead */
OP_ASSERT_NOT, /* 120 Negative lookahead */
OP_ASSERTBACK, /* 121 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
OP_ASSERT, /* 125 Positive lookahead */
OP_ASSERT_NOT, /* 126 Negative lookahead */
OP_ASSERTBACK, /* 127 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
after the assertions, with ONCE first, as there's a test for >= ONCE for a
subpattern that isn't an assertion. The POS versions must immediately follow
the non-POS versions in each case. */
OP_ONCE, /* 123 Atomic group, contains captures */
OP_ONCE_NC, /* 124 Atomic group containing no captures */
OP_BRA, /* 125 Start of non-capturing bracket */
OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
OP_CBRA, /* 127 Start of capturing bracket */
OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
OP_COND, /* 129 Conditional group */
OP_ONCE, /* 129 Atomic group, contains captures */
OP_ONCE_NC, /* 130 Atomic group containing no captures */
OP_BRA, /* 131 Start of non-capturing bracket */
OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
OP_CBRA, /* 133 Start of capturing bracket */
OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 132 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 134 Conditional group, check empty */
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
OP_SCBRA, /* 138 Start of capturing bracket, check empty */
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
OP_CREF, /* 135 Used to hold a capture number as condition */
OP_NCREF, /* 136 Same, but generated by a name reference*/
OP_RREF, /* 137 Used to hold a recursion number as condition */
OP_NRREF, /* 138 Same, but generated by a name reference*/
OP_DEF, /* 139 The DEFINE condition */
OP_CREF, /* 141 Used to hold a capture number as condition */
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
OP_RREF, /* 143 Used to hold a recursion number as condition */
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
OP_DEF, /* 145 The DEFINE condition */
OP_BRAZERO, /* 140 These two must remain together and in this */
OP_BRAMINZERO, /* 141 order. */
OP_BRAPOSZERO, /* 142 */
OP_BRAZERO, /* 146 These two must remain together and in this */
OP_BRAMINZERO, /* 147 order. */
OP_BRAPOSZERO, /* 148 */
/* These are backtracking control verbs */
OP_MARK, /* 143 always has an argument */
OP_PRUNE, /* 144 */
OP_PRUNE_ARG, /* 145 same, but with argument */
OP_SKIP, /* 146 */
OP_SKIP_ARG, /* 147 same, but with argument */
OP_THEN, /* 148 */
OP_THEN_ARG, /* 149 same, but with argument */
OP_COMMIT, /* 150 */
OP_MARK, /* 149 always has an argument */
OP_PRUNE, /* 150 */
OP_PRUNE_ARG, /* 151 same, but with argument */
OP_SKIP, /* 152 */
OP_SKIP_ARG, /* 153 same, but with argument */
OP_THEN, /* 154 */
OP_THEN_ARG, /* 155 same, but with argument */
OP_COMMIT, /* 156 */
/* These are forced failure and success verbs */
OP_FAIL, /* 151 */
OP_ACCEPT, /* 152 */
OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
OP_FAIL, /* 157 */
OP_ACCEPT, /* 158 */
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 155 */
OP_SKIPZERO, /* 161 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@ -2121,7 +2187,8 @@ enum {
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
definitions that follow must also be updated to match. There are also tables
called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in
pcre_dfa_exec.c that must be updated. */
/* This macro defines textual names for all the opcodes. These are used only
@ -2134,7 +2201,7 @@ some cases doesn't actually use these names at all). */
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
"extuni", "\\Z", "\\z", \
"^", "^", "$", "$", "char", "chari", "not", "noti", \
"$", "$", "^", "^", "char", "chari", "not", "noti", \
"*", "*?", "+", "+?", "?", "??", \
"{", "{", "{", \
"*+","++", "?+", "{", \
@ -2150,7 +2217,8 @@ some cases doesn't actually use these names at all). */
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
"*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", "{", "{", \
"class", "nclass", "xclass", "Ref", "Refi", \
"*+","++", "?+", "{", \
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
@ -2159,7 +2227,7 @@ some cases doesn't actually use these names at all). */
"Cond", \
"SBra", "SBraPos", "SCBra", "SCBraPos", \
"SCond", \
"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
"Brazero", "Braminzero", "Braposzero", \
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
@ -2184,7 +2252,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
3, 3, /* \P, \p */ \
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
1, /* \X */ \
1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \
2, /* Char - the minimum length */ \
2, /* Chari - the minimum length */ \
2, /* not */ \
@ -2215,11 +2283,14 @@ in UTF-8 mode. The code that uses this table must know about such things. */
/* Character class & ref repeats */ \
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
0, /* XCLASS - variable length */ \
1+IMM2_SIZE, /* REF */ \
1+IMM2_SIZE, /* REFI */ \
1+2*IMM2_SIZE, /* DNREF */ \
1+2*IMM2_SIZE, /* DNREFI */ \
1+LINK_SIZE, /* RECURSE */ \
2+2*LINK_SIZE, /* CALLOUT */ \
1+LINK_SIZE, /* Alt */ \
@ -2244,8 +2315,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
1+LINK_SIZE, /* SCOND */ \
1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \
1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
1, /* DEF */ \
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
@ -2254,8 +2325,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
condition. */
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
#define RREF_ANY 0xffff
@ -2270,9 +2340,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
ERR80, ERR81, ERR82, ERR83, ERR84, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
JIT_NUMBER_OF_COMPILE_MODES };
@ -2280,48 +2352,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
code vector run on as long as necessary after the end. We store an explicit
offset to the name table so that if a regex is compiled on one host, saved, and
then run on another where the size of pointers is different, all might still
be well. For the case of compiled-on-4 and run-on-8, we include an extra
pointer that is always NULL. For future-proofing, a few dummy fields were
originally included - even though you can never get this planning right - but
there is only one left now.
be well.
NOTE NOTE NOTE:
Because people can now save and re-use compiled patterns, any additions to this
structure should be made at the end, and something earlier (e.g. a new
flag in the options or one of the dummy fields) should indicate that the new
fields are present. Currently PCRE always sets the dummy fields to zero.
NOTE NOTE NOTE
The size of the structure must be a multiple of 8 bytes. For the case of
compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
that there is an even number of pointers, which together therefore occupy a
multiple of 8 bytes.
It is necessary to fork the struct for the 32 bit library, since it needs to
use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the
typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit
variants.
*** WARNING ***
When new fields are added to these structures, remember to adjust the code in
pcre_byte_order.c that is concerned with swapping the byte order of the fields
when a compiled regex is reloaded on a host with different endianness.
*** WARNING ***
There is also similar byte-flipping code in pcretest.c, which is used for
testing the byte-flipping features. It must also be kept in step.
*** WARNING ***
*/
#if defined COMPILE_PCRE8
#define REAL_PCRE real_pcre
#elif defined COMPILE_PCRE16
#define REAL_PCRE real_pcre16
#elif defined COMPILE_PCRE32
#define REAL_PCRE real_pcre32
#endif
/* It is necessary to fork the struct for 32 bit, since it needs to use
* pcre_uchar for first_char and req_char. Can't put an ifdef inside the
* typedef since pcretest needs access to the struct of the 8-, 16-
* and 32-bit variants. */
typedef struct real_pcre8_or_16 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
pcre_uint16 flags; /* Private flags */
pcre_uint32 flags; /* Private flags */
pcre_uint32 limit_match; /* Limit set from regex */
pcre_uint32 limit_recursion; /* Limit set from regex */
pcre_uint16 first_char; /* Starting character */
pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
pcre_uint16 first_char; /* Starting character */
pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */
pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */
pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
const pcre_uint8 *nullpad; /* NULL padding */
void *nullpad; /* NULL padding */
} real_pcre8_or_16;
typedef struct real_pcre8_or_16 real_pcre;
@ -2331,22 +2404,31 @@ typedef struct real_pcre32 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
pcre_uint16 flags; /* Private flags */
pcre_uint32 flags; /* Private flags */
pcre_uint32 limit_match; /* Limit set from regex */
pcre_uint32 limit_recursion; /* Limit set from regex */
pcre_uint32 first_char; /* Starting character */
pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
pcre_uint32 first_char; /* Starting character */
pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
pcre_uint16 dummy1; /* for later expansion */
pcre_uint16 dummy2; /* for later expansion */
pcre_uint16 dummy; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
void *nullpad; /* for later expansion */
void *nullpad; /* NULL padding */
} real_pcre32;
/* REAL_PCRE names the internal compiled-pattern structure for whichever
library width is being built, chosen by the COMPILE_PCRE8, COMPILE_PCRE16 or
COMPILE_PCRE32 macro. If none of the three is defined, REAL_PCRE is left
undefined. */
#if defined COMPILE_PCRE8
#define REAL_PCRE real_pcre
#elif defined COMPILE_PCRE16
#define REAL_PCRE real_pcre16
#elif defined COMPILE_PCRE32
#define REAL_PCRE real_pcre32
#endif
/* Assert that the size of REAL_PCRE is divisible by 8 */
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
@ -2380,6 +2462,15 @@ typedef struct open_capitem {
pcre_uint16 flag; /* Set TRUE if recursive back ref */
} open_capitem;
/* Structure for building a list of named groups during the first pass of
compiling. */
/* One entry in the named-group list: it records where a group's name sits in
the pattern text, how long that name is, and which group number it belongs to.
The name is referenced in place rather than copied, which is presumably why
the length is stored separately. NOTE(review): confirm that the name is not
terminated and that length counts pcre_uchar units rather than bytes. */
typedef struct named_group {
const pcre_uchar *name; /* Points to the name in the pattern */
int length; /* Length of the name */
pcre_uint32 number; /* Group number */
} named_group;
/* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. */
@ -2392,24 +2483,29 @@ typedef struct compile_data {
const pcre_uchar *start_code; /* The start of the compiled code */
const pcre_uchar *start_pattern; /* The start of the pattern */
const pcre_uchar *end_pattern; /* The end of the pattern */
open_capitem *open_caps; /* Chain of open capture items */
pcre_uchar *hwm; /* High watermark of workspace */
open_capitem *open_caps; /* Chain of open capture items */
named_group *named_groups; /* Points to vector in pre-compile */
pcre_uchar *name_table; /* The name/number table */
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
int named_group_list_size; /* Number of entries in the list */
int workspace_size; /* Size of workspace */
unsigned int bracount; /* Count of capturing parens as we compile */
unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
unsigned int namedrefcount; /* Number of backreferences by name */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
int external_options; /* External (initial) options */
int external_flags; /* External flag bits to be set */
pcre_uint32 external_options; /* External (initial) options */
pcre_uint32 external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */
int nltype; /* Newline type */
int nllen; /* Newline string length */
pcre_uchar nl[4]; /* Newline string when fixed length */
@ -2431,6 +2527,7 @@ typedef struct recursion_info {
unsigned int group_num; /* Number of group that was called */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
int saved_capture_last; /* Last capture number */
PCRE_PUCHAR subject_position; /* Position at start of recursion */
} recursion_info;
@ -2467,12 +2564,13 @@ typedef struct match_data {
int nllen; /* Newline string length */
int name_count; /* Number of names in name table */
int name_entry_size; /* Size of entry in names table */
unsigned int skip_arg_count; /* For counting SKIP_ARGs */
unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */
pcre_uchar *name_table; /* Table of names */
pcre_uchar nl[4]; /* Newline string when fixed */
const pcre_uint8 *lcc; /* Points to lower casing table */
const pcre_uint8 *fcc; /* Points to case-flipping table */
const pcre_uint8 *ctypes; /* Points to table of type maps */
BOOL offset_overflow; /* Set if too many extractions */
BOOL notbol; /* NOTBOL flag */
BOOL noteol; /* NOTEOL flag */
BOOL utf; /* UTF-8 / UTF-16 flag */
@ -2484,7 +2582,6 @@ typedef struct match_data {
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
BOOL hasthen; /* Pattern contains (*THEN) */
BOOL ignore_skip_arg; /* For re-run when SKIP name not found */
const pcre_uchar *start_code; /* For use when recursing */
PCRE_PUCHAR start_subject; /* Start of the subject string */
PCRE_PUCHAR end_subject; /* End of the subject string */
@ -2493,7 +2590,7 @@ typedef struct match_data {
PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */
int partial; /* PARTIAL options */
int end_offset_top; /* Highwater mark at end of match */
int capture_last; /* Most recent capture number */
pcre_int32 capture_last; /* Most recent capture number + overflow flag */
int start_offset; /* The start offset value */
int match_function_type; /* Set for certain special calls of MATCH() */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */

File diff suppressed because it is too large Load Diff

View File

@ -98,13 +98,17 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different. Note that the table
for "space" includes everything "isspace" gives, including VT in the default
locale. This makes it work for the POSIX class [:space:]. Note also that it is
possible for a character to be alnum or alpha without being lower or upper,
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
least under Debian Linux's locales as of 12/2005). So we must test for alnum
specially. */
exclusive ones - in some locales things may be different.
Note that the table for "space" includes everything "isspace" gives, including
VT in the default locale. This makes it work for the POSIX class [:space:].
From release 8.34 it is also correct for Perl space, because Perl added VT at
release 5.18.
Note also that it is possible for a character to be alnum or alpha without
being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the
fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must
test for alnum specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
@ -123,14 +127,15 @@ for (i = 0; i < 256; i++)
}
p += cbit_length;
/* Finally, the character type table. In this, we exclude VT from the white
space chars, because Perl doesn't recognize it as such for \s and for comments
within regexes. */
/* Finally, the character type table. In this, we used to exclude VT from the
white space chars, because Perl didn't recognize it as such for \s and for
comments within regexes. However, Perl changed at release 5.18, so PCRE changed
at release 8.34. */
for (i = 0; i < 256; i++)
{
int x = 0;
if (i != CHAR_VT && isspace(i)) x += ctype_space;
if (isspace(i)) x += ctype_space;
if (isalpha(i)) x += ctype_letter;
if (isdigit(i)) x += ctype_digit;
if (isxdigit(i)) x += ctype_xdigit;

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -38,8 +38,8 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains an internal function that is used to match an extended
class. It is used by both pcre_exec() and pcre_def_exec(). */
/* This module contains internal functions for comparing and finding the length
of strings for different data item sizes. */
#ifdef PCRE_HAVE_CONFIG_H
@ -54,7 +54,7 @@ class. It is used by both pcre_exec() and pcre_def_exec(). */
* Compare string utilities *
*************************************************/
/* The following two functions compares two strings. Basically an strcmp
/* The following two functions compare two strings. Basically a strcmp
for non 8 bit characters.
Arguments:

View File

@ -66,8 +66,9 @@ string of that length that matches. In UTF8 mode, the result is in characters
rather than bytes.
Arguments:
re compiled pattern block
code pointer to start of group (the bracket)
startcode pointer to start of the whole pattern
startcode pointer to start of the whole pattern's code
options the compiling options
int RECURSE depth
@ -78,8 +79,8 @@ Returns: the minimum length
*/
static int
find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
int recurse_depth)
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
const pcre_uchar *startcode, int options, int recurse_depth)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
@ -129,7 +130,7 @@ for (;;)
case OP_SBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
d = find_minlength(cc, startcode, options, recurse_depth);
d = find_minlength(re, cc, startcode, options, recurse_depth);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@ -175,9 +176,9 @@ for (;;)
case OP_REVERSE:
case OP_CREF:
case OP_NCREF:
case OP_DNCREF:
case OP_RREF:
case OP_NRREF:
case OP_DNRREF:
case OP_DEF:
case OP_CALLOUT:
case OP_SOD:
@ -341,6 +342,7 @@ for (;;)
{
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
branchlength++;
/* Fall through */
@ -348,11 +350,14 @@ for (;;)
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
branchlength += GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
break;
@ -375,7 +380,38 @@ for (;;)
matches an empty string (by default it causes a matching failure), so in
that case we must set the minimum length to zero. */
case OP_REF:
case OP_DNREF: /* Duplicate named pattern back reference */
case OP_DNREFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
int count = GET2(cc, 1+IMM2_SIZE);
pcre_uchar *slot = (pcre_uchar *)re +
re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
d = INT_MAX;
while (count-- > 0)
{
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce)
{
d = 0;
had_recurse = TRUE;
break;
}
else
{
int dd = find_minlength(re, cs, startcode, options, recurse_depth);
if (dd < d) d = dd;
}
slot += re->name_entry_size;
}
}
else d = 0;
cc += 1 + 2*IMM2_SIZE;
goto REPEAT_BACK_REFERENCE;
case OP_REF: /* Single back reference */
case OP_REFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
@ -389,7 +425,7 @@ for (;;)
}
else
{
d = find_minlength(cs, startcode, options, recurse_depth);
d = find_minlength(re, cs, startcode, options, recurse_depth);
}
}
else d = 0;
@ -397,24 +433,29 @@ for (;;)
/* Handle repeated back references */
REPEAT_BACK_REFERENCE:
switch (*cc)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
min = 0;
cc++;
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
min = 1;
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
min = GET2(cc, 1);
cc += 1 + 2 * IMM2_SIZE;
break;
@ -437,7 +478,8 @@ for (;;)
had_recurse = TRUE;
else
{
branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
branchlength += find_minlength(re, cs, startcode, options,
recurse_depth + 1);
}
cc += 1 + LINK_SIZE;
break;
@ -778,6 +820,10 @@ do
case OP_COND:
case OP_CREF:
case OP_DEF:
case OP_DNCREF:
case OP_DNREF:
case OP_DNREFI:
case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
case OP_END:
@ -786,7 +832,6 @@ do
case OP_EXTUNI:
case OP_FAIL:
case OP_MARK:
case OP_NCREF:
case OP_NOT:
case OP_NOTEXACT:
case OP_NOTEXACTI:
@ -818,7 +863,6 @@ do
case OP_NOTUPTOI:
case OP_NOT_HSPACE:
case OP_NOT_VSPACE:
case OP_NRREF:
case OP_PROP:
case OP_PRUNE:
case OP_PRUNE_ARG:
@ -1183,24 +1227,16 @@ do
set_type_bits(start_bits, cbit_digit, table_limit, cd);
break;
/* The cbit_space table has vertical tab as whitespace; we have to
ensure it gets set as not whitespace. Luckily, the code value is the
same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate
bit. */
/* The cbit_space table has vertical tab as whitespace; we no longer
have to play fancy tricks because Perl added VT to its whitespace at
release 5.18. PCRE added it at release 8.34. */
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
start_bits[1] |= 0x08;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
avoid setting it. Luckily, the code value is the same (0x0b) in ASCII
and EBCDIC, so we can just adjust the appropriate bit. */
case OP_WHITESPACE:
c = start_bits[1]; /* Save in case it was already set */
set_type_bits(start_bits, cbit_space, table_limit, cd);
start_bits[1] = (start_bits[1] & ~0x08) | c;
break;
case OP_NOT_WORDCHAR:
@ -1277,11 +1313,14 @@ do
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
tcode++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
else try_next = FALSE;
break;
@ -1346,6 +1385,7 @@ pcre_uchar *code;
compile_data compile_block;
const REAL_PCRE *re = (const REAL_PCRE *)external_re;
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER)
@ -1422,7 +1462,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
/* Find the minimum length of subject string. */
switch(min = find_minlength(code, code, re->options, 0))
switch(min = find_minlength(re, code, code, re->options, 0))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;

View File

@ -346,6 +346,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
@ -493,6 +494,7 @@ const char PRIV(utt_names)[] =
STRING_Xan0
STRING_Xps0
STRING_Xsp0
STRING_Xuc0
STRING_Xwd0
STRING_Yi0
STRING_Z0
@ -640,12 +642,13 @@ const ucp_type_table PRIV(utt)[] = {
{ 1011, PT_ALNUM, 0 },
{ 1015, PT_PXSPACE, 0 },
{ 1019, PT_SPACE, 0 },
{ 1023, PT_WORD, 0 },
{ 1027, PT_SC, ucp_Yi },
{ 1030, PT_GC, ucp_Z },
{ 1032, PT_PC, ucp_Zl },
{ 1035, PT_PC, ucp_Zp },
{ 1038, PT_PC, ucp_Zs }
{ 1023, PT_UCNC, 0 },
{ 1027, PT_WORD, 0 },
{ 1031, PT_SC, ucp_Yi },
{ 1034, PT_GC, ucp_Z },
{ 1036, PT_PC, ucp_Zl },
{ 1039, PT_PC, ucp_Zp },
{ 1042, PT_PC, ucp_Zs }
};
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large Load Diff

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -92,7 +92,7 @@ PCRE_UTF8_ERR18 Overlong 5-byte sequence (won't ever occur)
PCRE_UTF8_ERR19 Overlong 6-byte sequence (won't ever occur)
PCRE_UTF8_ERR20 Isolated 0x80 byte (not within UTF-8 character)
PCRE_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
PCRE_UTF8_ERR22 Non-character
PCRE_UTF8_ERR22 Unused (was non-character)
Arguments:
string points to the string
@ -118,7 +118,6 @@ if (length < 0)
for (p = string; length-- > 0; p++)
{
register pcre_uchar ab, c, d;
pcre_uint32 v = 0;
c = *p;
if (c < 128) continue; /* ASCII character */
@ -187,7 +186,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 2;
return PCRE_UTF8_ERR14;
}
v = ((c & 0x0f) << 12) | ((d & 0x3f) << 6) | (*p & 0x3f);
break;
/* 4-byte character. Check 3rd and 4th bytes for 0x80. Then check first 2
@ -215,7 +213,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - 3;
return PCRE_UTF8_ERR13;
}
v = ((c & 0x07) << 18) | ((d & 0x3f) << 12) | ((p[-1] & 0x3f) << 6) | (*p & 0x3f);
break;
/* 5-byte and 6-byte characters are not allowed by RFC 3629, and will be
@ -290,14 +287,6 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string) - ab;
return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
}
/* Reject non-characters. The pointer p is currently at the last byte of the
character. */
if ((v & 0xfffeu) == 0xfffeu || (v >= 0xfdd0 && v <= 0xfdef))
{
*erroroffset = (int)(p - string) - ab;
return PCRE_UTF8_ERR22;
}
}
#else /* Not SUPPORT_UTF */

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge
Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -128,55 +128,120 @@ while ((t = *data++) != XCL_END)
else /* XCL_PROP & XCL_NOTPROP */
{
const ucd_record *prop = GET_UCD(c);
BOOL isprop = t == XCL_PROP;
switch(*data)
{
case PT_ANY:
if (t == XCL_PROP) return !negated;
if (isprop) return !negated;
break;
case PT_LAMP:
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == (t == XCL_PROP)) return !negated;
prop->chartype == ucp_Lt) == isprop) return !negated;
break;
case PT_GC:
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
return !negated;
break;
case PT_PC:
if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
if ((data[1] == prop->chartype) == isprop) return !negated;
break;
case PT_SC:
if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
if ((data[1] == prop->script) == isprop) return !negated;
break;
case PT_ALNUM:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
return !negated;
break;
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
case PT_SPACE: /* Perl space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
== (t == XCL_PROP))
return !negated;
break;
case PT_PXSPACE: /* POSIX space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
return !negated;
switch(c)
{
HSPACE_CASES:
VSPACE_CASES:
if (isprop) return !negated;
break;
default:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
return !negated;
break;
}
break;
case PT_WORD:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
== (t == XCL_PROP))
== isprop)
return !negated;
break;
case PT_UCNC:
if (c < 0xa0)
{
if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
c == CHAR_GRAVE_ACCENT) == isprop)
return !negated;
}
else
{
if ((c < 0xd800 || c > 0xdfff) == isprop)
return !negated;
}
break;
/* The following three properties can occur only in an XCLASS, as there
is no \p or \P coding for them. */
/* Graphic character. Implement this as not Z (space or separator) and
not C (other), except for Cf (format) with a few exceptions. This seems
to be what Perl does. The exceptional characters are:
U+061C Arabic Letter Mark
U+180E Mongolian Vowel Separator
U+2066 - U+2069 Various "isolate"s
*/
case PT_PXGRAPH:
if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
(prop->chartype == ucp_Cf &&
c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
)) == isprop)
return !negated;
break;
/* Printable character: same as graphic, with the addition of Zs, i.e.
not Zl and not Zp, and U+180E. */
case PT_PXPRINT:
if ((prop->chartype != ucp_Zl &&
prop->chartype != ucp_Zp &&
(PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
(prop->chartype == ucp_Cf &&
c != 0x061c && (c < 0x2066 || c > 0x2069))
)) == isprop)
return !negated;
break;
/* Punctuation: all Unicode punctuation, plus ASCII characters that
Unicode treats as symbols rather than punctuation, for Perl
compatibility (these are $+<=>^`|~). */
case PT_PXPUNCT:
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
(c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
return !negated;
break;

View File

@ -48,6 +48,7 @@
/* #define SLJIT_CONFIG_PPC_64 1 */
/* #define SLJIT_CONFIG_MIPS_32 1 */
/* #define SLJIT_CONFIG_SPARC_32 1 */
/* #define SLJIT_CONFIG_TILEGX 1 */
/* #define SLJIT_CONFIG_AUTO 1 */
/* #define SLJIT_CONFIG_UNSUPPORTED 1 */

View File

@ -63,6 +63,7 @@
|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
|| (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
|| (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
|| (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
|| (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED))
#error "An architecture must be selected"
@ -76,6 +77,7 @@
+ (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
+ (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \
+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
@ -104,10 +106,12 @@
#define SLJIT_CONFIG_PPC_64 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
#define SLJIT_CONFIG_PPC_32 1
#elif defined(__mips__)
#elif defined(__mips__) && !defined(_LP64)
#define SLJIT_CONFIG_MIPS_32 1
#elif defined(__sparc__) || defined(__sparc)
#define SLJIT_CONFIG_SPARC_32 1
#elif defined(__tilegx__)
#define SLJIT_CONFIG_TILEGX 1
#else
/* Unsupported architecture */
#define SLJIT_CONFIG_UNSUPPORTED 1
@ -173,9 +177,13 @@
#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */
#ifndef SLJIT_INLINE
/* Inline functions. */
/* Inline functions. Some old compilers do not support them. */
#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
#define SLJIT_INLINE
#else
#define SLJIT_INLINE __inline
#endif
#endif /* !SLJIT_INLINE */
#ifndef SLJIT_CONST
/* Const variables. */
@ -266,7 +274,9 @@ typedef signed int sljit_si;
#define SLJIT_WORD_SHIFT 0
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;
#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
&& !(defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
typedef unsigned int sljit_uw;
@ -311,7 +321,7 @@ typedef double sljit_d;
/* ABI (Application Binary Interface) types. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if defined(__GNUC__)
#if defined(__GNUC__) && !defined(__APPLE__)
#define SLJIT_CALL __attribute__ ((fastcall))
#define SLJIT_X86_32_FASTCALL 1
@ -420,6 +430,7 @@ typedef double sljit_d;
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
#endif

View File

@ -287,3 +287,26 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
allocator_release_lock();
}
/* Walks the free_blocks list between allocator_grab_lock() and
   allocator_release_lock() and, for every free block that passes the test
   below, subtracts its size from total_size, unlinks it with
   sljit_remove_free_block() and passes it to free_chunk(). Takes no
   arguments and returns nothing. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
{
struct free_block* free_block;
struct free_block* next_free_block;
allocator_grab_lock();
free_block = free_blocks;
while (free_block) {
/* Read the successor first: the current block may be handed to
   free_chunk() below and must not be dereferenced afterwards. */
next_free_block = free_block->next;
/* NOTE(review): presumably prev_size == 0 means the block is the first one
   of its chunk and a following header with size == 1 marks the end of the
   chunk, i.e. the free block covers a whole chunk - confirm against the
   allocator's block layout. */
if (!free_block->header.prev_size &&
AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
total_size -= free_block->size;
sljit_remove_free_block(free_block);
/* The extra sizeof(struct block_header) presumably covers the header
   examined in the condition above - TODO confirm. */
free_chunk(free_block, free_block->size + sizeof(struct block_header));
}
free_block = next_free_block;
}
/* Consistency check: total_size and free_blocks must be either both
   non-zero or both zero. */
SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
allocator_release_lock();
}

View File

@ -170,6 +170,14 @@
# define FCSR_FCC 33
#endif
#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
# define IS_JAL 0x04
# define IS_COND 0x08
# define PATCH_B 0x10
# define PATCH_J 0x20
#endif
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
# define IS_MOVABLE 0x04
# define IS_COND 0x08
@ -652,14 +660,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp
}
static char* reg_names[] = {
(char*)"<noreg>", (char*)"t1", (char*)"t2", (char*)"t3",
(char*)"te1", (char*)"te2", (char*)"s1", (char*)"s2",
(char*)"s3", (char*)"se1", (char*)"se2", (char*)"lcr"
(char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3",
(char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2",
(char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc"
};
static char* freg_names[] = {
(char*)"<noreg>", (char*)"float_r1", (char*)"float_r2", (char*)"float_r3",
(char*)"float_r4", (char*)"float_r5", (char*)"float_r6"
(char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3",
(char*)"f4", (char*)"f5", (char*)"f6"
};
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@ -736,17 +744,17 @@ static SLJIT_CONST char* op_names[] = {
};
static char* jump_names[] = {
(char*)"c_equal", (char*)"c_not_equal",
(char*)"c_less", (char*)"c_greater_equal",
(char*)"c_greater", (char*)"c_less_equal",
(char*)"c_sig_less", (char*)"c_sig_greater_equal",
(char*)"c_sig_greater", (char*)"c_sig_less_equal",
(char*)"c_overflow", (char*)"c_not_overflow",
(char*)"c_mul_overflow", (char*)"c_mul_not_overflow",
(char*)"c_float_equal", (char*)"c_float_not_equal",
(char*)"c_float_less", (char*)"c_float_greater_equal",
(char*)"c_float_greater", (char*)"c_float_less_equal",
(char*)"c_float_unordered", (char*)"c_float_ordered",
(char*)"equal", (char*)"not_equal",
(char*)"less", (char*)"greater_equal",
(char*)"greater", (char*)"less_equal",
(char*)"sig_less", (char*)"sig_greater_equal",
(char*)"sig_greater", (char*)"sig_less_equal",
(char*)"overflow", (char*)"not_overflow",
(char*)"mul_overflow", (char*)"mul_not_overflow",
(char*)"float_equal", (char*)"float_not_equal",
(char*)"float_less", (char*)"float_greater_equal",
(char*)"float_greater", (char*)"float_less_equal",
(char*)"float_unordered", (char*)"float_ordered",
(char*)"jump", (char*)"fast_call",
(char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3"
};
@ -993,6 +1001,12 @@ static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg)
SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS);
}
/* Argument check used by sljit_get_float_register_index(): asserts that reg
   is greater than zero and not larger than SLJIT_NO_FLOAT_REGISTERS. */
static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg)
{
/* Presumably keeps reg referenced so that builds where SLJIT_ASSERT expands
   to nothing do not warn about an unused parameter - TODO confirm. */
SLJIT_UNUSED_ARG(reg);
SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS);
}
static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
@ -1104,7 +1118,7 @@ static SLJIT_INLINE void check_sljit_emit_jump(struct sljit_compiler *compiler,
SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_CALL3);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
fprintf(compiler->verbose, " jump%s<%s>\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
fprintf(compiler->verbose, " jump%s.%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
#endif
}
@ -1127,7 +1141,7 @@ static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, s
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " %scmp%s<%s> ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
fprintf(compiler->verbose, " %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
sljit_verbose_param(src1, src1w);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(src2, src2w);
@ -1156,7 +1170,7 @@ static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler,
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " %scmp%s<%s> ", (type & SLJIT_SINGLE_OP) ? "s" : "d",
fprintf(compiler->verbose, " %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d",
!(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]);
sljit_verbose_fparam(src1, src1w);
fprintf(compiler->verbose, ", ");
@ -1187,7 +1201,7 @@ static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler,
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " ijump<%s> ", jump_names[type]);
fprintf(compiler->verbose, " ijump.%s ", jump_names[type]);
sljit_verbose_param(src, srcw);
fprintf(compiler->verbose, "\n");
}
@ -1223,14 +1237,14 @@ static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compil
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " op_flags<%s%s%s%s> ", !(op & SLJIT_INT_OP) ? "" : "i",
fprintf(compiler->verbose, " %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i",
op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k");
sljit_verbose_param(dst, dstw);
if (src != SLJIT_UNUSED) {
fprintf(compiler->verbose, ", ");
sljit_verbose_param(src, srcw);
}
fprintf(compiler->verbose, ", <%s>\n", jump_names[type]);
fprintf(compiler->verbose, ", %s\n", jump_names[type]);
}
#endif
}
@ -1339,6 +1353,8 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi
# include "sljitNativeMIPS_common.c"
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
# include "sljitNativeSPARC_common.c"
#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
# include "sljitNativeTILEGX.c"
#endif
#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)

View File

@ -77,7 +77,7 @@
#endif
/* The following header file defines useful macros for fine tuning
sljit based code generators. They are listed in the begining
sljit based code generators. They are listed in the beginning
of sljitConfigInternal.h */
#include "sljitConfigInternal.h"
@ -161,12 +161,14 @@ of sljitConfigInternal.h */
/* Floating point operations are performed on double or
single precision values. */
#define SLJIT_FLOAT_REG1 1
#define SLJIT_FLOAT_REG2 2
#define SLJIT_FLOAT_REG3 3
#define SLJIT_FLOAT_REG4 4
#define SLJIT_FLOAT_REG5 5
#define SLJIT_FLOAT_REG6 6
#define SLJIT_FLOAT_REG1 1
#define SLJIT_FLOAT_REG2 2
#define SLJIT_FLOAT_REG3 3
#define SLJIT_FLOAT_REG4 4
#define SLJIT_FLOAT_REG5 5
#define SLJIT_FLOAT_REG6 6
#define SLJIT_NO_FLOAT_REGISTERS 6
/* --------------------------------------------------------------------- */
/* Main structures and functions */
@ -281,6 +283,11 @@ struct sljit_compiler {
sljit_sw cache_argw;
#endif
#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
sljit_si cache_arg;
sljit_sw cache_argw;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
FILE* verbose;
#endif
@ -306,7 +313,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void);
/* Free everything except the compiled machine code. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler);
/* Returns the current error code. If an error is occured, future sljit
/* Returns the current error code. If an error has occurred, future sljit
calls which use the same compiler argument return early with the same
error code. Thus there is no need for checking the error after every
call, it is enough to do it before the code is compiled. Removing
@ -447,7 +454,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
sequences. This information could help to improve those code
generators which focus on only a few architectures.
x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full adress space on x86-32)
x86: [reg+imm], -2^32+1 <= imm <= 2^32-1 (full address space on x86-32)
[reg+(reg<<imm)] is supported
[imm], -2^32+1 <= imm <= 2^32-1 is supported
Write-back is not supported
@ -698,12 +705,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index of any SLJIT_SCRATCH
SLJIT_SAVED or SLJIT_LOCALS register.
Note: it returns with -1 for virtual registers (all EREGs on x86-32).
Note: register returned by SLJIT_LOCALS_REG is not necessary the real
stack pointer register of the target architecture. */
Note: it returns with -1 for virtual registers (all EREGs on x86-32). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index of any SLJIT_FLOAT register.
Note: the index is divided by 2 on ARM 32 bit architectures. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
/* Any instruction can be inserted into the instruction stream by
sljit_emit_op_custom. It has a similar purpose as inline assembly.
The size parameter must match to the instruction size of the target
@ -896,7 +907,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
/* --------------------------------------------------------------------- */
#define SLJIT_MAJOR_VERSION 0
#define SLJIT_MINOR_VERSION 90
#define SLJIT_MINOR_VERSION 91
/* Get the human readable name of the platform. Can be useful on platforms
like ARM, where ARM and Thumb2 functions can be mixed, and

View File

@ -418,9 +418,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
SLJIT_CACHE_FLUSH(code, code_ptr);
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_size = compiler->size * sizeof(sljit_uh);
compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh);
SLJIT_CACHE_FLUSH(code, code_ptr);
/* Set thumb mode flag. */
return (void*)((sljit_uw)code | 0x1);
}
@ -1526,6 +1526,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
/* Returns the index for the float register reg. The argument is first
   validated by check_sljit_get_float_register_index(); in this
   implementation the value is then returned unchanged. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
check_sljit_get_float_register_index(reg);
return reg;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{

View File

@ -405,7 +405,6 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw
if (diff & 0x3)
return 0;
diff >>= 2;
if (jump->flags & IS_BL) {
if (diff <= 0x01ffffff && diff >= -0x02000000) {
*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
@ -431,7 +430,6 @@ static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw
if (diff & 0x3)
return 0;
diff >>= 2;
if (diff <= 0x01ffffff && diff >= -0x02000000) {
code_ptr -= 2;
*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
@ -787,9 +785,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
SLJIT_ASSERT(code_ptr - code <= (sljit_si)size);
SLJIT_CACHE_FLUSH(code, code_ptr);
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_size = size * sizeof(sljit_uw);
compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);
SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
@ -1991,6 +1989,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
/* Returns the index for the float register reg. The argument is first
   validated by check_sljit_get_float_register_index(); in this
   implementation the value is then returned unchanged. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
check_sljit_get_float_register_index(reg);
return reg;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{

View File

@ -30,7 +30,7 @@
SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
return "MIPS" SLJIT_CPUINFO;
return "MIPS(32)" SLJIT_CPUINFO;
#else
return "MIPS III" SLJIT_CPUINFO;
#endif
@ -398,7 +398,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_size = compiler->size * sizeof(sljit_ins);
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
#ifndef __GNUC__
SLJIT_CACHE_FLUSH(code, code_ptr);
#else
@ -1099,6 +1099,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
/* Returns the index for the float register reg. The argument is first
   validated by check_sljit_get_float_register_index(); in this
   implementation the returned index is reg shifted left by one bit
   (i.e. reg * 2). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
check_sljit_get_float_register_index(reg);
return reg << 1;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{

View File

@ -402,9 +402,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
jump = jump->next;
}
SLJIT_CACHE_FLUSH(code, code_ptr);
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_size = compiler->size * sizeof(sljit_ins);
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
SLJIT_CACHE_FLUSH(code, code_ptr);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@ -1507,6 +1507,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
/* Returns the index for the float register reg. The argument is first
   validated by check_sljit_get_float_register_index(); in this
   implementation the value is then returned unchanged. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
check_sljit_get_float_register_index(reg);
return reg;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{

View File

@ -35,6 +35,30 @@ typedef sljit_ui sljit_ins;
static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
__asm (
/* if (from == to) return */
"cmp %i0, %i1\n"
"be .leave\n"
"nop\n"
/* loop until from >= to */
".mainloop:\n"
"flush %i0\n"
"add %i0, 8, %i0\n"
"cmp %i0, %i1\n"
"bcs .mainloop\n"
"nop\n"
/* The comparison was done above. */
"bne .leave\n"
/* nop is not necessary here, since the
sub operation has no side effect. */
"sub %i0, 4, %i0\n"
"flush %i0\n"
".leave:"
);
#else
if (SLJIT_UNLIKELY(from == to))
return;
@ -49,12 +73,13 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
if (from == to) {
/* Flush the last word. */
to --;
from --;
__asm__ volatile (
"flush %0\n"
: : "r"(to)
: : "r"(from)
);
}
#endif
}
/* TMP_REG2 is not used by getput_arg */
@ -344,7 +369,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_size = compiler->size * sizeof(sljit_ins);
compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
SLJIT_CACHE_FLUSH(code, code_ptr);
return code;
}
@ -896,6 +921,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
/* Returns the index for the float register reg. The argument is first
   validated by check_sljit_get_float_register_index(); in this
   implementation the returned index is reg shifted left by one bit
   (i.e. reg * 2). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
check_sljit_get_float_register_index(reg);
return reg << 1;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{

View File

@ -149,7 +149,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
if (saveds > 3)
locals_offset += (saveds - 3) * sizeof(sljit_uw);
compiler->locals_offset = locals_offset;
#if defined(__APPLE__)
saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
#else
local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
#endif
compiler->local_size = local_size;
#ifdef _WIN32
@ -197,7 +202,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
if (saveds > 3)
locals_offset += (saveds - 3) * sizeof(sljit_uw);
compiler->locals_offset = locals_offset;
#if defined(__APPLE__)
saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds;
#else
compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1));
#endif
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)

View File

@ -206,6 +206,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
#define OR_rm8_r8 0x08
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
@ -267,75 +268,54 @@ static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;
#if defined(_MSC_VER) && (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#if _MSC_VER >= 1400
#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#else
#error "MSVC does not support inline assembly in 64 bit mode"
#endif
#endif /* _MSC_VER && SLJIT_CONFIG_X86_64 */
static void get_cpu_features(void)
{
sljit_ui features;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if defined(_MSC_VER) && _MSC_VER >= 1400
int CPUInfo[4];
__cpuid(CPUInfo, 1);
features = (sljit_ui)CPUInfo[3];
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
/* AT&T syntax. */
__asm__ (
"pushl %%ebx\n"
"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* On x86-32, there is no red zone, so this
should work (no need for a local variable). */
"push %%ebx\n"
#endif
"cpuid\n"
"popl %%ebx\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
"pop %%ebx\n"
#endif
"movl %%edx, %0\n"
: "=g" (features)
:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
: "%eax", "%ecx", "%edx"
#else
: "%rax", "%rbx", "%rcx", "%rdx"
#endif
);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
#else /* _MSC_VER && _MSC_VER >= 1400 */
/* Intel syntax. */
__asm {
mov eax, 1
push ebx
cpuid
pop ebx
mov features, edx
}
#else
# error "SLJIT_DETECT_SSE2 is not implemented for this C compiler"
#endif
#else /* SLJIT_CONFIG_X86_32 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
/* AT&T syntax. */
__asm__ (
"pushq %%rbx\n"
"movl $0x1, %%eax\n"
"cpuid\n"
"popq %%rbx\n"
"movl %%edx, %0\n"
: "=g" (features)
:
: "%rax", "%rcx", "%rdx"
);
#elif defined(_MSC_VER) && _MSC_VER >= 1400
int CPUInfo[4];
__cpuid(CPUInfo, 1);
features = (sljit_ui)CPUInfo[3];
#else
__asm {
mov eax, 1
push rbx
cpuid
pop rbx
mov features, edx
}
#endif
#endif /* SLJIT_CONFIG_X86_32 */
#endif /* _MSC_VER && _MSC_VER >= 1400 */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
cpu_has_sse2 = (features >> 26) & 0x1;
@ -570,7 +550,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* Maybe we waste some space because of short jumps. */
SLJIT_ASSERT(code_ptr <= code + compiler->size);
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_size = compiler->size;
compiler->executable_size = code_ptr - code;
return (void*)code;
}
@ -650,9 +630,10 @@ static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
This function touches all 4k pages belonging to the requested stack space,
whose size is passed in local_size. This is necessary on Windows where
the stack can only grow in 4k steps. However, this function just burns
CPU cycles if the stack is large enough, but you don't know it in advance.
I think this is a bad design even if it has some reasons. */
alloca(local_size);
CPU cycles if the stack is large enough. However, you don't know it in
advance, so it must always be called. I think this is a bad design in
general even if it has some reasons. */
*(sljit_si*)alloca(local_size) = 0;
}
#endif
@ -1785,7 +1766,7 @@ static sljit_si emit_mul(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
static sljit_si emit_lea_binary(struct sljit_compiler *compiler,
static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
sljit_si dst, sljit_sw dstw,
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w)
@ -1794,10 +1775,12 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler,
sljit_si dst_r, done = 0;
/* These cases are better left to be handled the normal way. */
if (dst == src1 && dstw == src1w)
return SLJIT_ERR_UNSUPPORTED;
if (dst == src2 && dstw == src2w)
return SLJIT_ERR_UNSUPPORTED;
if (!keep_flags) {
if (dst == src1 && dstw == src1w)
return SLJIT_ERR_UNSUPPORTED;
if (dst == src2 && dstw == src2w)
return SLJIT_ERR_UNSUPPORTED;
}
dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
@ -2153,7 +2136,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
if (!GET_FLAGS(op)) {
if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
else
@ -2173,7 +2156,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB:
if (!GET_FLAGS(op)) {
if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error;
}
else
@ -2231,6 +2214,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
return reg_map[reg];
}
/* Maps an SLJIT floating point register identifier to the index this
   back-end uses for it. The argument is first passed to
   check_sljit_get_float_register_index(); the identifier is then returned
   unchanged, so no translation table is consulted (unlike
   sljit_get_register_index(), which looks the value up in reg_map). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
check_sljit_get_float_register_index(reg);
return reg;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size)
{
@ -2637,6 +2626,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
cond_set = get_jump_code(type) + 0x10;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src) {
inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
FAIL_IF(!inst);
INC_SIZE(4 + 3);
/* Set low register to conditional flag. */
*inst++ = (reg_map[TMP_REGISTER] <= 7) ? REX : REX_B;
*inst++ = GROUP_0F;
*inst++ = cond_set;
*inst++ = MOD_REG | reg_lmap[TMP_REGISTER];
*inst++ = REX | (reg_map[TMP_REGISTER] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
*inst++ = OR_rm8_r8;
*inst++ = MOD_REG | (reg_lmap[TMP_REGISTER] << 3) | reg_lmap[dst];
return SLJIT_SUCCESS;
}
reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
@ -2717,6 +2721,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
return SLJIT_SUCCESS;
}
if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src && reg_map[dst] <= 4) {
SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
if (dst != SLJIT_SCRATCH_REG1) {
inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
FAIL_IF(!inst);
INC_SIZE(1 + 3 + 2 + 1);
/* Set low register to conditional flag. */
*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
*inst++ = GROUP_0F;
*inst++ = cond_set;
*inst++ = MOD_REG | 0 /* eax */;
*inst++ = OR_rm8_r8;
*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
}
else {
inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
FAIL_IF(!inst);
INC_SIZE(2 + 3 + 2 + 2);
/* Set low register to conditional flag. */
*inst++ = XCHG_r_rm;
*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
*inst++ = GROUP_0F;
*inst++ = cond_set;
*inst++ = MOD_REG | 1 /* ecx */;
*inst++ = OR_rm8_r8;
*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
*inst++ = XCHG_r_rm;
*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
}
return SLJIT_SUCCESS;
}
/* Set TMP_REGISTER to the bit. */
inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
FAIL_IF(!inst);
@ -2761,16 +2798,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co
if (NOT_HALFWORD(offset)) {
FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
return compiler->error;
#else
return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
}
#endif
if (offset != 0)
return emit_lea_binary(compiler, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}

View File

@ -11,7 +11,10 @@ should always be at the end of each enum, for backwards compatibility.
IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors. */
where the equivalent property descriptive names are listed in vectors.
ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre_compile.c. */
/* These are the general character categories. */