[regexp] implement case-insensitive unicode regexps.
BUG=v8:2952 LOG=N Review URL: https://codereview.chromium.org/1599303002 Cr-Commit-Position: refs/heads/master@{#33538}
This commit is contained in:
parent
2a0e4225dd
commit
a2baaaac93
@ -891,7 +891,7 @@ class Isolate {
|
|||||||
|
|
||||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
|
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
|
||||||
interp_canonicalize_mapping() {
|
interp_canonicalize_mapping() {
|
||||||
return &interp_canonicalize_mapping_;
|
return &regexp_macro_assembler_canonicalize_;
|
||||||
}
|
}
|
||||||
|
|
||||||
Debug* debug() { return debug_; }
|
Debug* debug() { return debug_; }
|
||||||
@ -1245,7 +1245,6 @@ class Isolate {
|
|||||||
regexp_macro_assembler_canonicalize_;
|
regexp_macro_assembler_canonicalize_;
|
||||||
RegExpStack* regexp_stack_;
|
RegExpStack* regexp_stack_;
|
||||||
DateCache* date_cache_;
|
DateCache* date_cache_;
|
||||||
unibrow::Mapping<unibrow::Ecma262Canonicalize> interp_canonicalize_mapping_;
|
|
||||||
CallInterfaceDescriptorData* call_descriptor_data_;
|
CallInterfaceDescriptorData* call_descriptor_data_;
|
||||||
base::RandomNumberGenerator* random_number_generator_;
|
base::RandomNumberGenerator* random_number_generator_;
|
||||||
|
|
||||||
|
@ -210,7 +210,7 @@ void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
Label fallthrough;
|
Label fallthrough;
|
||||||
__ ldr(r0, register_location(start_reg)); // Index of start of capture
|
__ ldr(r0, register_location(start_reg)); // Index of start of capture
|
||||||
__ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
|
__ ldr(r1, register_location(start_reg + 1)); // Index of end of capture
|
||||||
@ -302,7 +302,7 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
|
|||||||
// r0: Address byte_offset1 - Address captured substring's start.
|
// r0: Address byte_offset1 - Address captured substring's start.
|
||||||
// r1: Address byte_offset2 - Address of current character position.
|
// r1: Address byte_offset2 - Address of current character position.
|
||||||
// r2: size_t byte_length - length of capture in bytes(!)
|
// r2: size_t byte_length - length of capture in bytes(!)
|
||||||
// r3: Isolate* isolate
|
// r3: Isolate* isolate or 0 if unicode flag.
|
||||||
|
|
||||||
// Address of start of capture.
|
// Address of start of capture.
|
||||||
__ add(r0, r0, Operand(end_of_input_address()));
|
__ add(r0, r0, Operand(end_of_input_address()));
|
||||||
@ -316,7 +316,14 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
|
|||||||
__ sub(r1, r1, r4);
|
__ sub(r1, r1, r4);
|
||||||
}
|
}
|
||||||
// Isolate.
|
// Isolate.
|
||||||
__ mov(r3, Operand(ExternalReference::isolate_address(isolate())));
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
if (unicode) {
|
||||||
|
__ mov(r3, Operand(0));
|
||||||
|
} else // NOLINT
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
{
|
||||||
|
__ mov(r3, Operand(ExternalReference::isolate_address(isolate())));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
AllowExternalCallThatCantCauseGC scope(masm_);
|
AllowExternalCallThatCantCauseGC scope(masm_);
|
||||||
|
@ -38,7 +38,7 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
|
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(unsigned c,
|
virtual void CheckNotCharacterAfterAnd(unsigned c,
|
||||||
|
@ -274,7 +274,7 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
Label fallthrough;
|
Label fallthrough;
|
||||||
|
|
||||||
Register capture_start_offset = w10;
|
Register capture_start_offset = w10;
|
||||||
@ -388,7 +388,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
|
|||||||
// x0: Address byte_offset1 - Address captured substring's start.
|
// x0: Address byte_offset1 - Address captured substring's start.
|
||||||
// x1: Address byte_offset2 - Address of current character position.
|
// x1: Address byte_offset2 - Address of current character position.
|
||||||
// w2: size_t byte_length - length of capture in bytes(!)
|
// w2: size_t byte_length - length of capture in bytes(!)
|
||||||
// x3: Isolate* isolate
|
// x3: Isolate* isolate or 0 if unicode flag
|
||||||
|
|
||||||
// Address of start of capture.
|
// Address of start of capture.
|
||||||
__ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
|
__ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
|
||||||
@ -400,7 +400,14 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
|
|||||||
__ Sub(x1, x1, Operand(capture_length, SXTW));
|
__ Sub(x1, x1, Operand(capture_length, SXTW));
|
||||||
}
|
}
|
||||||
// Isolate.
|
// Isolate.
|
||||||
__ Mov(x3, ExternalReference::isolate_address(isolate()));
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
if (unicode) {
|
||||||
|
__ Mov(x3, Operand(0));
|
||||||
|
} else // NOLINT
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
{
|
||||||
|
__ Mov(x3, ExternalReference::isolate_address(isolate()));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
AllowExternalCallThatCantCauseGC scope(masm_);
|
AllowExternalCallThatCantCauseGC scope(masm_);
|
||||||
|
@ -43,7 +43,7 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
|
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(unsigned c,
|
virtual void CheckNotCharacterAfterAnd(unsigned c,
|
||||||
|
@ -20,56 +20,58 @@ const unsigned int MAX_FIRST_ARG = 0x7fffffu;
|
|||||||
const int BYTECODE_SHIFT = 8;
|
const int BYTECODE_SHIFT = 8;
|
||||||
|
|
||||||
#define BYTECODE_ITERATOR(V) \
|
#define BYTECODE_ITERATOR(V) \
|
||||||
V(BREAK, 0, 4) /* bc8 */ \
|
V(BREAK, 0, 4) /* bc8 */ \
|
||||||
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
|
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
|
||||||
V(PUSH_BT, 2, 8) /* bc8 pad24 offset32 */ \
|
V(PUSH_BT, 2, 8) /* bc8 pad24 offset32 */ \
|
||||||
V(PUSH_REGISTER, 3, 4) /* bc8 reg_idx24 */ \
|
V(PUSH_REGISTER, 3, 4) /* bc8 reg_idx24 */ \
|
||||||
V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32 */ \
|
V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32 */ \
|
||||||
V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24 */ \
|
V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24 */ \
|
||||||
V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24 */ \
|
V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24 */ \
|
||||||
V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24 */ \
|
V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24 */ \
|
||||||
V(SET_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \
|
V(SET_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \
|
||||||
V(ADVANCE_REGISTER, 9, 8) /* bc8 reg_idx24 value32 */ \
|
V(ADVANCE_REGISTER, 9, 8) /* bc8 reg_idx24 value32 */ \
|
||||||
V(POP_CP, 10, 4) /* bc8 pad24 */ \
|
V(POP_CP, 10, 4) /* bc8 pad24 */ \
|
||||||
V(POP_BT, 11, 4) /* bc8 pad24 */ \
|
V(POP_BT, 11, 4) /* bc8 pad24 */ \
|
||||||
V(POP_REGISTER, 12, 4) /* bc8 reg_idx24 */ \
|
V(POP_REGISTER, 12, 4) /* bc8 reg_idx24 */ \
|
||||||
V(FAIL, 13, 4) /* bc8 pad24 */ \
|
V(FAIL, 13, 4) /* bc8 pad24 */ \
|
||||||
V(SUCCEED, 14, 4) /* bc8 pad24 */ \
|
V(SUCCEED, 14, 4) /* bc8 pad24 */ \
|
||||||
V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \
|
V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \
|
||||||
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
|
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
|
||||||
V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \
|
V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \
|
||||||
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \
|
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \
|
||||||
V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \
|
V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \
|
||||||
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \
|
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \
|
||||||
V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \
|
V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \
|
||||||
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \
|
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \
|
||||||
V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \
|
V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \
|
||||||
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
|
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
|
||||||
V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \
|
V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \
|
||||||
V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \
|
V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \
|
||||||
V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
|
V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
|
||||||
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
||||||
V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
|
V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
|
||||||
V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
||||||
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
|
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
|
||||||
V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
|
V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
|
||||||
V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
|
V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
|
||||||
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
|
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
|
||||||
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
|
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
|
||||||
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
|
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
|
||||||
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
|
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
|
||||||
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
|
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
|
||||||
V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
|
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) \
|
||||||
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
|
V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
|
||||||
V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
|
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \
|
||||||
V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) \
|
||||||
V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
|
||||||
V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
|
V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
||||||
V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
|
V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \
|
||||||
V(CHECK_NOT_AT_START, 46, 8) /* bc8 offset24 addr32 */ \
|
V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \
|
||||||
V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
|
V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \
|
||||||
V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
|
V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
|
||||||
V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */
|
V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
|
||||||
|
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
|
||||||
|
V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */
|
||||||
|
|
||||||
#define DECLARE_BYTECODES(name, code, length) \
|
#define DECLARE_BYTECODES(name, code, length) \
|
||||||
static const int BC_##name = code;
|
static const int BC_##name = code;
|
||||||
|
@ -189,7 +189,7 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
Label fallthrough;
|
Label fallthrough;
|
||||||
__ mov(edx, register_location(start_reg)); // Index of start of capture
|
__ mov(edx, register_location(start_reg)); // Index of start of capture
|
||||||
__ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
|
__ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
|
||||||
@ -296,11 +296,18 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
|
|||||||
// Address byte_offset1 - Address captured substring's start.
|
// Address byte_offset1 - Address captured substring's start.
|
||||||
// Address byte_offset2 - Address of current character position.
|
// Address byte_offset2 - Address of current character position.
|
||||||
// size_t byte_length - length of capture in bytes(!)
|
// size_t byte_length - length of capture in bytes(!)
|
||||||
// Isolate* isolate
|
// Isolate* isolate or 0 if unicode flag.
|
||||||
|
|
||||||
// Set isolate.
|
// Set isolate.
|
||||||
__ mov(Operand(esp, 3 * kPointerSize),
|
#ifdef V8_I18N_SUPPORT
|
||||||
Immediate(ExternalReference::isolate_address(isolate())));
|
if (unicode) {
|
||||||
|
__ mov(Operand(esp, 3 * kPointerSize), Immediate(0));
|
||||||
|
} else // NOLINT
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
{
|
||||||
|
__ mov(Operand(esp, 3 * kPointerSize),
|
||||||
|
Immediate(ExternalReference::isolate_address(isolate())));
|
||||||
|
}
|
||||||
// Set byte_length.
|
// Set byte_length.
|
||||||
__ mov(Operand(esp, 2 * kPointerSize), ebx);
|
__ mov(Operand(esp, 2 * kPointerSize), ebx);
|
||||||
// Set byte_offset2.
|
// Set byte_offset2.
|
||||||
|
@ -37,7 +37,7 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
||||||
|
@ -15,37 +15,32 @@
|
|||||||
#include "src/unicode.h"
|
#include "src/unicode.h"
|
||||||
#include "src/utils.h"
|
#include "src/utils.h"
|
||||||
|
|
||||||
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
#include "unicode/uchar.h"
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
|
||||||
namespace v8 {
|
namespace v8 {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
|
typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
|
||||||
|
|
||||||
static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
|
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||||
int from,
|
int len, Vector<const uc16> subject,
|
||||||
int current,
|
bool unicode) {
|
||||||
int len,
|
Address offset_a =
|
||||||
Vector<const uc16> subject) {
|
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
|
||||||
for (int i = 0; i < len; i++) {
|
Address offset_b =
|
||||||
unibrow::uchar old_char = subject[from++];
|
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
|
||||||
unibrow::uchar new_char = subject[current++];
|
size_t length = len * kUC16Size;
|
||||||
if (old_char == new_char) continue;
|
return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
|
||||||
unibrow::uchar old_string[1] = { old_char };
|
offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
|
||||||
unibrow::uchar new_string[1] = { new_char };
|
|
||||||
interp_canonicalize->get(old_char, '\0', old_string);
|
|
||||||
interp_canonicalize->get(new_char, '\0', new_string);
|
|
||||||
if (old_string[0] != new_string[0]) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
|
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||||
int from,
|
int len, Vector<const uint8_t> subject,
|
||||||
int current,
|
bool unicode) {
|
||||||
int len,
|
// For Latin1 characters the unicode flag makes no difference.
|
||||||
Vector<const uint8_t> subject) {
|
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
unsigned int old_char = subject[from++];
|
unsigned int old_char = subject[from++];
|
||||||
unsigned int new_char = subject[current++];
|
unsigned int new_char = subject[current++];
|
||||||
@ -523,13 +518,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
|
|||||||
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
|
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
|
||||||
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
|
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
|
||||||
|
bool unicode =
|
||||||
|
(insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
|
||||||
int from = registers[insn >> BYTECODE_SHIFT];
|
int from = registers[insn >> BYTECODE_SHIFT];
|
||||||
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
|
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
|
||||||
if (from >= 0 && len > 0) {
|
if (from >= 0 && len > 0) {
|
||||||
if (current + len > subject.length() ||
|
if (current + len > subject.length() ||
|
||||||
!BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
|
!BackRefMatchesNoCase(isolate, from, current, len, subject,
|
||||||
from, current, len, subject)) {
|
unicode)) {
|
||||||
pc = code_base + Load32Aligned(pc + 4);
|
pc = code_base + Load32Aligned(pc + 4);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -538,13 +536,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
|
|||||||
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
|
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
|
||||||
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
|
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
|
||||||
|
bool unicode = (insn & BYTECODE_MASK) ==
|
||||||
|
BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
|
||||||
int from = registers[insn >> BYTECODE_SHIFT];
|
int from = registers[insn >> BYTECODE_SHIFT];
|
||||||
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
|
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
|
||||||
if (from >= 0 && len > 0) {
|
if (from >= 0 && len > 0) {
|
||||||
if (current - len < 0 ||
|
if (current - len < 0 ||
|
||||||
!BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
|
!BackRefMatchesNoCase(isolate, from, current - len, len, subject,
|
||||||
from, current - len, len, subject)) {
|
unicode)) {
|
||||||
pc = code_base + Load32Aligned(pc + 4);
|
pc = code_base + Load32Aligned(pc + 4);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,11 @@
|
|||||||
#include "src/string-search.h"
|
#include "src/string-search.h"
|
||||||
#include "src/unicode-decoder.h"
|
#include "src/unicode-decoder.h"
|
||||||
|
|
||||||
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
#include "unicode/uset.h"
|
||||||
|
#include "unicode/utypes.h"
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
|
||||||
#ifndef V8_INTERPRETED_REGEXP
|
#ifndef V8_INTERPRETED_REGEXP
|
||||||
#if V8_TARGET_ARCH_IA32
|
#if V8_TARGET_ARCH_IA32
|
||||||
#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
|
#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
|
||||||
@ -3420,10 +3425,7 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
|
|||||||
// independent case and it slows us down if we don't know that.
|
// independent case and it slows us down if we don't know that.
|
||||||
if (cc->is_standard(zone())) continue;
|
if (cc->is_standard(zone())) continue;
|
||||||
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
|
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
|
||||||
int range_count = ranges->length();
|
CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
|
||||||
for (int j = 0; j < range_count; j++) {
|
|
||||||
ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3586,13 +3588,6 @@ class AlternativeGenerationList {
|
|||||||
AlternativeGeneration a_few_alt_gens_[kAFew];
|
AlternativeGeneration a_few_alt_gens_[kAFew];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static const uc32 kLeadSurrogateStart = 0xd800;
|
|
||||||
static const uc32 kLeadSurrogateEnd = 0xdbff;
|
|
||||||
static const uc32 kTrailSurrogateStart = 0xdc00;
|
|
||||||
static const uc32 kTrailSurrogateEnd = 0xdfff;
|
|
||||||
static const uc32 kNonBmpStart = 0x10000;
|
|
||||||
static const uc32 kNonBmpEnd = 0x10ffff;
|
|
||||||
static const uc32 kRangeEndMarker = 0x110000;
|
static const uc32 kRangeEndMarker = 0x110000;
|
||||||
|
|
||||||
// The '2' variant has inclusive from and exclusive to.
|
// The '2' variant has inclusive from and exclusive to.
|
||||||
@ -4395,8 +4390,8 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||||||
|
|
||||||
DCHECK_EQ(start_reg_ + 1, end_reg_);
|
DCHECK_EQ(start_reg_ + 1, end_reg_);
|
||||||
if (compiler->ignore_case()) {
|
if (compiler->ignore_case()) {
|
||||||
assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
|
assembler->CheckNotBackReferenceIgnoreCase(
|
||||||
trace->backtrack());
|
start_reg_, read_backward(), compiler->unicode(), trace->backtrack());
|
||||||
} else {
|
} else {
|
||||||
assembler->CheckNotBackReference(start_reg_, read_backward(),
|
assembler->CheckNotBackReference(start_reg_, read_backward(),
|
||||||
trace->backtrack());
|
trace->backtrack());
|
||||||
@ -4866,21 +4861,6 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {
|
|
||||||
ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
|
||||||
CharacterRange::Canonicalize(ranges);
|
|
||||||
for (int i = ranges->length() - 1; i >= 0; i--) {
|
|
||||||
uc32 from = ranges->at(i).from();
|
|
||||||
uc32 to = ranges->at(i).to();
|
|
||||||
// Check for non-BMP characters.
|
|
||||||
if (to >= kNonBmpStart) return true;
|
|
||||||
// Check for lone surrogates.
|
|
||||||
if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
|
UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
|
||||||
ZoneList<CharacterRange>* base)
|
ZoneList<CharacterRange>* base)
|
||||||
: zone_(zone),
|
: zone_(zone),
|
||||||
@ -5120,11 +5100,53 @@ void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
|
||||||
|
ZoneList<CharacterRange>* ranges) {
|
||||||
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
// Use ICU to compute the case fold closure over the ranges.
|
||||||
|
DCHECK(compiler->unicode());
|
||||||
|
DCHECK(compiler->ignore_case());
|
||||||
|
USet* set = uset_openEmpty();
|
||||||
|
for (int i = 0; i < ranges->length(); i++) {
|
||||||
|
uset_addRange(set, ranges->at(i).from(), ranges->at(i).to());
|
||||||
|
}
|
||||||
|
ranges->Clear();
|
||||||
|
uset_closeOver(set, USET_CASE_INSENSITIVE);
|
||||||
|
// Full case mappings map single characters to multiple characters.
|
||||||
|
// Those are represented as strings in the set. Remove them so that
|
||||||
|
// we end up with only simple and common case mappings.
|
||||||
|
uset_removeAllStrings(set);
|
||||||
|
int item_count = uset_getItemCount(set);
|
||||||
|
int item_result = 0;
|
||||||
|
UErrorCode ec = U_ZERO_ERROR;
|
||||||
|
Zone* zone = compiler->zone();
|
||||||
|
for (int i = 0; i < item_count; i++) {
|
||||||
|
uc32 start = 0;
|
||||||
|
uc32 end = 0;
|
||||||
|
item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
|
||||||
|
ranges->Add(CharacterRange::Range(start, end), zone);
|
||||||
|
}
|
||||||
|
// No errors and everything we collected have been ranges.
|
||||||
|
DCHECK_EQ(U_ZERO_ERROR, ec);
|
||||||
|
DCHECK_EQ(0, item_result);
|
||||||
|
uset_close(set);
|
||||||
|
#else
|
||||||
|
// Fallback if ICU is not included.
|
||||||
|
CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
|
||||||
|
ranges, compiler->one_byte());
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
CharacterRange::Canonicalize(ranges);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||||
RegExpNode* on_success) {
|
RegExpNode* on_success) {
|
||||||
set_.Canonicalize();
|
set_.Canonicalize();
|
||||||
Zone* zone = compiler->zone();
|
Zone* zone = compiler->zone();
|
||||||
ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
||||||
|
if (compiler->unicode() && compiler->ignore_case()) {
|
||||||
|
AddUnicodeCaseEquivalents(compiler, ranges);
|
||||||
|
}
|
||||||
if (compiler->unicode() && !compiler->one_byte()) {
|
if (compiler->unicode() && !compiler->one_byte()) {
|
||||||
if (is_negated()) {
|
if (is_negated()) {
|
||||||
ZoneList<CharacterRange>* negated =
|
ZoneList<CharacterRange>* negated =
|
||||||
@ -5853,16 +5875,19 @@ Vector<const int> CharacterRange::GetWordBounds() {
|
|||||||
void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
||||||
ZoneList<CharacterRange>* ranges,
|
ZoneList<CharacterRange>* ranges,
|
||||||
bool is_one_byte) {
|
bool is_one_byte) {
|
||||||
uc32 bottom = from();
|
int range_count = ranges->length();
|
||||||
uc32 top = to();
|
for (int i = 0; i < range_count; i++) {
|
||||||
// Nothing to be done for surrogates.
|
CharacterRange range = ranges->at(i);
|
||||||
if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
|
uc32 bottom = range.from();
|
||||||
if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
|
uc32 top = range.to();
|
||||||
if (bottom > String::kMaxOneByteCharCode) return;
|
// Nothing to be done for surrogates.
|
||||||
if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
|
if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
|
||||||
}
|
if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
|
||||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
if (bottom > String::kMaxOneByteCharCode) return;
|
||||||
if (top == bottom) {
|
if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
|
||||||
|
}
|
||||||
|
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||||
|
if (top == bottom) {
|
||||||
// If this is a singleton we just expand the one character.
|
// If this is a singleton we just expand the one character.
|
||||||
int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
|
int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
@ -5914,6 +5939,7 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
|||||||
pos = end + 1;
|
pos = end + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -6284,7 +6310,7 @@ void TextNode::CalculateOffsets() {
|
|||||||
|
|
||||||
|
|
||||||
void Analysis::VisitText(TextNode* that) {
|
void Analysis::VisitText(TextNode* that) {
|
||||||
if (ignore_case_) {
|
if (ignore_case()) {
|
||||||
that->MakeCaseIndependent(isolate(), is_one_byte_);
|
that->MakeCaseIndependent(isolate(), is_one_byte_);
|
||||||
}
|
}
|
||||||
EnsureAnalyzed(that->on_success());
|
EnsureAnalyzed(that->on_success());
|
||||||
@ -6649,7 +6675,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
|
|||||||
|
|
||||||
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
|
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
|
||||||
data->node = node;
|
data->node = node;
|
||||||
Analysis analysis(isolate, ignore_case, is_one_byte);
|
Analysis analysis(isolate, flags, is_one_byte);
|
||||||
analysis.EnsureAnalyzed(node);
|
analysis.EnsureAnalyzed(node);
|
||||||
if (analysis.has_failed()) {
|
if (analysis.has_failed()) {
|
||||||
const char* error_message = analysis.error_message();
|
const char* error_message = analysis.error_message();
|
||||||
|
@ -19,6 +19,15 @@ class RegExpNode;
|
|||||||
class RegExpTree;
|
class RegExpTree;
|
||||||
class BoyerMooreLookahead;
|
class BoyerMooreLookahead;
|
||||||
|
|
||||||
|
|
||||||
|
static const uc32 kLeadSurrogateStart = 0xd800;
|
||||||
|
static const uc32 kLeadSurrogateEnd = 0xdbff;
|
||||||
|
static const uc32 kTrailSurrogateStart = 0xdc00;
|
||||||
|
static const uc32 kTrailSurrogateEnd = 0xdfff;
|
||||||
|
static const uc32 kNonBmpStart = 0x10000;
|
||||||
|
static const uc32 kNonBmpEnd = 0x10ffff;
|
||||||
|
|
||||||
|
|
||||||
class RegExpImpl {
|
class RegExpImpl {
|
||||||
public:
|
public:
|
||||||
// Whether V8 is compiled with native regexp support or not.
|
// Whether V8 is compiled with native regexp support or not.
|
||||||
@ -1478,9 +1487,9 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
|||||||
// +-------+ +------------+
|
// +-------+ +------------+
|
||||||
class Analysis: public NodeVisitor {
|
class Analysis: public NodeVisitor {
|
||||||
public:
|
public:
|
||||||
Analysis(Isolate* isolate, bool ignore_case, bool is_one_byte)
|
Analysis(Isolate* isolate, JSRegExp::Flags flags, bool is_one_byte)
|
||||||
: isolate_(isolate),
|
: isolate_(isolate),
|
||||||
ignore_case_(ignore_case),
|
flags_(flags),
|
||||||
is_one_byte_(is_one_byte),
|
is_one_byte_(is_one_byte),
|
||||||
error_message_(NULL) {}
|
error_message_(NULL) {}
|
||||||
void EnsureAnalyzed(RegExpNode* node);
|
void EnsureAnalyzed(RegExpNode* node);
|
||||||
@ -1502,9 +1511,12 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
|||||||
|
|
||||||
Isolate* isolate() const { return isolate_; }
|
Isolate* isolate() const { return isolate_; }
|
||||||
|
|
||||||
|
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||||
|
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Isolate* isolate_;
|
Isolate* isolate_;
|
||||||
bool ignore_case_;
|
JSRegExp::Flags flags_;
|
||||||
bool is_one_byte_;
|
bool is_one_byte_;
|
||||||
const char* error_message_;
|
const char* error_message_;
|
||||||
|
|
||||||
|
@ -215,7 +215,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
Label fallthrough;
|
Label fallthrough;
|
||||||
__ lw(a0, register_location(start_reg)); // Index of start of capture.
|
__ lw(a0, register_location(start_reg)); // Index of start of capture.
|
||||||
__ lw(a1, register_location(start_reg + 1)); // Index of end of capture.
|
__ lw(a1, register_location(start_reg + 1)); // Index of end of capture.
|
||||||
@ -310,7 +310,7 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
|||||||
// a0: Address byte_offset1 - Address captured substring's start.
|
// a0: Address byte_offset1 - Address captured substring's start.
|
||||||
// a1: Address byte_offset2 - Address of current character position.
|
// a1: Address byte_offset2 - Address of current character position.
|
||||||
// a2: size_t byte_length - length of capture in bytes(!).
|
// a2: size_t byte_length - length of capture in bytes(!).
|
||||||
// a3: Isolate* isolate.
|
// a3: Isolate* isolate or 0 if unicode flag.
|
||||||
|
|
||||||
// Address of start of capture.
|
// Address of start of capture.
|
||||||
__ Addu(a0, a0, Operand(end_of_input_address()));
|
__ Addu(a0, a0, Operand(end_of_input_address()));
|
||||||
@ -324,7 +324,14 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
|||||||
__ Subu(a1, a1, Operand(s3));
|
__ Subu(a1, a1, Operand(s3));
|
||||||
}
|
}
|
||||||
// Isolate.
|
// Isolate.
|
||||||
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
if (unicode) {
|
||||||
|
__ li(a3, Operand(zero_reg));
|
||||||
|
} else // NOLINT
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
{
|
||||||
|
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
AllowExternalCallThatCantCauseGC scope(masm_);
|
AllowExternalCallThatCantCauseGC scope(masm_);
|
||||||
|
@ -37,7 +37,7 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
||||||
|
@ -251,7 +251,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
Label fallthrough;
|
Label fallthrough;
|
||||||
__ ld(a0, register_location(start_reg)); // Index of start of capture.
|
__ ld(a0, register_location(start_reg)); // Index of start of capture.
|
||||||
__ ld(a1, register_location(start_reg + 1)); // Index of end of capture.
|
__ ld(a1, register_location(start_reg + 1)); // Index of end of capture.
|
||||||
@ -346,7 +346,7 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
|||||||
// a0: Address byte_offset1 - Address captured substring's start.
|
// a0: Address byte_offset1 - Address captured substring's start.
|
||||||
// a1: Address byte_offset2 - Address of current character position.
|
// a1: Address byte_offset2 - Address of current character position.
|
||||||
// a2: size_t byte_length - length of capture in bytes(!).
|
// a2: size_t byte_length - length of capture in bytes(!).
|
||||||
// a3: Isolate* isolate.
|
// a3: Isolate* isolate or 0 if unicode flag.
|
||||||
|
|
||||||
// Address of start of capture.
|
// Address of start of capture.
|
||||||
__ Daddu(a0, a0, Operand(end_of_input_address()));
|
__ Daddu(a0, a0, Operand(end_of_input_address()));
|
||||||
@ -360,7 +360,14 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
|
|||||||
__ Dsubu(a1, a1, Operand(s3));
|
__ Dsubu(a1, a1, Operand(s3));
|
||||||
}
|
}
|
||||||
// Isolate.
|
// Isolate.
|
||||||
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
if (unicode) {
|
||||||
|
__ li(a3, Operand(zero_reg));
|
||||||
|
} else // NOLINT
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
{
|
||||||
|
__ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
AllowExternalCallThatCantCauseGC scope(masm_);
|
AllowExternalCallThatCantCauseGC scope(masm_);
|
||||||
|
@ -37,7 +37,7 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
||||||
|
@ -108,8 +108,9 @@ class CharacterRange {
|
|||||||
bool is_valid() { return from_ <= to_; }
|
bool is_valid() { return from_ <= to_; }
|
||||||
bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
|
bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
|
||||||
bool IsSingleton() { return (from_ == to_); }
|
bool IsSingleton() { return (from_ == to_); }
|
||||||
void AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
static void AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
||||||
ZoneList<CharacterRange>* ranges, bool is_one_byte);
|
ZoneList<CharacterRange>* ranges,
|
||||||
|
bool is_one_byte);
|
||||||
// Whether a range list is in canonical form: Ranges ordered by from value,
|
// Whether a range list is in canonical form: Ranges ordered by from value,
|
||||||
// and ranges non-overlapping and non-adjacent.
|
// and ranges non-overlapping and non-adjacent.
|
||||||
static bool IsCanonical(ZoneList<CharacterRange>* ranges);
|
static bool IsCanonical(ZoneList<CharacterRange>* ranges);
|
||||||
@ -293,7 +294,6 @@ class RegExpCharacterClass final : public RegExpTree {
|
|||||||
RegExpCharacterClass* AsCharacterClass() override;
|
RegExpCharacterClass* AsCharacterClass() override;
|
||||||
bool IsCharacterClass() override;
|
bool IsCharacterClass() override;
|
||||||
bool IsTextElement() override { return true; }
|
bool IsTextElement() override { return true; }
|
||||||
bool NeedsDesugaringForUnicode(Zone* zone);
|
|
||||||
int min_match() override { return 1; }
|
int min_match() override { return 1; }
|
||||||
int max_match() override { return 1; }
|
int max_match() override { return 1; }
|
||||||
void AppendToText(RegExpText* text, Zone* zone) override;
|
void AppendToText(RegExpText* text, Zone* zone) override;
|
||||||
@ -310,7 +310,7 @@ class RegExpCharacterClass final : public RegExpTree {
|
|||||||
// W : non-ASCII word character
|
// W : non-ASCII word character
|
||||||
// d : ASCII digit
|
// d : ASCII digit
|
||||||
// D : non-ASCII digit
|
// D : non-ASCII digit
|
||||||
// . : non-unicode non-newline
|
// . : non-newline
|
||||||
// * : All characters, for advancing unanchored regexp
|
// * : All characters, for advancing unanchored regexp
|
||||||
uc16 standard_type() { return set_.standard_set_type(); }
|
uc16 standard_type() { return set_.standard_set_type(); }
|
||||||
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
|
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
|
||||||
|
@ -381,11 +381,13 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_not_equal) {
|
int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
|
||||||
DCHECK(start_reg >= 0);
|
DCHECK(start_reg >= 0);
|
||||||
DCHECK(start_reg <= kMaxRegister);
|
DCHECK(start_reg <= kMaxRegister);
|
||||||
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
|
Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
|
||||||
: BC_CHECK_NOT_BACK_REF_NO_CASE,
|
: BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
|
||||||
|
: (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
|
||||||
|
: BC_CHECK_NOT_BACK_REF_NO_CASE),
|
||||||
start_reg);
|
start_reg);
|
||||||
EmitOrLink(on_not_equal);
|
EmitOrLink(on_not_equal);
|
||||||
}
|
}
|
||||||
|
@ -82,16 +82,10 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
|
|||||||
uc16 to,
|
uc16 to,
|
||||||
Label* on_not_in_range);
|
Label* on_not_in_range);
|
||||||
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
|
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
|
||||||
virtual void CheckPosition(int cp_offset, Label* on_outside_input) {
|
|
||||||
LoadCurrentCharacter(cp_offset, on_outside_input, true);
|
|
||||||
}
|
|
||||||
virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) {
|
|
||||||
return false; // No custom support for character classes.
|
|
||||||
}
|
|
||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
|
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
|
||||||
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
|
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
|
||||||
|
@ -360,11 +360,11 @@ void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
|
PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s %s, label[%08x]);\n",
|
||||||
start_reg, read_backward ? "backward" : "forward",
|
start_reg, read_backward ? "backward" : "forward",
|
||||||
LabelToInt(on_no_match));
|
unicode ? "unicode" : "non-unicode", LabelToInt(on_no_match));
|
||||||
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
|
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward, unicode,
|
||||||
on_no_match);
|
on_no_match);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
|
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(unsigned c,
|
virtual void CheckNotCharacterAfterAnd(unsigned c,
|
||||||
|
@ -9,6 +9,10 @@
|
|||||||
#include "src/regexp/regexp-stack.h"
|
#include "src/regexp/regexp-stack.h"
|
||||||
#include "src/simulator.h"
|
#include "src/simulator.h"
|
||||||
|
|
||||||
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
#include "unicode/uchar.h"
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
|
||||||
namespace v8 {
|
namespace v8 {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
@ -23,6 +27,67 @@ RegExpMacroAssembler::~RegExpMacroAssembler() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Compares two equally long two-byte (UC16) substrings case-insensitively.
// Called from generated RegExp code.
//
//   byte_offset1, byte_offset2: start addresses of the two substrings.
//   byte_length: length of each substring in bytes; must be even.
//   isolate: the isolate whose Ecma262Canonicalize mapping is used, or
//            nullptr to request the ICU case-folding comparison (only
//            handled when built with V8_I18N_SUPPORT).
//
// Returns 1 if the substrings match, 0 otherwise.
//
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
                                                     Address byte_offset2,
                                                     size_t byte_length,
                                                     Isolate* isolate) {
  DCHECK(byte_length % 2 == 0);
  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
  size_t length = byte_length >> 1;

#ifdef V8_I18N_SUPPORT
  if (isolate == nullptr) {
    for (size_t i = 0; i < length; i++) {
      uc32 c1 = substring1[i];
      uc32 c2 = substring2[i];
      if (unibrow::Utf16::IsLeadSurrogate(c1)) {
        // Non-BMP characters do not have case-equivalents in the BMP.
        // Both have to be non-BMP for them to be able to match.
        if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
        if (i + 1 < length) {
          uc16 c1t = substring1[i + 1];
          uc16 c2t = substring2[i + 1];
          // Only combine when both sides form a complete surrogate pair;
          // otherwise the lead surrogates are compared as lone code units.
          if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
              unibrow::Utf16::IsTrailSurrogate(c2t)) {
            c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
            c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
            i++;
          }
        }
      }
      c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
      c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
      if (c1 != c2) return 0;
    }
    return 1;
  }
#endif  // V8_I18N_SUPPORT
  DCHECK_NOT_NULL(isolate);
  // Fetch the mapping only once the isolate is known to be non-null: a null
  // isolate is a valid argument (handled above), and calling a member
  // function through it would be undefined behaviour.
  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
      isolate->regexp_macro_assembler_canonicalize();
  for (size_t i = 0; i < length; i++) {
    unibrow::uchar c1 = substring1[i];
    unibrow::uchar c2 = substring2[i];
    if (c1 != c2) {
      unibrow::uchar s1[1] = {c1};
      canonicalize->get(c1, '\0', s1);
      if (s1[0] != c2) {
        unibrow::uchar s2[1] = {c2};
        canonicalize->get(c2, '\0', s2);
        if (s1[0] != s2[0]) {
          return 0;
        }
      }
    }
  }
  return 1;
}
|
||||||
|
|
||||||
|
|
||||||
#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
|
#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM.
|
||||||
|
|
||||||
NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
|
NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
|
||||||
@ -245,40 +310,6 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Case-insensitive comparison of two two-byte substrings of equal length,
// using the isolate's Ecma262Canonicalize mapping. Returns 1 when every
// character pair is equal or canonicalizes to the same value, 0 otherwise.
//
// This function is not allowed to cause a garbage collection.
// A GC might move the calling generated code and invalidate the
// return address on the stack.
int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    Address byte_offset1,
    Address byte_offset2,
    size_t byte_length,
    Isolate* isolate) {
  unibrow::Mapping<unibrow::Ecma262Canonicalize>* mapping =
      isolate->regexp_macro_assembler_canonicalize();
  DCHECK(byte_length % 2 == 0);
  uc16* lhs = reinterpret_cast<uc16*>(byte_offset1);
  uc16* rhs = reinterpret_cast<uc16*>(byte_offset2);
  const size_t char_count = byte_length / 2;

  for (size_t pos = 0; pos < char_count; ++pos) {
    unibrow::uchar left = lhs[pos];
    unibrow::uchar right = rhs[pos];
    if (left == right) continue;

    // Canonicalize the left character first; it may already equal the
    // right one, which saves the second lookup.
    unibrow::uchar canon_left[1] = { left };
    mapping->get(left, '\0', canon_left);
    if (canon_left[0] == right) continue;

    unibrow::uchar canon_right[1] = { right };
    mapping->get(right, '\0', canon_right);
    if (canon_left[0] != canon_right[0]) return 0;
  }
  return 1;
}
|
|
||||||
|
|
||||||
|
|
||||||
Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
|
Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
|
||||||
Address* stack_base,
|
Address* stack_base,
|
||||||
Isolate* isolate) {
|
Isolate* isolate) {
|
||||||
|
@ -76,7 +76,7 @@ class RegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match) = 0;
|
Label* on_no_match) = 0;
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match) = 0;
|
Label* on_no_match) = 0;
|
||||||
// Check the current character for a match with a literal character. If we
|
// Check the current character for a match with a literal character. If we
|
||||||
// fail to match then goto the on_failure label. End of input always
|
// fail to match then goto the on_failure label. End of input always
|
||||||
@ -146,6 +146,12 @@ class RegExpMacroAssembler {
|
|||||||
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
|
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
|
||||||
virtual void WriteStackPointerToRegister(int reg) = 0;
|
virtual void WriteStackPointerToRegister(int reg) = 0;
|
||||||
|
|
||||||
|
// Compares two-byte strings case insensitively.
|
||||||
|
// Called from generated RegExp code.
|
||||||
|
static int CaseInsensitiveCompareUC16(Address byte_offset1,
|
||||||
|
Address byte_offset2,
|
||||||
|
size_t byte_length, Isolate* isolate);
|
||||||
|
|
||||||
// Controls the generation of large inlined constants in the code.
|
// Controls the generation of large inlined constants in the code.
|
||||||
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
|
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
|
||||||
bool slow_safe() { return slow_safe_compiler_; }
|
bool slow_safe() { return slow_safe_compiler_; }
|
||||||
@ -199,13 +205,6 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
|
|||||||
int previous_index,
|
int previous_index,
|
||||||
Isolate* isolate);
|
Isolate* isolate);
|
||||||
|
|
||||||
// Compares two-byte strings case insensitively.
|
|
||||||
// Called from generated RegExp code.
|
|
||||||
static int CaseInsensitiveCompareUC16(Address byte_offset1,
|
|
||||||
Address byte_offset2,
|
|
||||||
size_t byte_length,
|
|
||||||
Isolate* isolate);
|
|
||||||
|
|
||||||
// Called from RegExp if the backtrack stack limit is hit.
|
// Called from RegExp if the backtrack stack limit is hit.
|
||||||
// Tries to expand the stack. Returns the new stack-pointer if
|
// Tries to expand the stack. Returns the new stack-pointer if
|
||||||
// successful, and updates the stack_top address, or returns 0 if unable
|
// successful, and updates the stack_top address, or returns 0 if unable
|
||||||
|
@ -11,6 +11,10 @@
|
|||||||
#include "src/regexp/jsregexp.h"
|
#include "src/regexp/jsregexp.h"
|
||||||
#include "src/utils.h"
|
#include "src/utils.h"
|
||||||
|
|
||||||
|
#ifdef V8_I18N_SUPPORT
|
||||||
|
#include "unicode/uset.h"
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
|
||||||
namespace v8 {
|
namespace v8 {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
@ -1064,13 +1068,20 @@ void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
|
|||||||
DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate));
|
DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate));
|
||||||
if (pending_surrogate_ != kNoPendingSurrogate) {
|
if (pending_surrogate_ != kNoPendingSurrogate) {
|
||||||
uc16 lead_surrogate = pending_surrogate_;
|
uc16 lead_surrogate = pending_surrogate_;
|
||||||
DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
|
|
||||||
ZoneList<uc16> surrogate_pair(2, zone());
|
|
||||||
surrogate_pair.Add(lead_surrogate, zone());
|
|
||||||
surrogate_pair.Add(trail_surrogate, zone());
|
|
||||||
RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
|
|
||||||
pending_surrogate_ = kNoPendingSurrogate;
|
pending_surrogate_ = kNoPendingSurrogate;
|
||||||
AddAtom(atom);
|
DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
|
||||||
|
uc32 combined =
|
||||||
|
unibrow::Utf16::CombineSurrogatePair(lead_surrogate, trail_surrogate);
|
||||||
|
if (NeedsDesugaringForIgnoreCase(combined)) {
|
||||||
|
AddCharacterClass(combined);
|
||||||
|
} else {
|
||||||
|
ZoneList<uc16> surrogate_pair(2, zone());
|
||||||
|
surrogate_pair.Add(lead_surrogate, zone());
|
||||||
|
surrogate_pair.Add(trail_surrogate, zone());
|
||||||
|
RegExpAtom* atom =
|
||||||
|
new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
|
||||||
|
AddAtom(atom);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
pending_surrogate_ = trail_surrogate;
|
pending_surrogate_ = trail_surrogate;
|
||||||
FlushPendingSurrogate();
|
FlushPendingSurrogate();
|
||||||
@ -1080,14 +1091,10 @@ void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
|
|||||||
|
|
||||||
void RegExpBuilder::FlushPendingSurrogate() {
|
void RegExpBuilder::FlushPendingSurrogate() {
|
||||||
if (pending_surrogate_ != kNoPendingSurrogate) {
|
if (pending_surrogate_ != kNoPendingSurrogate) {
|
||||||
// Use character class to desugar lone surrogate matching.
|
|
||||||
RegExpCharacterClass* cc = new (zone()) RegExpCharacterClass(
|
|
||||||
CharacterRange::List(zone(),
|
|
||||||
CharacterRange::Singleton(pending_surrogate_)),
|
|
||||||
false);
|
|
||||||
pending_surrogate_ = kNoPendingSurrogate;
|
|
||||||
DCHECK(unicode());
|
DCHECK(unicode());
|
||||||
AddCharacterClass(cc);
|
uc32 c = pending_surrogate_;
|
||||||
|
pending_surrogate_ = kNoPendingSurrogate;
|
||||||
|
AddCharacterClass(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1123,11 +1130,15 @@ void RegExpBuilder::FlushText() {
|
|||||||
void RegExpBuilder::AddCharacter(uc16 c) {
|
void RegExpBuilder::AddCharacter(uc16 c) {
|
||||||
FlushPendingSurrogate();
|
FlushPendingSurrogate();
|
||||||
pending_empty_ = false;
|
pending_empty_ = false;
|
||||||
if (characters_ == NULL) {
|
if (NeedsDesugaringForIgnoreCase(c)) {
|
||||||
characters_ = new (zone()) ZoneList<uc16>(4, zone());
|
AddCharacterClass(c);
|
||||||
|
} else {
|
||||||
|
if (characters_ == NULL) {
|
||||||
|
characters_ = new (zone()) ZoneList<uc16>(4, zone());
|
||||||
|
}
|
||||||
|
characters_->Add(c, zone());
|
||||||
|
LAST(ADD_CHAR);
|
||||||
}
|
}
|
||||||
characters_->Add(c, zone());
|
|
||||||
LAST(ADD_CHAR);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1150,7 +1161,7 @@ void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
|
|||||||
|
|
||||||
|
|
||||||
void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
|
void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
|
||||||
if (unicode() && cc->NeedsDesugaringForUnicode(zone())) {
|
if (NeedsDesugaringForUnicode(cc)) {
|
||||||
// In unicode mode, character class needs to be desugared, so it
|
// In unicode mode, character class needs to be desugared, so it
|
||||||
// must be a standalone term instead of being part of a RegExpText.
|
// must be a standalone term instead of being part of a RegExpText.
|
||||||
AddTerm(cc);
|
AddTerm(cc);
|
||||||
@ -1160,6 +1171,12 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds a character class containing only |c| and hands it to the
// RegExpCharacterClass* overload of AddCharacterClass.
void RegExpBuilder::AddCharacterClass(uc32 c) {
  auto* singleton_list =
      CharacterRange::List(zone(), CharacterRange::Singleton(c));
  RegExpCharacterClass* single_char_class =
      new (zone()) RegExpCharacterClass(singleton_list, false);
  AddCharacterClass(single_char_class);
}
|
||||||
|
|
||||||
|
|
||||||
void RegExpBuilder::AddAtom(RegExpTree* term) {
|
void RegExpBuilder::AddAtom(RegExpTree* term) {
|
||||||
if (term->IsEmpty()) {
|
if (term->IsEmpty()) {
|
||||||
AddEmpty();
|
AddEmpty();
|
||||||
@ -1210,6 +1227,47 @@ void RegExpBuilder::FlushTerms() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if, in unicode mode, |cc| contains a range that reaches
// non-BMP code points or overlaps the surrogate range. Always false when the
// unicode flag is not set or for the standard classes 's', 'w' and 'd'.
// Canonicalizes the ranges of |cc| as a side effect.
bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
  if (!unicode()) return false;

  // White space, ASCII word characters and ASCII digits do not need
  // desugaring.
  const uc16 standard = cc->standard_type();
  if (standard == 's' || standard == 'w' || standard == 'd') return false;

  ZoneList<CharacterRange>* ranges = cc->ranges(zone());
  CharacterRange::Canonicalize(ranges);
  const int range_count = ranges->length();
  for (int index = 0; index < range_count; index++) {
    CharacterRange range = ranges->at(index);
    uc32 lower = range.from();
    uc32 upper = range.to();
    const bool reaches_non_bmp = upper >= kNonBmpStart;
    const bool overlaps_surrogates =
        lower <= kTrailSurrogateEnd && upper >= kLeadSurrogateStart;
    if (reaches_non_bmp || overlaps_surrogates) return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if, with both the unicode and ignore-case flags set, the
// case-insensitive closure of |c| (ignoring multi-character strings)
// contains more than one code point. Without ICU this always returns false:
// we act as if the unicode flag is not set, and do not desugar.
bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
#ifdef V8_I18N_SUPPORT
  if (!unicode() || !ignore_case()) return false;
  USet* closure = uset_open(c, c);
  uset_closeOver(closure, USET_CASE_INSENSITIVE);
  uset_removeAllStrings(closure);
  const bool has_case_equivalents = uset_size(closure) > 1;
  uset_close(closure);
  return has_case_equivalents;
#else
  return false;
#endif  // V8_I18N_SUPPORT
}
|
||||||
|
|
||||||
|
|
||||||
RegExpTree* RegExpBuilder::ToRegExp() {
|
RegExpTree* RegExpBuilder::ToRegExp() {
|
||||||
FlushTerms();
|
FlushTerms();
|
||||||
int num_alternatives = alternatives_.length();
|
int num_alternatives = alternatives_.length();
|
||||||
|
@ -106,6 +106,7 @@ class RegExpBuilder : public ZoneObject {
|
|||||||
// following quantifier
|
// following quantifier
|
||||||
void AddEmpty();
|
void AddEmpty();
|
||||||
void AddCharacterClass(RegExpCharacterClass* cc);
|
void AddCharacterClass(RegExpCharacterClass* cc);
|
||||||
|
void AddCharacterClass(uc32 c);
|
||||||
void AddAtom(RegExpTree* tree);
|
void AddAtom(RegExpTree* tree);
|
||||||
void AddTerm(RegExpTree* tree);
|
void AddTerm(RegExpTree* tree);
|
||||||
void AddAssertion(RegExpTree* tree);
|
void AddAssertion(RegExpTree* tree);
|
||||||
@ -122,8 +123,11 @@ class RegExpBuilder : public ZoneObject {
|
|||||||
void FlushCharacters();
|
void FlushCharacters();
|
||||||
void FlushText();
|
void FlushText();
|
||||||
void FlushTerms();
|
void FlushTerms();
|
||||||
|
bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
|
||||||
|
bool NeedsDesugaringForIgnoreCase(uc32 c);
|
||||||
Zone* zone() const { return zone_; }
|
Zone* zone() const { return zone_; }
|
||||||
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
|
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
|
||||||
|
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||||
|
|
||||||
Zone* zone_;
|
Zone* zone_;
|
||||||
bool pending_empty_;
|
bool pending_empty_;
|
||||||
|
@ -203,7 +203,7 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
|
|||||||
|
|
||||||
|
|
||||||
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
|
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
|
||||||
int start_reg, bool read_backward, Label* on_no_match) {
|
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
|
||||||
Label fallthrough;
|
Label fallthrough;
|
||||||
ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
|
ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
|
||||||
ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
|
ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
|
||||||
@ -308,8 +308,10 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
|
|||||||
// Address byte_offset1 - Address captured substring's start.
|
// Address byte_offset1 - Address captured substring's start.
|
||||||
// Address byte_offset2 - Address of current character position.
|
// Address byte_offset2 - Address of current character position.
|
||||||
// size_t byte_length - length of capture in bytes(!)
|
// size_t byte_length - length of capture in bytes(!)
|
||||||
// Isolate* isolate
|
// Isolate* isolate or 0 if unicode flag.
|
||||||
#ifdef _WIN64
|
#ifdef _WIN64
|
||||||
|
DCHECK(rcx.is(arg_reg_1));
|
||||||
|
DCHECK(rdx.is(arg_reg_2));
|
||||||
// Compute and set byte_offset1 (start of capture).
|
// Compute and set byte_offset1 (start of capture).
|
||||||
__ leap(rcx, Operand(rsi, rdx, times_1, 0));
|
__ leap(rcx, Operand(rsi, rdx, times_1, 0));
|
||||||
// Set byte_offset2.
|
// Set byte_offset2.
|
||||||
@ -317,11 +319,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
|
|||||||
if (read_backward) {
|
if (read_backward) {
|
||||||
__ subq(rdx, rbx);
|
__ subq(rdx, rbx);
|
||||||
}
|
}
|
||||||
// Set byte_length.
|
|
||||||
__ movp(r8, rbx);
|
|
||||||
// Isolate.
|
|
||||||
__ LoadAddress(r9, ExternalReference::isolate_address(isolate()));
|
|
||||||
#else // AMD64 calling convention
|
#else // AMD64 calling convention
|
||||||
|
DCHECK(rdi.is(arg_reg_1));
|
||||||
|
DCHECK(rsi.is(arg_reg_2));
|
||||||
// Compute byte_offset2 (current position = rsi+rdi).
|
// Compute byte_offset2 (current position = rsi+rdi).
|
||||||
__ leap(rax, Operand(rsi, rdi, times_1, 0));
|
__ leap(rax, Operand(rsi, rdi, times_1, 0));
|
||||||
// Compute and set byte_offset1 (start of capture).
|
// Compute and set byte_offset1 (start of capture).
|
||||||
@ -331,11 +331,19 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
|
|||||||
if (read_backward) {
|
if (read_backward) {
|
||||||
__ subq(rsi, rbx);
|
__ subq(rsi, rbx);
|
||||||
}
|
}
|
||||||
|
#endif // _WIN64
|
||||||
|
|
||||||
// Set byte_length.
|
// Set byte_length.
|
||||||
__ movp(rdx, rbx);
|
__ movp(arg_reg_3, rbx);
|
||||||
// Isolate.
|
// Isolate.
|
||||||
__ LoadAddress(rcx, ExternalReference::isolate_address(isolate()));
|
#ifdef V8_I18N_SUPPORT
|
||||||
#endif
|
if (unicode) {
|
||||||
|
__ movp(arg_reg_4, Immediate(0));
|
||||||
|
} else // NOLINT
|
||||||
|
#endif // V8_I18N_SUPPORT
|
||||||
|
{
|
||||||
|
__ LoadAddress(arg_reg_4, ExternalReference::isolate_address(isolate()));
|
||||||
|
}
|
||||||
|
|
||||||
{ // NOLINT: Can't find a way to open this scope without confusing the
|
{ // NOLINT: Can't find a way to open this scope without confusing the
|
||||||
// linter.
|
// linter.
|
||||||
|
@ -38,7 +38,7 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
|
|||||||
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
virtual void CheckNotBackReference(int start_reg, bool read_backward,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
|
||||||
bool read_backward,
|
bool read_backward, bool unicode,
|
||||||
Label* on_no_match);
|
Label* on_no_match);
|
||||||
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
|
||||||
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
virtual void CheckNotCharacterAfterAnd(uint32_t c,
|
||||||
|
@ -1186,16 +1186,16 @@ TEST(MacroAssemblerNativeBackRefNoCase) {
|
|||||||
m.WriteCurrentPositionToRegister(2, 0);
|
m.WriteCurrentPositionToRegister(2, 0);
|
||||||
m.AdvanceCurrentPosition(3);
|
m.AdvanceCurrentPosition(3);
|
||||||
m.WriteCurrentPositionToRegister(3, 0);
|
m.WriteCurrentPositionToRegister(3, 0);
|
||||||
m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "AbC".
|
m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "AbC".
|
||||||
m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "ABC".
|
m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "ABC".
|
||||||
Label expected_fail;
|
Label expected_fail;
|
||||||
m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
|
m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
|
||||||
m.Bind(&fail);
|
m.Bind(&fail);
|
||||||
m.Fail();
|
m.Fail();
|
||||||
|
|
||||||
m.Bind(&expected_fail);
|
m.Bind(&expected_fail);
|
||||||
m.AdvanceCurrentPosition(3); // Skip "xYz"
|
m.AdvanceCurrentPosition(3); // Skip "xYz"
|
||||||
m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
|
m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
|
||||||
m.Fail();
|
m.Fail();
|
||||||
|
|
||||||
m.Bind(&succ);
|
m.Bind(&succ);
|
||||||
@ -1629,7 +1629,9 @@ static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
|
|||||||
int count = expected.length();
|
int count = expected.length();
|
||||||
ZoneList<CharacterRange>* list =
|
ZoneList<CharacterRange>* list =
|
||||||
new(&zone) ZoneList<CharacterRange>(count, &zone);
|
new(&zone) ZoneList<CharacterRange>(count, &zone);
|
||||||
input.AddCaseEquivalents(isolate, &zone, list, false);
|
list->Add(input, &zone);
|
||||||
|
CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
|
||||||
|
list->Remove(0); // Remove the input before checking results.
|
||||||
CHECK_EQ(count, list->length());
|
CHECK_EQ(count, list->length());
|
||||||
for (int i = 0; i < list->length(); i++) {
|
for (int i = 0; i < list->length(); i++) {
|
||||||
CHECK_EQ(expected[i].from(), list->at(i).from());
|
CHECK_EQ(expected[i].from(), list->at(i).from());
|
||||||
|
52
test/mjsunit/harmony/unicode-regexp-ignore-case-noi18n.js
Normal file
52
test/mjsunit/harmony/unicode-regexp-ignore-case-noi18n.js
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
// Flags: --harmony-unicode-regexps
|
||||||
|
|
||||||
|
// Non-unicode use toUpperCase mappings.
|
||||||
|
assertFalse(/[\u00e5]/i.test("\u212b"));
|
||||||
|
assertFalse(/[\u212b]/i.test("\u00e5\u1234"));
|
||||||
|
assertFalse(/[\u212b]/i.test("\u00e5"));
|
||||||
|
|
||||||
|
assertTrue("\u212b".toLowerCase() == "\u00e5");
|
||||||
|
assertTrue("\u00c5".toLowerCase() == "\u00e5");
|
||||||
|
assertTrue("\u00e5".toUpperCase() == "\u00c5");
|
||||||
|
|
||||||
|
// Unicode uses case folding mappings.
|
||||||
|
assertFalse(/\u00e5/ui.test("\u212b"));
|
||||||
|
assertTrue(/\u00e5/ui.test("\u00c5"));
|
||||||
|
assertTrue(/\u00e5/ui.test("\u00e5"));
|
||||||
|
assertFalse(/\u00e5/ui.test("\u212b"));
|
||||||
|
assertTrue(/\u00c5/ui.test("\u00e5"));
|
||||||
|
assertFalse(/\u00c5/ui.test("\u212b"));
|
||||||
|
assertTrue(/\u00c5/ui.test("\u00c5"));
|
||||||
|
assertFalse(/\u212b/ui.test("\u00c5"));
|
||||||
|
assertFalse(/\u212b/ui.test("\u00e5"));
|
||||||
|
assertTrue(/\u212b/ui.test("\u212b"));
|
||||||
|
|
||||||
|
// Non-BMP.
|
||||||
|
assertFalse(/\u{10400}/i.test("\u{10428}"));
|
||||||
|
assertFalse(/\u{10400}/ui.test("\u{10428}"));
|
||||||
|
assertFalse(/\ud801\udc00/ui.test("\u{10428}"));
|
||||||
|
assertFalse(/[\u{10428}]/ui.test("\u{10400}"));
|
||||||
|
assertFalse(/[\ud801\udc28]/ui.test("\u{10400}"));
|
||||||
|
assertEquals(["\uff21\u{10400}"],
|
||||||
|
/[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
|
||||||
|
assertEquals(["abc"], /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
|
||||||
|
assertEquals(["\uff53\u24bb"],
|
||||||
|
/[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
|
||||||
|
|
||||||
|
// Full mappings are ignored.
|
||||||
|
assertFalse(/\u00df/ui.test("SS"));
|
||||||
|
assertFalse(/\u1f8d/ui.test("\u1f05\u03b9"));
|
||||||
|
|
||||||
|
// Simple mappings.
|
||||||
|
assertFalse(/\u1f8d/ui.test("\u1f85"));
|
||||||
|
|
||||||
|
// Common mappings.
|
||||||
|
assertTrue(/\u1f6b/ui.test("\u1f63"));
|
||||||
|
|
||||||
|
// Back references.
|
||||||
|
assertNull(/(.)\1\1/ui.exec("\u00e5\u212b\u00c5"));
|
||||||
|
assertNull(/(.)\1/ui.exec("\u{118aa}\u{118ca}"));
|
54
test/mjsunit/harmony/unicode-regexp-ignore-case.js
Normal file
54
test/mjsunit/harmony/unicode-regexp-ignore-case.js
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
// Flags: --harmony-unicode-regexps
|
||||||
|
|
||||||
|
// Non-unicode use toUpperCase mappings.
|
||||||
|
assertFalse(/[\u00e5]/i.test("\u212b"));
|
||||||
|
assertFalse(/[\u212b]/i.test("\u00e5\u1234"));
|
||||||
|
assertFalse(/[\u212b]/i.test("\u00e5"));
|
||||||
|
|
||||||
|
assertTrue("\u212b".toLowerCase() == "\u00e5");
|
||||||
|
assertTrue("\u00c5".toLowerCase() == "\u00e5");
|
||||||
|
assertTrue("\u00e5".toUpperCase() == "\u00c5");
|
||||||
|
|
||||||
|
// Unicode uses case folding mappings.
|
||||||
|
assertTrue(/\u00e5/ui.test("\u212b"));
|
||||||
|
assertTrue(/\u00e5/ui.test("\u00c5"));
|
||||||
|
assertTrue(/\u00e5/ui.test("\u00e5"));
|
||||||
|
assertTrue(/\u00e5/ui.test("\u212b"));
|
||||||
|
assertTrue(/\u00c5/ui.test("\u00e5"));
|
||||||
|
assertTrue(/\u00c5/ui.test("\u212b"));
|
||||||
|
assertTrue(/\u00c5/ui.test("\u00c5"));
|
||||||
|
assertTrue(/\u212b/ui.test("\u00c5"));
|
||||||
|
assertTrue(/\u212b/ui.test("\u00e5"));
|
||||||
|
assertTrue(/\u212b/ui.test("\u212b"));
|
||||||
|
|
||||||
|
// Non-BMP.
|
||||||
|
assertFalse(/\u{10400}/i.test("\u{10428}"));
|
||||||
|
assertTrue(/\u{10400}/ui.test("\u{10428}"));
|
||||||
|
assertTrue(/\ud801\udc00/ui.test("\u{10428}"));
|
||||||
|
assertTrue(/[\u{10428}]/ui.test("\u{10400}"));
|
||||||
|
assertTrue(/[\ud801\udc28]/ui.test("\u{10400}"));
|
||||||
|
assertEquals(["\uff21\u{10400}"],
|
||||||
|
/[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
|
||||||
|
assertEquals(["abc"], /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
|
||||||
|
assertEquals(["\uff53\u24bb"],
|
||||||
|
/[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
|
||||||
|
|
||||||
|
// Full mappings are ignored.
|
||||||
|
assertFalse(/\u00df/ui.test("SS"));
|
||||||
|
assertFalse(/\u1f8d/ui.test("\u1f05\u03b9"));
|
||||||
|
|
||||||
|
// Simple mappings work.
|
||||||
|
assertTrue(/\u1f8d/ui.test("\u1f85"));
|
||||||
|
|
||||||
|
// Common mappings work.
|
||||||
|
assertTrue(/\u1f6b/ui.test("\u1f63"));
|
||||||
|
|
||||||
|
// Back references.
|
||||||
|
assertEquals(["\u00e5\u212b\u00c5", "\u00e5"],
|
||||||
|
/(.)\1\1/ui.exec("\u00e5\u212b\u00c5"));
|
||||||
|
assertEquals(["\u{118aa}\u{118ca}", "\u{118aa}"],
|
||||||
|
/(.)\1/ui.exec("\u{118aa}\u{118ca}"));
|
@ -289,6 +289,10 @@
|
|||||||
|
|
||||||
# TODO(titzer): SSE 4.1 required for asm-wasm test (floor).
|
# TODO(titzer): SSE 4.1 required for asm-wasm test (floor).
|
||||||
'wasm/asm-wasm': [SKIP],
|
'wasm/asm-wasm': [SKIP],
|
||||||
|
|
||||||
|
# case-insensitive unicode regexp relies on case mapping provided by ICU.
|
||||||
|
'harmony/unicode-regexp-ignore-case': [PASS, ['no_i18n == True', FAIL]],
|
||||||
|
'harmony/unicode-regexp-ignore-case-noi18n': [FAIL, ['no_i18n == True', PASS]],
|
||||||
}], # ALWAYS
|
}], # ALWAYS
|
||||||
|
|
||||||
['novfp3 == True', {
|
['novfp3 == True', {
|
||||||
|
Loading…
Reference in New Issue
Block a user