RegExp bitmap test for word character.
Review URL: http://codereview.chromium.org/547024
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3626 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
a5ac66628d
commit
eee6c6405e
@ -526,64 +526,54 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
|
||||
return true;
|
||||
}
|
||||
case 'n': {
|
||||
// Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
|
||||
__ eor(r0, current_character(), Operand(0x01));
|
||||
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
|
||||
__ sub(r0, r0, Operand(0x0b));
|
||||
__ cmp(r0, Operand(0x0c - 0x0b));
|
||||
if (mode_ == ASCII) {
|
||||
BranchOrBacktrack(hi, on_no_match);
|
||||
} else {
|
||||
Label done;
|
||||
__ b(ls, &done);
|
||||
// Compare original value to 0x2028 and 0x2029, using the already
|
||||
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
|
||||
// 0x201d (0x2028 - 0x0b) or 0x201e.
|
||||
__ sub(r0, r0, Operand(0x2028 - 0x0b));
|
||||
__ cmp(r0, Operand(1));
|
||||
BranchOrBacktrack(hi, on_no_match);
|
||||
__ bind(&done);
|
||||
}
|
||||
return true;
|
||||
// Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
|
||||
__ eor(r0, current_character(), Operand(0x01));
|
||||
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
|
||||
__ sub(r0, r0, Operand(0x0b));
|
||||
__ cmp(r0, Operand(0x0c - 0x0b));
|
||||
if (mode_ == ASCII) {
|
||||
BranchOrBacktrack(hi, on_no_match);
|
||||
} else {
|
||||
Label done;
|
||||
__ b(ls, &done);
|
||||
// Compare original value to 0x2028 and 0x2029, using the already
|
||||
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
|
||||
// 0x201d (0x2028 - 0x0b) or 0x201e.
|
||||
__ sub(r0, r0, Operand(0x2028 - 0x0b));
|
||||
__ cmp(r0, Operand(1));
|
||||
BranchOrBacktrack(hi, on_no_match);
|
||||
__ bind(&done);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case 'w': {
|
||||
// Match word character (0-9, A-Z, a-z and _).
|
||||
Label digits, done;
|
||||
__ cmp(current_character(), Operand('9'));
|
||||
__ b(ls, &digits);
|
||||
__ cmp(current_character(), Operand('_'));
|
||||
__ b(eq, &done);
|
||||
__ orr(r0, current_character(), Operand(0x20));
|
||||
__ sub(r0, r0, Operand('a'));
|
||||
__ cmp(r0, Operand('z' - 'a'));
|
||||
BranchOrBacktrack(hi, on_no_match);
|
||||
__ jmp(&done);
|
||||
|
||||
__ bind(&digits);
|
||||
__ cmp(current_character(), Operand('0'));
|
||||
BranchOrBacktrack(lo, on_no_match);
|
||||
__ bind(&done);
|
||||
|
||||
if (mode_ != ASCII) {
|
||||
// Table is 128 entries, so all ASCII characters can be tested.
|
||||
__ cmp(current_character(), Operand('z'));
|
||||
BranchOrBacktrack(hi, on_no_match);
|
||||
}
|
||||
ExternalReference map = ExternalReference::re_word_character_map();
|
||||
__ mov(r0, Operand(map));
|
||||
__ ldrb(r0, MemOperand(r0, current_character()));
|
||||
__ tst(r0, Operand(r0));
|
||||
BranchOrBacktrack(eq, on_no_match);
|
||||
return true;
|
||||
}
|
||||
case 'W': {
|
||||
// Match non-word character (not 0-9, A-Z, a-z and _).
|
||||
Label digits, done;
|
||||
__ cmp(current_character(), Operand('9'));
|
||||
__ b(ls, &digits);
|
||||
__ cmp(current_character(), Operand('_'));
|
||||
BranchOrBacktrack(eq, on_no_match);
|
||||
__ orr(r0, current_character(), Operand(0x20));
|
||||
__ sub(r0, r0, Operand('a'));
|
||||
__ cmp(r0, Operand('z' - 'a'));
|
||||
BranchOrBacktrack(ls, on_no_match);
|
||||
__ jmp(&done);
|
||||
|
||||
__ bind(&digits);
|
||||
__ cmp(current_character(), Operand('0'));
|
||||
BranchOrBacktrack(hs, on_no_match);
|
||||
__ bind(&done);
|
||||
|
||||
Label done;
|
||||
if (mode_ != ASCII) {
|
||||
// Table is 128 entries, so all ASCII characters can be tested.
|
||||
__ cmp(current_character(), Operand('z'));
|
||||
__ b(hi, &done);
|
||||
}
|
||||
ExternalReference map = ExternalReference::re_word_character_map();
|
||||
__ mov(r0, Operand(map));
|
||||
__ ldrb(r0, MemOperand(r0, current_character()));
|
||||
__ tst(r0, Operand(r0));
|
||||
BranchOrBacktrack(ne, on_no_match);
|
||||
if (mode_ != ASCII) {
|
||||
__ bind(&done);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case '*':
|
||||
|
@ -670,6 +670,10 @@ ExternalReference ExternalReference::re_case_insensitive_compare_uc16() {
|
||||
FUNCTION_ADDR(NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)));
|
||||
}
|
||||
|
||||
// Returns an ExternalReference wrapping the address reported by
// NativeRegExpMacroAssembler::word_character_map_address().
ExternalReference ExternalReference::re_word_character_map() {
|
||||
return ExternalReference(
|
||||
NativeRegExpMacroAssembler::word_character_map_address());
|
||||
}
|
||||
|
||||
ExternalReference ExternalReference::address_of_static_offsets_vector() {
|
||||
return ExternalReference(OffsetsVector::static_offsets_vector_address());
|
||||
|
@ -462,6 +462,10 @@ class ExternalReference BASE_EMBEDDED {
|
||||
|
||||
// Function NativeRegExpMacroAssembler::GrowStack()
|
||||
static ExternalReference re_grow_stack();
|
||||
|
||||
// byte NativeRegExpMacroAssembler::word_character_map
|
||||
static ExternalReference re_word_character_map();
|
||||
|
||||
#endif
|
||||
|
||||
// This lets you register a function that rewrites all external references.
|
||||
|
@ -1261,6 +1261,14 @@ void Assembler::test(Register reg, const Operand& op) {
|
||||
}
|
||||
|
||||
|
||||
// Emits opcode byte 0x84 followed by the operand encoding for reg and op.
void Assembler::test_b(Register reg, const Operand& op) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
EMIT(0x84);
|
||||
emit_operand(reg, op);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::test(const Operand& op, const Immediate& imm) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
|
@ -624,6 +624,7 @@ class Assembler : public Malloced {
|
||||
|
||||
void test(Register reg, const Immediate& imm);
|
||||
void test(Register reg, const Operand& op);
|
||||
void test_b(Register reg, const Operand& op);
|
||||
void test(const Operand& op, const Immediate& imm);
|
||||
|
||||
void xor_(Register dst, int32_t imm32);
|
||||
|
@ -63,6 +63,7 @@ static ByteMnemonic two_operands_instr[] = {
|
||||
{0x29, "sub", OPER_REG_OP_ORDER},
|
||||
{0x2A, "subb", REG_OPER_OP_ORDER},
|
||||
{0x2B, "sub", REG_OPER_OP_ORDER},
|
||||
{0x84, "test_b", REG_OPER_OP_ORDER},
|
||||
{0x85, "test", REG_OPER_OP_ORDER},
|
||||
{0x31, "xor", OPER_REG_OP_ORDER},
|
||||
{0x33, "xor", REG_OPER_OP_ORDER},
|
||||
|
@ -539,46 +539,33 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
|
||||
return true;
|
||||
}
|
||||
case 'w': {
|
||||
Label done, check_digits;
|
||||
__ cmp(Operand(current_character()), Immediate('9'));
|
||||
__ j(less_equal, &check_digits);
|
||||
__ cmp(Operand(current_character()), Immediate('_'));
|
||||
__ j(equal, &done);
|
||||
// Convert to lower case if letter.
|
||||
__ mov(Operand(eax), current_character());
|
||||
__ or_(eax, 0x20);
|
||||
// check current character in range ['a'..'z'], nondestructively.
|
||||
__ sub(Operand(eax), Immediate('a'));
|
||||
__ cmp(Operand(eax), Immediate('z' - 'a'));
|
||||
BranchOrBacktrack(above, on_no_match);
|
||||
__ jmp(&done);
|
||||
__ bind(&check_digits);
|
||||
// Check current character in range ['0'..'9'].
|
||||
__ cmp(Operand(current_character()), Immediate('0'));
|
||||
BranchOrBacktrack(below, on_no_match);
|
||||
__ bind(&done);
|
||||
|
||||
if (mode_ != ASCII) {
|
||||
// Table is 128 entries, so all ASCII characters can be tested.
|
||||
__ cmp(Operand(current_character()), Immediate('z'));
|
||||
BranchOrBacktrack(above, on_no_match);
|
||||
}
|
||||
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
|
||||
ExternalReference word_map = ExternalReference::re_word_character_map();
|
||||
__ test_b(current_character(),
|
||||
Operand::StaticArray(current_character(), times_1, word_map));
|
||||
BranchOrBacktrack(zero, on_no_match);
|
||||
return true;
|
||||
}
|
||||
case 'W': {
|
||||
Label done, check_digits;
|
||||
__ cmp(Operand(current_character()), Immediate('9'));
|
||||
__ j(less_equal, &check_digits);
|
||||
__ cmp(Operand(current_character()), Immediate('_'));
|
||||
BranchOrBacktrack(equal, on_no_match);
|
||||
// Convert to lower case if letter.
|
||||
__ mov(Operand(eax), current_character());
|
||||
__ or_(eax, 0x20);
|
||||
// check current character in range ['a'..'z'], nondestructively.
|
||||
__ sub(Operand(eax), Immediate('a'));
|
||||
__ cmp(Operand(eax), Immediate('z' - 'a'));
|
||||
BranchOrBacktrack(below_equal, on_no_match);
|
||||
__ jmp(&done);
|
||||
__ bind(&check_digits);
|
||||
// Check current character in range ['0'..'9'].
|
||||
__ cmp(Operand(current_character()), Immediate('0'));
|
||||
BranchOrBacktrack(above_equal, on_no_match);
|
||||
__ bind(&done);
|
||||
Label done;
|
||||
if (mode_ != ASCII) {
|
||||
// Table is 128 entries, so all ASCII characters can be tested.
|
||||
__ cmp(Operand(current_character()), Immediate('z'));
|
||||
__ j(above, &done);
|
||||
}
|
||||
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
|
||||
ExternalReference word_map = ExternalReference::re_word_character_map();
|
||||
__ test_b(current_character(),
|
||||
Operand::StaticArray(current_character(), times_1, word_map));
|
||||
BranchOrBacktrack(not_zero, on_no_match);
|
||||
if (mode_ != ASCII) {
|
||||
__ bind(&done);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Non-standard classes (with no syntactic shorthand) used internally.
|
||||
|
@ -189,6 +189,30 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
|
||||
|
||||
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
|
||||
|
||||
|
||||
// Table of 128 entries (16 rows of 8), one per ASCII code: 0xff marks a word
// character (digit, letter or underscore), 0x00 marks everything else.
byte NativeRegExpMacroAssembler::word_character_map[] = {
|
||||
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
|
||||
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
|
||||
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
|
||||
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
|
||||
|
||||
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
|
||||
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
|
||||
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
|
||||
0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
|
||||
|
||||
0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '@' (not a word character), 'A' - 'G'
|
||||
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
|
||||
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
|
||||
0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
|
||||
|
||||
0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '`' (not a word character), 'a' - 'g'
|
||||
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
|
||||
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
|
||||
0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
|
||||
};
|
||||
|
||||
|
||||
int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
|
||||
Address byte_offset1,
|
||||
Address byte_offset2,
|
||||
|
@ -204,6 +204,15 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
|
||||
|
||||
static const byte* StringCharacterPosition(String* subject, int start_index);
|
||||
|
||||
// Byte map of ASCII characters with a 0xff if the character is a word
|
||||
// character (digit, letter or underscore) and 0x00 otherwise.
|
||||
// Used by generated RegExp code.
|
||||
static byte word_character_map[128];
|
||||
|
||||
// Returns the address of the first entry of word_character_map.
static Address word_character_map_address() {
|
||||
return &word_character_map[0];
|
||||
}
|
||||
|
||||
static Result Execute(Code* code,
|
||||
String* input,
|
||||
int start_offset,
|
||||
|
@ -479,15 +479,19 @@ void ExternalReferenceTable::PopulateTable() {
|
||||
UNCLASSIFIED,
|
||||
21,
|
||||
"NativeRegExpMacroAssembler::GrowStack()");
|
||||
Add(ExternalReference::re_word_character_map().address(),
|
||||
UNCLASSIFIED,
|
||||
22,
|
||||
"NativeRegExpMacroAssembler::word_character_map");
|
||||
#endif
|
||||
// Keyed lookup cache.
|
||||
Add(ExternalReference::keyed_lookup_cache_keys().address(),
|
||||
UNCLASSIFIED,
|
||||
22,
|
||||
23,
|
||||
"KeyedLookupCache::keys()");
|
||||
Add(ExternalReference::keyed_lookup_cache_field_offsets().address(),
|
||||
UNCLASSIFIED,
|
||||
23,
|
||||
24,
|
||||
"KeyedLookupCache::field_offsets()");
|
||||
}
|
||||
|
||||
|
@ -1880,6 +1880,20 @@ void Assembler::testb(const Operand& op, Immediate mask) {
|
||||
}
|
||||
|
||||
|
||||
// Emits a REX prefix (forced when reg.code() > 3, otherwise only if needed),
// then opcode byte 0x84 and the operand encoding for reg and op.
void Assembler::testb(const Operand& op, Register reg) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
if (reg.code() > 3) {
|
||||
// Register is not one of al, bl, cl, dl. Its encoding needs REX.
|
||||
emit_rex_32(reg, op);
|
||||
} else {
|
||||
emit_optional_rex_32(reg, op);
|
||||
}
|
||||
emit(0x84);
|
||||
emit_operand(reg, op);
|
||||
}
|
||||
|
||||
|
||||
void Assembler::testl(Register dst, Register src) {
|
||||
EnsureSpace ensure_space(this);
|
||||
last_pc_ = pc_;
|
||||
|
@ -931,6 +931,7 @@ class Assembler : public Malloced {
|
||||
void testb(Register dst, Register src);
|
||||
void testb(Register reg, Immediate mask);
|
||||
void testb(const Operand& op, Immediate mask);
|
||||
void testb(const Operand& op, Register reg);
|
||||
void testl(Register dst, Register src);
|
||||
void testl(Register reg, Immediate mask);
|
||||
void testl(const Operand& op, Immediate mask);
|
||||
|
@ -582,49 +582,38 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
|
||||
return true;
|
||||
}
|
||||
case 'w': {
|
||||
Label done, check_digits;
|
||||
__ cmpl(current_character(), Immediate('9'));
|
||||
__ j(less_equal, &check_digits);
|
||||
__ cmpl(current_character(), Immediate('_'));
|
||||
__ j(equal, &done);
|
||||
// Convert to lower case if letter.
|
||||
__ movl(rax, current_character());
|
||||
__ orl(rax, Immediate(0x20));
|
||||
// check rax in range ['a'..'z'].
|
||||
__ subl(rax, Immediate('a'));
|
||||
__ cmpl(rax, Immediate('z' - 'a'));
|
||||
BranchOrBacktrack(above, on_no_match);
|
||||
__ jmp(&done);
|
||||
__ bind(&check_digits);
|
||||
// Check current character in range ['0'..'9'].
|
||||
__ cmpl(current_character(), Immediate('0'));
|
||||
BranchOrBacktrack(below, on_no_match);
|
||||
__ bind(&done);
|
||||
|
||||
if (mode_ != ASCII) {
|
||||
// Table is 128 entries, so all ASCII characters can be tested.
|
||||
__ cmpl(current_character(), Immediate('z'));
|
||||
BranchOrBacktrack(above, on_no_match);
|
||||
}
|
||||
__ movq(rbx, ExternalReference::re_word_character_map());
|
||||
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
|
||||
ExternalReference word_map = ExternalReference::re_word_character_map();
|
||||
__ testb(Operand(rbx, current_character(), times_1, 0),
|
||||
current_character());
|
||||
BranchOrBacktrack(zero, on_no_match);
|
||||
return true;
|
||||
}
|
||||
case 'W': {
|
||||
Label done, check_digits;
|
||||
__ cmpl(current_character(), Immediate('9'));
|
||||
__ j(less_equal, &check_digits);
|
||||
__ cmpl(current_character(), Immediate('_'));
|
||||
BranchOrBacktrack(equal, on_no_match);
|
||||
// Convert to lower case if letter.
|
||||
__ movl(rax, current_character());
|
||||
__ orl(rax, Immediate(0x20));
|
||||
// check current character in range ['a'..'z'], nondestructively.
|
||||
__ subl(rax, Immediate('a'));
|
||||
__ cmpl(rax, Immediate('z' - 'a'));
|
||||
BranchOrBacktrack(below_equal, on_no_match);
|
||||
__ jmp(&done);
|
||||
__ bind(&check_digits);
|
||||
// Check current character in range ['0'..'9'].
|
||||
__ cmpl(current_character(), Immediate('0'));
|
||||
BranchOrBacktrack(above_equal, on_no_match);
|
||||
__ bind(&done);
|
||||
|
||||
Label done;
|
||||
if (mode_ != ASCII) {
|
||||
// Table is 128 entries, so all ASCII characters can be tested.
|
||||
__ cmpl(current_character(), Immediate('z'));
|
||||
__ j(above, &done);
|
||||
}
|
||||
__ movq(rbx, ExternalReference::re_word_character_map());
|
||||
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
|
||||
ExternalReference word_map = ExternalReference::re_word_character_map();
|
||||
__ testb(Operand(rbx, current_character(), times_1, 0),
|
||||
current_character());
|
||||
BranchOrBacktrack(not_zero, on_no_match);
|
||||
if (mode_ != ASCII) {
|
||||
__ bind(&done);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
case '*':
|
||||
// Match any character.
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user