RegExp bitmap test for word character.

Review URL: http://codereview.chromium.org/547024

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3626 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-01-18 09:49:50 +00:00
parent a5ac66628d
commit eee6c6405e
13 changed files with 166 additions and 130 deletions

View File

@ -526,64 +526,54 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
return true;
}
case 'n': {
// Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
__ eor(r0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
__ sub(r0, r0, Operand(0x0b));
__ cmp(r0, Operand(0x0c - 0x0b));
if (mode_ == ASCII) {
BranchOrBacktrack(hi, on_no_match);
} else {
Label done;
__ b(ls, &done);
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
// 0x201d (0x2028 - 0x0b) or 0x201e.
__ sub(r0, r0, Operand(0x2028 - 0x0b));
__ cmp(r0, Operand(1));
BranchOrBacktrack(hi, on_no_match);
__ bind(&done);
}
return true;
// Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
__ eor(r0, current_character(), Operand(0x01));
// See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
__ sub(r0, r0, Operand(0x0b));
__ cmp(r0, Operand(0x0c - 0x0b));
if (mode_ == ASCII) {
BranchOrBacktrack(hi, on_no_match);
} else {
Label done;
__ b(ls, &done);
// Compare original value to 0x2028 and 0x2029, using the already
// computed (current_char ^ 0x01 - 0x0b). I.e., check for
// 0x201d (0x2028 - 0x0b) or 0x201e.
__ sub(r0, r0, Operand(0x2028 - 0x0b));
__ cmp(r0, Operand(1));
BranchOrBacktrack(hi, on_no_match);
__ bind(&done);
}
return true;
}
case 'w': {
// Match word character (0-9, A-Z, a-z and _).
Label digits, done;
__ cmp(current_character(), Operand('9'));
__ b(ls, &digits);
__ cmp(current_character(), Operand('_'));
__ b(eq, &done);
__ orr(r0, current_character(), Operand(0x20));
__ sub(r0, r0, Operand('a'));
__ cmp(r0, Operand('z' - 'a'));
BranchOrBacktrack(hi, on_no_match);
__ jmp(&done);
__ bind(&digits);
__ cmp(current_character(), Operand('0'));
BranchOrBacktrack(lo, on_no_match);
__ bind(&done);
if (mode_ != ASCII) {
// Table is 128 entries, so all ASCII characters can be tested.
__ cmp(current_character(), Operand('z'));
BranchOrBacktrack(hi, on_no_match);
}
ExternalReference map = ExternalReference::re_word_character_map();
__ mov(r0, Operand(map));
__ ldrb(r0, MemOperand(r0, current_character()));
__ tst(r0, Operand(r0));
BranchOrBacktrack(eq, on_no_match);
return true;
}
case 'W': {
// Match non-word character (not 0-9, A-Z, a-z and _).
Label digits, done;
__ cmp(current_character(), Operand('9'));
__ b(ls, &digits);
__ cmp(current_character(), Operand('_'));
BranchOrBacktrack(eq, on_no_match);
__ orr(r0, current_character(), Operand(0x20));
__ sub(r0, r0, Operand('a'));
__ cmp(r0, Operand('z' - 'a'));
BranchOrBacktrack(ls, on_no_match);
__ jmp(&done);
__ bind(&digits);
__ cmp(current_character(), Operand('0'));
BranchOrBacktrack(hs, on_no_match);
__ bind(&done);
Label done;
if (mode_ != ASCII) {
// Table is 128 entries, so all ASCII characters can be tested.
__ cmp(current_character(), Operand('z'));
__ b(hi, &done);
}
ExternalReference map = ExternalReference::re_word_character_map();
__ mov(r0, Operand(map));
__ ldrb(r0, MemOperand(r0, current_character()));
__ tst(r0, Operand(r0));
BranchOrBacktrack(ne, on_no_match);
if (mode_ != ASCII) {
__ bind(&done);
}
return true;
}
case '*':

View File

@ -670,6 +670,10 @@ ExternalReference ExternalReference::re_case_insensitive_compare_uc16() {
FUNCTION_ADDR(NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)));
}
ExternalReference ExternalReference::re_word_character_map() {
return ExternalReference(
NativeRegExpMacroAssembler::word_character_map_address());
}
ExternalReference ExternalReference::address_of_static_offsets_vector() {
return ExternalReference(OffsetsVector::static_offsets_vector_address());

View File

@ -462,6 +462,10 @@ class ExternalReference BASE_EMBEDDED {
// Function NativeRegExpMacroAssembler::GrowStack()
static ExternalReference re_grow_stack();
// byte NativeRegExpMacroAssembler::word_character_bitmap
static ExternalReference re_word_character_map();
#endif
// This lets you register a function that rewrites all external references.

View File

@ -1261,6 +1261,14 @@ void Assembler::test(Register reg, const Operand& op) {
}
void Assembler::test_b(Register reg, const Operand& op) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
EMIT(0x84);
emit_operand(reg, op);
}
void Assembler::test(const Operand& op, const Immediate& imm) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;

View File

@ -624,6 +624,7 @@ class Assembler : public Malloced {
void test(Register reg, const Immediate& imm);
void test(Register reg, const Operand& op);
void test_b(Register reg, const Operand& op);
void test(const Operand& op, const Immediate& imm);
void xor_(Register dst, int32_t imm32);

View File

@ -63,6 +63,7 @@ static ByteMnemonic two_operands_instr[] = {
{0x29, "sub", OPER_REG_OP_ORDER},
{0x2A, "subb", REG_OPER_OP_ORDER},
{0x2B, "sub", REG_OPER_OP_ORDER},
{0x84, "test_b", REG_OPER_OP_ORDER},
{0x85, "test", REG_OPER_OP_ORDER},
{0x31, "xor", OPER_REG_OP_ORDER},
{0x33, "xor", REG_OPER_OP_ORDER},

View File

@ -539,46 +539,33 @@ bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
return true;
}
case 'w': {
Label done, check_digits;
__ cmp(Operand(current_character()), Immediate('9'));
__ j(less_equal, &check_digits);
__ cmp(Operand(current_character()), Immediate('_'));
__ j(equal, &done);
// Convert to lower case if letter.
__ mov(Operand(eax), current_character());
__ or_(eax, 0x20);
// check current character in range ['a'..'z'], nondestructively.
__ sub(Operand(eax), Immediate('a'));
__ cmp(Operand(eax), Immediate('z' - 'a'));
BranchOrBacktrack(above, on_no_match);
__ jmp(&done);
__ bind(&check_digits);
// Check current character in range ['0'..'9'].
__ cmp(Operand(current_character()), Immediate('0'));
BranchOrBacktrack(below, on_no_match);
__ bind(&done);
if (mode_ != ASCII) {
// Table is 128 entries, so all ASCII characters can be tested.
__ cmp(Operand(current_character()), Immediate('z'));
BranchOrBacktrack(above, on_no_match);
}
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
ExternalReference word_map = ExternalReference::re_word_character_map();
__ test_b(current_character(),
Operand::StaticArray(current_character(), times_1, word_map));
BranchOrBacktrack(zero, on_no_match);
return true;
}
case 'W': {
Label done, check_digits;
__ cmp(Operand(current_character()), Immediate('9'));
__ j(less_equal, &check_digits);
__ cmp(Operand(current_character()), Immediate('_'));
BranchOrBacktrack(equal, on_no_match);
// Convert to lower case if letter.
__ mov(Operand(eax), current_character());
__ or_(eax, 0x20);
// check current character in range ['a'..'z'], nondestructively.
__ sub(Operand(eax), Immediate('a'));
__ cmp(Operand(eax), Immediate('z' - 'a'));
BranchOrBacktrack(below_equal, on_no_match);
__ jmp(&done);
__ bind(&check_digits);
// Check current character in range ['0'..'9'].
__ cmp(Operand(current_character()), Immediate('0'));
BranchOrBacktrack(above_equal, on_no_match);
__ bind(&done);
Label done;
if (mode_ != ASCII) {
// Table is 128 entries, so all ASCII characters can be tested.
__ cmp(Operand(current_character()), Immediate('z'));
__ j(above, &done);
}
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
ExternalReference word_map = ExternalReference::re_word_character_map();
__ test_b(current_character(),
Operand::StaticArray(current_character(), times_1, word_map));
BranchOrBacktrack(not_zero, on_no_match);
if (mode_ != ASCII) {
__ bind(&done);
}
return true;
}
// Non-standard classes (with no syntactic shorthand) used internally.

View File

@ -189,6 +189,30 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
byte NativeRegExpMacroAssembler::word_character_map[] = {
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7'
0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G'
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O'
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W'
0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_'
0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g'
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o'
0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w'
0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
};
int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
Address byte_offset1,
Address byte_offset2,

View File

@ -204,6 +204,15 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
static const byte* StringCharacterPosition(String* subject, int start_index);
// Byte map of ASCII characters with a 0xff if the character is a word
// character (digit, letter or underscore) and 0x00 otherwise.
// Used by generated RegExp code.
static byte word_character_map[128];
static Address word_character_map_address() {
return &word_character_map[0];
}
static Result Execute(Code* code,
String* input,
int start_offset,

View File

@ -479,15 +479,19 @@ void ExternalReferenceTable::PopulateTable() {
UNCLASSIFIED,
21,
"NativeRegExpMacroAssembler::GrowStack()");
Add(ExternalReference::re_word_character_map().address(),
UNCLASSIFIED,
22,
"NativeRegExpMacroAssembler::word_character_map");
#endif
// Keyed lookup cache.
Add(ExternalReference::keyed_lookup_cache_keys().address(),
UNCLASSIFIED,
22,
23,
"KeyedLookupCache::keys()");
Add(ExternalReference::keyed_lookup_cache_field_offsets().address(),
UNCLASSIFIED,
23,
24,
"KeyedLookupCache::field_offsets()");
}

View File

@ -1880,6 +1880,20 @@ void Assembler::testb(const Operand& op, Immediate mask) {
}
void Assembler::testb(const Operand& op, Register reg) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;
if (reg.code() > 3) {
// Register is not one of al, bl, cl, dl. Its encoding needs REX.
emit_rex_32(reg, op);
} else {
emit_optional_rex_32(reg, op);
}
emit(0x84);
emit_operand(reg, op);
}
void Assembler::testl(Register dst, Register src) {
EnsureSpace ensure_space(this);
last_pc_ = pc_;

View File

@ -931,6 +931,7 @@ class Assembler : public Malloced {
void testb(Register dst, Register src);
void testb(Register reg, Immediate mask);
void testb(const Operand& op, Immediate mask);
void testb(const Operand& op, Register reg);
void testl(Register dst, Register src);
void testl(Register reg, Immediate mask);
void testl(const Operand& op, Immediate mask);

View File

@ -582,49 +582,38 @@ bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
return true;
}
case 'w': {
Label done, check_digits;
__ cmpl(current_character(), Immediate('9'));
__ j(less_equal, &check_digits);
__ cmpl(current_character(), Immediate('_'));
__ j(equal, &done);
// Convert to lower case if letter.
__ movl(rax, current_character());
__ orl(rax, Immediate(0x20));
// check rax in range ['a'..'z'].
__ subl(rax, Immediate('a'));
__ cmpl(rax, Immediate('z' - 'a'));
BranchOrBacktrack(above, on_no_match);
__ jmp(&done);
__ bind(&check_digits);
// Check current character in range ['0'..'9'].
__ cmpl(current_character(), Immediate('0'));
BranchOrBacktrack(below, on_no_match);
__ bind(&done);
if (mode_ != ASCII) {
// Table is 128 entries, so all ASCII characters can be tested.
__ cmpl(current_character(), Immediate('z'));
BranchOrBacktrack(above, on_no_match);
}
__ movq(rbx, ExternalReference::re_word_character_map());
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
ExternalReference word_map = ExternalReference::re_word_character_map();
__ testb(Operand(rbx, current_character(), times_1, 0),
current_character());
BranchOrBacktrack(zero, on_no_match);
return true;
}
case 'W': {
Label done, check_digits;
__ cmpl(current_character(), Immediate('9'));
__ j(less_equal, &check_digits);
__ cmpl(current_character(), Immediate('_'));
BranchOrBacktrack(equal, on_no_match);
// Convert to lower case if letter.
__ movl(rax, current_character());
__ orl(rax, Immediate(0x20));
// check current character in range ['a'..'z'], nondestructively.
__ subl(rax, Immediate('a'));
__ cmpl(rax, Immediate('z' - 'a'));
BranchOrBacktrack(below_equal, on_no_match);
__ jmp(&done);
__ bind(&check_digits);
// Check current character in range ['0'..'9'].
__ cmpl(current_character(), Immediate('0'));
BranchOrBacktrack(above_equal, on_no_match);
__ bind(&done);
Label done;
if (mode_ != ASCII) {
// Table is 128 entries, so all ASCII characters can be tested.
__ cmpl(current_character(), Immediate('z'));
__ j(above, &done);
}
__ movq(rbx, ExternalReference::re_word_character_map());
ASSERT_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
ExternalReference word_map = ExternalReference::re_word_character_map();
__ testb(Operand(rbx, current_character(), times_1, 0),
current_character());
BranchOrBacktrack(not_zero, on_no_match);
if (mode_ != ASCII) {
__ bind(&done);
}
return true;
}
case '*':
// Match any character.
return true;