RegExp macro assembler clean up.

Remove never-called methods and fix a bug on ARM.

BUG=none
TEST=none

Review URL: https://chromiumcodereview.appspot.com/16280005

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@14967 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
rodolph.perfetta@gmail.com 2013-06-06 10:17:07 +00:00
parent a9a80fb422
commit e19a55df0d
14 changed files with 44 additions and 378 deletions

View File

@ -235,54 +235,6 @@ void RegExpMacroAssemblerARM::CheckCharacterLT(uc16 limit, Label* on_less) {
}
// Emits code comparing the input characters starting at cp_offset against the
// literal |str|, one character at a time.
//
//   str                  Characters that must match.
//   cp_offset            Offset, in characters, from the current position.
//   on_failure           Target taken on the first mismatch; when NULL the
//                        shared backtrack_label_ is used instead.
//   check_end_of_string  When true, CheckPosition() is called for the last
//                        character of the match before anything is loaded.
void RegExpMacroAssemblerARM::CheckCharacters(Vector<const uc16> str,
                                              int cp_offset,
                                              Label* on_failure,
                                              bool check_end_of_string) {
  if (on_failure == NULL) {
    // Instead of inlining a backtrack for each test, (re)use the global
    // backtrack target.
    on_failure = &backtrack_label_;
  }

  if (check_end_of_string) {
    // Is last character of required match inside string.
    CheckPosition(cp_offset + str.length() - 1, on_failure);
  }

  __ add(r0, end_of_input_address(), Operand(current_input_offset()));
  if (cp_offset != 0) {
    int byte_offset = cp_offset * char_size();
    __ add(r0, r0, Operand(byte_offset));
  }

  // r0 : Address of characters to match against str.
  // r2 : (high byte << 8) of the most recently materialized wide character;
  //      stored_high_byte tracks which high byte that is.
  int stored_high_byte = 0;
  for (int i = 0; i < str.length(); i++) {
    if (mode_ == ASCII) {
      __ ldrb(r1, MemOperand(r0, char_size(), PostIndex));
      ASSERT(str[i] <= String::kMaxOneByteCharCode);
      __ cmp(r1, Operand(str[i]));
    } else {
      __ ldrh(r1, MemOperand(r0, char_size(), PostIndex));
      uc16 match_char = str[i];
      int match_high_byte = (match_char >> 8);
      if (match_high_byte == 0) {
        __ cmp(r1, Operand(str[i]));
      } else {
        if (match_high_byte != stored_high_byte) {
          // The high byte has to sit in bits 8..15 so that adding the low
          // byte below rebuilds the full 16-bit character. Loading it
          // unshifted would compare against (high + low) instead.
          __ mov(r2, Operand(match_high_byte << 8));
          stored_high_byte = match_high_byte;
        }
        __ add(r3, r2, Operand(match_char & 0xff));
        __ cmp(r1, r3);
      }
    }
    BranchOrBacktrack(ne, on_failure);
  }
}
void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
__ ldr(r0, MemOperand(backtrack_stackpointer(), 0));
__ cmp(current_input_offset(), r0);
@ -556,7 +508,7 @@ bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type,
case 'd':
// Match ASCII digits ('0'..'9')
__ sub(r0, current_character(), Operand('0'));
__ cmp(current_character(), Operand('9' - '0'));
__ cmp(r0, Operand('9' - '0'));
BranchOrBacktrack(hi, on_no_match);
return true;
case 'D':

View File

@ -53,10 +53,6 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);

View File

@ -209,86 +209,6 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
}
// Emits code matching the literal |str| against the input at cp_offset.
// The first character is tested on its own; the remaining characters are
// compared in groups of up to four bytes. Every mismatch goes to on_failure,
// which is replaced by the shared backtrack_label_ when it is NULL. When
// check_end_of_string is set, a bounds check on edi is emitted first.
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
                                               int cp_offset,
                                               Label* on_failure,
                                               bool check_end_of_string) {
#ifdef DEBUG
  // Callers must not pass a string containing non-ASCII characters when the
  // input itself is ASCII.
  if (mode_ == ASCII) {
    ASSERT(String::IsOneByte(str.start(), str.length()));
  }
#endif
  const int length_in_bytes = str.length() * char_size();
  const int start_in_bytes = cp_offset * char_size();

  if (check_end_of_string) {
    // Make sure at least str.length() characters remain in the input.
    __ cmp(edi, Immediate(-(start_in_bytes + length_in_bytes)));
    BranchOrBacktrack(greater, on_failure);
  }

  // Share the global backtrack target rather than inlining a backtrack.
  if (on_failure == NULL) on_failure = &backtrack_label_;

  // Test a single character first: if nothing matches we have loaded as
  // little as possible (wider loads risk unaligned reads and reads across
  // cache boundaries). Once the first character matches, the rest of the
  // string is likely to match too, so wider loads are used from then on.
  if (mode_ != ASCII) {
    // No 16-bit immediate here; the size-changing prefix throws off
    // pre-decoding.
    __ movzx_w(eax, Operand(esi, edi, times_1, start_in_bytes));
    __ cmp(eax, static_cast<int32_t>(str[0]));
  } else {
    __ cmpb(Operand(esi, edi, times_1, start_in_bytes),
            static_cast<int8_t>(str[0]));
  }
  BranchOrBacktrack(not_equal, on_failure);

  __ lea(ebx, Operand(esi, edi, times_1, 0));

  const int n = str.length();
  int i = 1;
  while (i < n) {
    if (mode_ == ASCII) {
      if (i > n - 4) {
        // Fewer than four characters left: compare a single byte.
        __ cmpb(Operand(ebx, start_in_bytes + i),
                static_cast<int8_t>(str[i]));
        i += 1;
      } else {
        // Pack four characters into one 32-bit immediate, lowest index in
        // the lowest byte.
        int combined_chars =
            (static_cast<uint32_t>(str[i + 0]) << 0) |
            (static_cast<uint32_t>(str[i + 1]) << 8) |
            (static_cast<uint32_t>(str[i + 2]) << 16) |
            (static_cast<uint32_t>(str[i + 3]) << 24);
        __ cmp(Operand(ebx, start_in_bytes + i), Immediate(combined_chars));
        i += 4;
      }
    } else {
      ASSERT(mode_ == UC16);
      if (i > n - 2) {
        // A single trailing character. Avoid a 16-bit immediate operation:
        // it uses the length-changing 0x66 prefix, which causes pre-decoder
        // misprediction and pipeline stalls. See
        // "Intel(R) 64 and IA-32 Architectures Optimization Reference Manual"
        // (248966.pdf) section 3.4.2.3 "Length-Changing Prefixes (LCP)"
        __ movzx_w(eax,
                   Operand(ebx, start_in_bytes + i * sizeof(uc16)));
        __ cmp(eax, static_cast<int32_t>(str[i]));
        i += 1;
      } else {
        // Compare two 16-bit characters with a single 32-bit immediate.
        __ cmp(Operand(ebx, start_in_bytes + i * sizeof(uc16)),
               Immediate(*reinterpret_cast<const int*>(&str[i])));
        i += 2;
      }
    }
    BranchOrBacktrack(not_equal, on_failure);
  }
}
void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmp(edi, Operand(backtrack_stackpointer(), 0));

View File

@ -52,10 +52,6 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);

View File

@ -235,55 +235,6 @@ void RegExpMacroAssemblerMIPS::CheckCharacterLT(uc16 limit, Label* on_less) {
}
// Emits code comparing the input characters starting at cp_offset against the
// literal |str|, one character at a time.
//
//   str                  Characters that must match.
//   cp_offset            Offset, in characters, from the current position.
//   on_failure           Target taken on the first mismatch; when NULL the
//                        shared backtrack_label_ is used instead.
//   check_end_of_string  When true, CheckPosition() is called for the last
//                        character of the match before anything is loaded.
void RegExpMacroAssemblerMIPS::CheckCharacters(Vector<const uc16> str,
                                               int cp_offset,
                                               Label* on_failure,
                                               bool check_end_of_string) {
  if (on_failure == NULL) {
    // Instead of inlining a backtrack for each test, (re)use the global
    // backtrack target.
    on_failure = &backtrack_label_;
  }

  if (check_end_of_string) {
    // Is last character of required match inside string.
    CheckPosition(cp_offset + str.length() - 1, on_failure);
  }

  __ Addu(a0, end_of_input_address(), Operand(current_input_offset()));
  if (cp_offset != 0) {
    int byte_offset = cp_offset * char_size();
    __ Addu(a0, a0, Operand(byte_offset));
  }

  // a0 : Address of characters to match against str.
  // a2 : (high byte << 8) of the most recently materialized wide character;
  //      stored_high_byte tracks which high byte that is.
  int stored_high_byte = 0;
  for (int i = 0; i < str.length(); i++) {
    if (mode_ == ASCII) {
      __ lbu(a1, MemOperand(a0, 0));
      __ addiu(a0, a0, char_size());
      ASSERT(str[i] <= String::kMaxOneByteCharCode);
      BranchOrBacktrack(on_failure, ne, a1, Operand(str[i]));
    } else {
      __ lhu(a1, MemOperand(a0, 0));
      __ addiu(a0, a0, char_size());
      uc16 match_char = str[i];
      int match_high_byte = (match_char >> 8);
      if (match_high_byte == 0) {
        BranchOrBacktrack(on_failure, ne, a1, Operand(str[i]));
      } else {
        if (match_high_byte != stored_high_byte) {
          // The high byte has to sit in bits 8..15 so that adding the low
          // byte below rebuilds the full 16-bit character. Loading it
          // unshifted would compare against (high + low) instead.
          __ li(a2, Operand(match_high_byte << 8));
          stored_high_byte = match_high_byte;
        }
        __ Addu(a3, a2, Operand(match_char & 0xff));
        BranchOrBacktrack(on_failure, ne, a1, Operand(a3));
      }
    }
  }
}
void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
Label backtrack_non_equal;
__ lw(a0, MemOperand(backtrack_stackpointer(), 0));

View File

@ -55,10 +55,6 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);

View File

@ -410,28 +410,6 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
}
// Emits bytecode that matches |str| against the input at cp_offset, walking
// the string from its last character to its first. Each character produces a
// load followed by BC_CHECK_NOT_CHAR linked to on_failure. Only the last
// character uses the checked load (also linked to on_failure), and only when
// check_end_of_string is set; all other loads are unchecked.
void RegExpMacroAssemblerIrregexp::CheckCharacters(
    Vector<const uc16> str,
    int cp_offset,
    Label* on_failure,
    bool check_end_of_string) {
  ASSERT(cp_offset >= kMinCPOffset);
  ASSERT(cp_offset + str.length() - 1 <= kMaxCPOffset);

  const int last_index = str.length() - 1;
  // The iteration must run backwards: the unchecked loads below rely on the
  // checked load of the last character having been emitted first.
  for (int index = last_index; index >= 0; index--) {
    const int char_offset = cp_offset + index;
    const bool checked_load = check_end_of_string && index == last_index;
    if (!checked_load) {
      Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, char_offset);
    } else {
      Emit(BC_LOAD_CURRENT_CHAR, char_offset);
      EmitOrLink(on_failure);
    }
    Emit(BC_CHECK_NOT_CHAR, str[index]);
    EmitOrLink(on_failure);
  }
}
void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
int comparand,
Label* on_less_than) {

View File

@ -103,10 +103,6 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual void IfRegisterEqPos(int register_index, Label* if_eq);

View File

@ -383,21 +383,6 @@ void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
}
// Prints a trace line for the call (each character of |str| as a 4-digit hex
// value, followed by cp_offset and the label) and then forwards the call
// unchanged to the wrapped assembler. The printed name is "CheckCharacters"
// when check_end_of_string is set and "CheckCharactersUnchecked" otherwise.
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
                                                 int cp_offset,
                                                 Label* on_failure,
                                                 bool check_end_of_string) {
  const char* traced_name = "CheckCharactersUnchecked";
  if (check_end_of_string) {
    traced_name = "CheckCharacters";
  }
  PrintF(" %s(str=\"", traced_name);

  const int char_count = str.length();
  for (int index = 0; index < char_count; index++) {
    PrintF("0x%04x", str[index]);
  }

  PrintF("\", cp_offset=%d, label[%08x])\n",
         cp_offset, LabelToInt(on_failure));

  assembler_->CheckCharacters(str, cp_offset, on_failure, check_end_of_string);
}
bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
uc16 type,
Label* on_no_match) {

View File

@ -49,11 +49,6 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);

View File

@ -87,17 +87,6 @@ class RegExpMacroAssembler {
Label* on_equal) = 0;
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. If check_eos is set then
// the end of input always fails. If check_eos is clear then it is the
// caller's responsibility to ensure that the end of string is not hit.
// If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_eos) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;

View File

@ -226,101 +226,6 @@ void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) {
}
// Emits code matching the literal |str| against the input at cp_offset.
// The first character is tested on its own; the remaining characters are
// compared in groups of up to eight bytes.
//
//   str                  Characters that must match.
//   cp_offset            Offset, in characters, from the current position.
//   on_failure           Target taken on a mismatch; when NULL the shared
//                        backtrack_label_ is used for the character tests.
//   check_end_of_string  When true, a bounds check on rdi is emitted first.
void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
                                              int cp_offset,
                                              Label* on_failure,
                                              bool check_end_of_string) {
#ifdef DEBUG
  // If input is ASCII, don't even bother calling here if the string to
  // match contains a non-ASCII character.
  if (mode_ == ASCII) {
    ASSERT(String::IsOneByte(str.start(), str.length()));
  }
#endif
  int byte_length = str.length() * char_size();
  int byte_offset = cp_offset * char_size();
  if (check_end_of_string) {
    // Check that there are at least str.length() characters left in the input.
    __ cmpl(rdi, Immediate(-(byte_offset + byte_length)));
    BranchOrBacktrack(greater, on_failure);
  }

  if (on_failure == NULL) {
    // Instead of inlining a backtrack, (re)use the global backtrack target.
    on_failure = &backtrack_label_;
  }

  // Do one character test first to minimize loading for the case that
  // we don't match at all (loading more than one character introduces that
  // chance of reading unaligned and reading across cache boundaries).
  // If the first character matches, expect a larger chance of matching the
  // string, and start loading more characters at a time.
  if (mode_ == ASCII) {
    __ cmpb(Operand(rsi, rdi, times_1, byte_offset),
            Immediate(static_cast<int8_t>(str[0])));
  } else {
    // Don't use 16-bit immediate. The size changing prefix throws off
    // pre-decoding.
    __ movzxwl(rax,
               Operand(rsi, rdi, times_1, byte_offset));
    __ cmpl(rax, Immediate(static_cast<int32_t>(str[0])));
  }
  BranchOrBacktrack(not_equal, on_failure);

  __ lea(rbx, Operand(rsi, rdi, times_1, 0));
  for (int i = 1, n = str.length(); i < n; ) {
    if (mode_ == ASCII) {
      if (i + 8 <= n) {
        // Pack eight characters into one 64-bit constant, lowest index in
        // the lowest byte. The parts must be joined with bitwise OR; logical
        // OR would collapse the whole expression to 0 or 1.
        uint64_t combined_chars =
            (static_cast<uint64_t>(str[i + 0]) << 0) |
            (static_cast<uint64_t>(str[i + 1]) << 8) |
            (static_cast<uint64_t>(str[i + 2]) << 16) |
            (static_cast<uint64_t>(str[i + 3]) << 24) |
            (static_cast<uint64_t>(str[i + 4]) << 32) |
            (static_cast<uint64_t>(str[i + 5]) << 40) |
            (static_cast<uint64_t>(str[i + 6]) << 48) |
            (static_cast<uint64_t>(str[i + 7]) << 56);
        __ movq(rax, combined_chars, RelocInfo::NONE64);
        __ cmpq(rax, Operand(rbx, byte_offset + i));
        i += 8;
      } else if (i + 4 <= n) {
        // Same packing for four characters in a 32-bit immediate.
        uint32_t combined_chars =
            (static_cast<uint32_t>(str[i + 0]) << 0) |
            (static_cast<uint32_t>(str[i + 1]) << 8) |
            (static_cast<uint32_t>(str[i + 2]) << 16) |
            (static_cast<uint32_t>(str[i + 3]) << 24);
        __ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars));
        i += 4;
      } else {
        __ cmpb(Operand(rbx, byte_offset + i),
                Immediate(static_cast<int8_t>(str[i])));
        i++;
      }
    } else {
      ASSERT(mode_ == UC16);
      if (i + 4 <= n) {
        // Four 16-bit characters read directly from str as one 64-bit value.
        uint64_t combined_chars = *reinterpret_cast<const uint64_t*>(&str[i]);
        __ movq(rax, combined_chars, RelocInfo::NONE64);
        __ cmpq(rax,
                Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
        i += 4;
      } else if (i + 2 <= n) {
        // Two 16-bit characters read directly from str as one 32-bit value.
        uint32_t combined_chars = *reinterpret_cast<const uint32_t*>(&str[i]);
        __ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
                Immediate(combined_chars));
        i += 2;
      } else {
        __ movzxwl(rax,
                   Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
        __ cmpl(rax, Immediate(str[i]));
        i++;
      }
    }
    BranchOrBacktrack(not_equal, on_failure);
  }
}
void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmpl(rdi, Operand(backtrack_stackpointer(), 0));

View File

@ -55,10 +55,6 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
Label* on_equal);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure,
bool check_end_of_string);
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);

View File

@ -784,15 +784,22 @@ TEST(MacroAssemblerNativeSimple) {
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4,
Isolate::Current()->runtime_zone());
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label fail;
m.CheckCharacters(foo, 0, &fail, true);
Label fail, backtrack;
m.PushBacktrack(&fail);
m.CheckNotAtStart(NULL);
m.LoadCurrentCharacter(2, NULL);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(1, NULL, false);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(0, NULL, false);
m.CheckNotCharacter('f', NULL);
m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(1, 3);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PushBacktrack(&backtrack);
m.Succeed();
m.Bind(&backtrack);
m.Backtrack();
m.Bind(&fail);
m.Fail();
@ -842,15 +849,22 @@ TEST(MacroAssemblerNativeSimpleUC16) {
ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4,
Isolate::Current()->runtime_zone());
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label fail;
m.CheckCharacters(foo, 0, &fail, true);
Label fail, backtrack;
m.PushBacktrack(&fail);
m.CheckNotAtStart(NULL);
m.LoadCurrentCharacter(2, NULL);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(1, NULL, false);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(0, NULL, false);
m.CheckNotCharacter('f', NULL);
m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(1, 3);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PushBacktrack(&backtrack);
m.Succeed();
m.Bind(&backtrack);
m.Backtrack();
m.Bind(&fail);
m.Fail();
@ -1349,36 +1363,33 @@ TEST(MacroAssembler) {
RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024),
Isolate::Current()->runtime_zone());
// ^f(o)o.
Label fail, fail2, start;
uc16 foo_chars[3];
foo_chars[0] = 'f';
foo_chars[1] = 'o';
foo_chars[2] = 'o';
Vector<const uc16> foo(foo_chars, 3);
Label start, fail, backtrack;
m.SetRegister(4, 42);
m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
m.AdvanceRegister(4, 42);
m.GoTo(&start);
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail, true);
m.PushBacktrack(&fail);
m.CheckNotAtStart(NULL);
m.LoadCurrentCharacter(0, NULL);
m.CheckNotCharacter('f', NULL);
m.LoadCurrentCharacter(1, NULL);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(2, NULL);
m.CheckNotCharacter('o', NULL);
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.WriteCurrentPositionToRegister(1, 3);
m.WriteCurrentPositionToRegister(2, 1);
m.WriteCurrentPositionToRegister(3, 2);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3, 0);
m.PushBacktrack(&backtrack);
m.Succeed();
m.Bind(&fail);
m.Bind(&backtrack);
m.ClearRegisters(2, 3);
m.Backtrack();
m.Succeed();
m.Bind(&fail2);
m.Bind(&fail);
m.PopRegister(0);
m.Fail();