RegExp: Remove use of 16-bit immediates on ia32/x64.
Also check more than one character at a time. 16-bit immediates requires a prefix that changes the length of the instruction. This causes predecoder mispredictions and subsequent pipeline stalls. Also removed redundant "atStart" local variable which is equivalent to startIndex == 0. Review URL: http://codereview.chromium.org/1988009 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4639 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
b8f5c5e1ac
commit
e867e63281
@ -51,7 +51,7 @@ namespace internal {
|
||||
* - esp : points to tip of C stack.
|
||||
* - ecx : points to tip of backtrack stack
|
||||
*
|
||||
* The registers eax, ebx and ecx are free to use for computations.
|
||||
* The registers eax and ebx are free to use for computations.
|
||||
*
|
||||
* Each call to a public method should retain this convention.
|
||||
* The stack will have the following structure:
|
||||
@ -72,8 +72,6 @@ namespace internal {
|
||||
* - backup of caller ebx
|
||||
* - Offset of location before start of input (effectively character
|
||||
* position -1). Used to initialize capture registers to a non-position.
|
||||
* - Boolean at start (if 1, we are starting at the start of the string,
|
||||
* otherwise 0)
|
||||
* - register 0 ebp[-4] (Only positions must be stored in the first
|
||||
* - register 1 ebp[-8] num_saved_registers_ registers)
|
||||
* - ...
|
||||
@ -178,8 +176,8 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
|
||||
void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
|
||||
Label not_at_start;
|
||||
// Did we start the match at the start of the string at all?
|
||||
__ cmp(Operand(ebp, kAtStart), Immediate(0));
|
||||
BranchOrBacktrack(equal, ¬_at_start);
|
||||
__ cmp(Operand(ebp, kStartIndex), Immediate(0));
|
||||
BranchOrBacktrack(not_equal, ¬_at_start);
|
||||
// If we did, are we still at the start of the input?
|
||||
__ lea(eax, Operand(esi, edi, times_1, 0));
|
||||
__ cmp(eax, Operand(ebp, kInputStart));
|
||||
@ -190,8 +188,8 @@ void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) {
|
||||
// Did we start the match at the start of the string at all?
|
||||
__ cmp(Operand(ebp, kAtStart), Immediate(0));
|
||||
BranchOrBacktrack(equal, on_not_at_start);
|
||||
__ cmp(Operand(ebp, kStartIndex), Immediate(0));
|
||||
BranchOrBacktrack(not_equal, on_not_at_start);
|
||||
// If we did, are we still at the start of the input?
|
||||
__ lea(eax, Operand(esi, edi, times_1, 0));
|
||||
__ cmp(eax, Operand(ebp, kInputStart));
|
||||
@ -209,6 +207,15 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
|
||||
int cp_offset,
|
||||
Label* on_failure,
|
||||
bool check_end_of_string) {
|
||||
#ifdef DEBUG
|
||||
// If input is ASCII, don't even bother calling here if the string to
|
||||
// match contains a non-ascii character.
|
||||
if (mode_ == ASCII) {
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
ASSERT(str[i] <= String::kMaxAsciiCharCodeU);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
int byte_length = str.length() * char_size();
|
||||
int byte_offset = cp_offset * char_size();
|
||||
if (check_end_of_string) {
|
||||
@ -222,14 +229,56 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
|
||||
on_failure = &backtrack_label_;
|
||||
}
|
||||
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
// Do one character test first to minimize loading for the case that
|
||||
// we don't match at all (loading more than one character introduces that
|
||||
// chance of reading unaligned and reading across cache boundaries).
|
||||
// If the first character matches, expect a larger chance of matching the
|
||||
// string, and start loading more characters at a time.
|
||||
if (mode_ == ASCII) {
|
||||
__ cmpb(Operand(esi, edi, times_1, byte_offset + i),
|
||||
__ cmpb(Operand(esi, edi, times_1, byte_offset),
|
||||
static_cast<int8_t>(str[0]));
|
||||
} else {
|
||||
// Don't use 16-bit immediate. The size changing prefix throws off
|
||||
// pre-decoding.
|
||||
__ movzx_w(eax,
|
||||
Operand(esi, edi, times_1, byte_offset));
|
||||
__ cmp(eax, static_cast<int32_t>(str[0]));
|
||||
}
|
||||
BranchOrBacktrack(not_equal, on_failure);
|
||||
|
||||
__ lea(ebx, Operand(esi, edi, times_1, 0));
|
||||
for (int i = 1, n = str.length(); i < n;) {
|
||||
if (mode_ == ASCII) {
|
||||
if (i <= n - 4) {
|
||||
int combined_chars =
|
||||
(static_cast<uint32_t>(str[i + 0]) << 0) |
|
||||
(static_cast<uint32_t>(str[i + 1]) << 8) |
|
||||
(static_cast<uint32_t>(str[i + 2]) << 16) |
|
||||
(static_cast<uint32_t>(str[i + 3]) << 24);
|
||||
__ cmp(Operand(ebx, byte_offset + i), Immediate(combined_chars));
|
||||
i += 4;
|
||||
} else {
|
||||
__ cmpb(Operand(ebx, byte_offset + i),
|
||||
static_cast<int8_t>(str[i]));
|
||||
i += 1;
|
||||
}
|
||||
} else {
|
||||
ASSERT(mode_ == UC16);
|
||||
__ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)),
|
||||
Immediate(str[i]));
|
||||
if (i <= n - 2) {
|
||||
__ cmp(Operand(ebx, byte_offset + i * sizeof(uc16)),
|
||||
Immediate(*reinterpret_cast<const int*>(&str[i])));
|
||||
i += 2;
|
||||
} else {
|
||||
// Avoid a 16-bit immediate operation. It uses the length-changing
|
||||
// 0x66 prefix which causes pre-decoder misprediction and pipeline
|
||||
// stalls. See
|
||||
// "Intel(R) 64 and IA-32 Architectures Optimization Reference Manual"
|
||||
// (248966.pdf) section 3.4.2.3 "Length-Changing Prefixes (LCP)"
|
||||
__ movzx_w(eax,
|
||||
Operand(ebx, byte_offset + i * sizeof(uc16)));
|
||||
__ cmp(eax, static_cast<int32_t>(str[i]));
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
BranchOrBacktrack(not_equal, on_failure);
|
||||
}
|
||||
@ -625,7 +674,6 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
|
||||
__ push(edi);
|
||||
__ push(ebx); // Callee-save on MacOS.
|
||||
__ push(Immediate(0)); // Make room for "input start - 1" constant.
|
||||
__ push(Immediate(0)); // Make room for "at start" constant.
|
||||
|
||||
// Check if we have space on the stack for registers.
|
||||
Label stack_limit_hit;
|
||||
@ -677,14 +725,6 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
|
||||
// position registers.
|
||||
__ mov(Operand(ebp, kInputStartMinusOne), eax);
|
||||
|
||||
// Determine whether the start index is zero, that is at the start of the
|
||||
// string, and store that value in a local variable.
|
||||
__ xor_(Operand(ecx), ecx); // setcc only operates on cl (lower byte of ecx).
|
||||
// Register ebx still holds -stringIndex.
|
||||
__ test(ebx, Operand(ebx));
|
||||
__ setcc(zero, ecx); // 1 if 0 (start of string), 0 if positive.
|
||||
__ mov(Operand(ebp, kAtStart), ecx);
|
||||
|
||||
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
|
||||
// Fill saved registers with initial value = start offset - 1
|
||||
// Fill in stack push order, to avoid accessing across an unwritten
|
||||
@ -712,8 +752,8 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
|
||||
__ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
|
||||
// Load previous char as initial value of current-character.
|
||||
Label at_start;
|
||||
__ cmp(Operand(ebp, kAtStart), Immediate(0));
|
||||
__ j(not_equal, &at_start);
|
||||
__ cmp(Operand(ebp, kStartIndex), Immediate(0));
|
||||
__ j(equal, &at_start);
|
||||
LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
|
||||
__ jmp(&start_label_);
|
||||
__ bind(&at_start);
|
||||
|
@ -132,9 +132,8 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
|
||||
static const int kBackup_edi = kBackup_esi - kPointerSize;
|
||||
static const int kBackup_ebx = kBackup_edi - kPointerSize;
|
||||
static const int kInputStartMinusOne = kBackup_ebx - kPointerSize;
|
||||
static const int kAtStart = kInputStartMinusOne - kPointerSize;
|
||||
// First register address. Following registers are below it on the stack.
|
||||
static const int kRegisterZero = kAtStart - kPointerSize;
|
||||
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
|
||||
|
||||
// Initial size of code buffer.
|
||||
static const size_t kRegExpCodeSize = 1024;
|
||||
|
@ -188,8 +188,8 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
|
||||
void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
|
||||
Label not_at_start;
|
||||
// Did we start the match at the start of the string at all?
|
||||
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
|
||||
BranchOrBacktrack(equal, ¬_at_start);
|
||||
__ cmpb(Operand(rbp, kStartIndex), Immediate(0));
|
||||
BranchOrBacktrack(not_equal, ¬_at_start);
|
||||
// If we did, are we still at the start of the input?
|
||||
__ lea(rax, Operand(rsi, rdi, times_1, 0));
|
||||
__ cmpq(rax, Operand(rbp, kInputStart));
|
||||
@ -200,8 +200,8 @@ void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
|
||||
|
||||
void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) {
|
||||
// Did we start the match at the start of the string at all?
|
||||
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
|
||||
BranchOrBacktrack(equal, on_not_at_start);
|
||||
__ cmpb(Operand(rbp, kStartIndex), Immediate(0));
|
||||
BranchOrBacktrack(not_equal, on_not_at_start);
|
||||
// If we did, are we still at the start of the input?
|
||||
__ lea(rax, Operand(rsi, rdi, times_1, 0));
|
||||
__ cmpq(rax, Operand(rbp, kInputStart));
|
||||
@ -219,6 +219,15 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
|
||||
int cp_offset,
|
||||
Label* on_failure,
|
||||
bool check_end_of_string) {
|
||||
#ifdef DEBUG
|
||||
// If input is ASCII, don't even bother calling here if the string to
|
||||
// match contains a non-ascii character.
|
||||
if (mode_ == ASCII) {
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
ASSERT(str[i] <= String::kMaxAsciiCharCodeU);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
int byte_length = str.length() * char_size();
|
||||
int byte_offset = cp_offset * char_size();
|
||||
if (check_end_of_string) {
|
||||
@ -232,16 +241,71 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector<const uc16> str,
|
||||
on_failure = &backtrack_label_;
|
||||
}
|
||||
|
||||
// TODO(lrn): Test multiple characters at a time by loading 4 or 8 bytes
|
||||
// at a time.
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
// Do one character test first to minimize loading for the case that
|
||||
// we don't match at all (loading more than one character introduces that
|
||||
// chance of reading unaligned and reading across cache boundaries).
|
||||
// If the first character matches, expect a larger chance of matching the
|
||||
// string, and start loading more characters at a time.
|
||||
if (mode_ == ASCII) {
|
||||
__ cmpb(Operand(rsi, rdi, times_1, byte_offset + i),
|
||||
__ cmpb(Operand(rsi, rdi, times_1, byte_offset),
|
||||
Immediate(static_cast<int8_t>(str[0])));
|
||||
} else {
|
||||
// Don't use 16-bit immediate. The size changing prefix throws off
|
||||
// pre-decoding.
|
||||
__ movzxwl(rax,
|
||||
Operand(rsi, rdi, times_1, byte_offset));
|
||||
__ cmpl(rax, Immediate(static_cast<int32_t>(str[0])));
|
||||
}
|
||||
BranchOrBacktrack(not_equal, on_failure);
|
||||
|
||||
__ lea(rbx, Operand(rsi, rdi, times_1, 0));
|
||||
for (int i = 1, n = str.length(); i < n; ) {
|
||||
if (mode_ == ASCII) {
|
||||
if (i + 8 <= n) {
|
||||
uint64_t combined_chars =
|
||||
(static_cast<uint64_t>(str[i + 0]) << 0) ||
|
||||
(static_cast<uint64_t>(str[i + 1]) << 8) ||
|
||||
(static_cast<uint64_t>(str[i + 2]) << 16) ||
|
||||
(static_cast<uint64_t>(str[i + 3]) << 24) ||
|
||||
(static_cast<uint64_t>(str[i + 4]) << 32) ||
|
||||
(static_cast<uint64_t>(str[i + 5]) << 40) ||
|
||||
(static_cast<uint64_t>(str[i + 6]) << 48) ||
|
||||
(static_cast<uint64_t>(str[i + 7]) << 56);
|
||||
__ movq(rax, combined_chars, RelocInfo::NONE);
|
||||
__ cmpq(rax, Operand(rbx, byte_offset + i));
|
||||
i += 8;
|
||||
} else if (i + 4 <= n) {
|
||||
uint32_t combined_chars =
|
||||
(static_cast<uint32_t>(str[i + 0]) << 0) ||
|
||||
(static_cast<uint32_t>(str[i + 1]) << 8) ||
|
||||
(static_cast<uint32_t>(str[i + 2]) << 16) ||
|
||||
(static_cast<uint32_t>(str[i + 3]) << 24);
|
||||
__ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars));
|
||||
i += 4;
|
||||
} else {
|
||||
__ cmpb(Operand(rbx, byte_offset + i),
|
||||
Immediate(static_cast<int8_t>(str[i])));
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
ASSERT(mode_ == UC16);
|
||||
__ cmpw(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
|
||||
Immediate(str[i]));
|
||||
if (i + 4 <= n) {
|
||||
uint64_t combined_chars = *reinterpret_cast<const uint64_t*>(&str[i]);
|
||||
__ movq(rax, combined_chars, RelocInfo::NONE);
|
||||
__ cmpq(rax,
|
||||
Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
|
||||
i += 4;
|
||||
} else if (i + 2 <= n) {
|
||||
uint32_t combined_chars = *reinterpret_cast<const uint32_t*>(&str[i]);
|
||||
__ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)),
|
||||
Immediate(combined_chars));
|
||||
i += 2;
|
||||
} else {
|
||||
__ movzxwl(rax,
|
||||
Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)));
|
||||
__ cmpl(rax, Immediate(str[i]));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
BranchOrBacktrack(not_equal, on_failure);
|
||||
}
|
||||
@ -671,7 +735,6 @@ Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
|
||||
__ push(rbx); // Callee-save
|
||||
#endif
|
||||
|
||||
__ push(Immediate(0)); // Make room for "input start - 1" constant.
|
||||
__ push(Immediate(0)); // Make room for "at start" constant.
|
||||
|
||||
// Check if we have space on the stack for registers.
|
||||
@ -724,14 +787,6 @@ Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
|
||||
// position registers.
|
||||
__ movq(Operand(rbp, kInputStartMinusOne), rax);
|
||||
|
||||
// Determine whether the start index is zero, that is at the start of the
|
||||
// string, and store that value in a local variable.
|
||||
__ movq(rbx, Operand(rbp, kStartIndex));
|
||||
__ xor_(rcx, rcx); // setcc only operates on cl (lower byte of rcx).
|
||||
__ testq(rbx, rbx);
|
||||
__ setcc(zero, rcx); // 1 if 0 (start of string), 0 if positive.
|
||||
__ movq(Operand(rbp, kAtStart), rcx);
|
||||
|
||||
if (num_saved_registers_ > 0) {
|
||||
// Fill saved registers with initial value = start offset - 1
|
||||
// Fill in stack push order, to avoid accessing across an unwritten
|
||||
@ -761,8 +816,8 @@ Handle<Object> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
|
||||
__ Move(code_object_pointer(), masm_->CodeObject());
|
||||
// Load previous char as initial value of current-character.
|
||||
Label at_start;
|
||||
__ cmpb(Operand(rbp, kAtStart), Immediate(0));
|
||||
__ j(not_equal, &at_start);
|
||||
__ cmpb(Operand(rbp, kStartIndex), Immediate(0));
|
||||
__ j(equal, &at_start);
|
||||
LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
|
||||
__ jmp(&start_label_);
|
||||
__ bind(&at_start);
|
||||
|
@ -173,10 +173,9 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
|
||||
// the frame in GetCode.
|
||||
static const int kInputStartMinusOne =
|
||||
kLastCalleeSaveRegister - kPointerSize;
|
||||
static const int kAtStart = kInputStartMinusOne - kPointerSize;
|
||||
|
||||
// First register address. Following registers are below it on the stack.
|
||||
static const int kRegisterZero = kAtStart - kPointerSize;
|
||||
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
|
||||
|
||||
// Initial size of code buffer.
|
||||
static const size_t kRegExpCodeSize = 1024;
|
||||
|
Loading…
Reference in New Issue
Block a user