From e867e63281cf9878b3c5d27c6eeda08643711147 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Tue, 11 May 2010 07:29:10 +0000 Subject: [PATCH] RegExp: Remove use of 16-bit immediates on ia32/x64. Also check more than one character at a time. 16-bit immediates requires a prefix that changes the length of the instruction. This causes predecoder mispredictions and subsequent pipeline stalls. Also removed redundant "atStart" local variable which is equivalent to startIndex == 0. Review URL: http://codereview.chromium.org/1988009 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4639 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/ia32/regexp-macro-assembler-ia32.cc | 86 +++++++++++++++------ src/ia32/regexp-macro-assembler-ia32.h | 3 +- src/x64/regexp-macro-assembler-x64.cc | 99 +++++++++++++++++++------ src/x64/regexp-macro-assembler-x64.h | 3 +- 4 files changed, 142 insertions(+), 49 deletions(-) diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc index fdf3b9febb..d9dddd6897 100644 --- a/src/ia32/regexp-macro-assembler-ia32.cc +++ b/src/ia32/regexp-macro-assembler-ia32.cc @@ -51,7 +51,7 @@ namespace internal { * - esp : points to tip of C stack. * - ecx : points to tip of backtrack stack * - * The registers eax, ebx and ecx are free to use for computations. + * The registers eax and ebx are free to use for computations. * * Each call to a public method should retain this convention. * The stack will have the following structure: @@ -72,8 +72,6 @@ namespace internal { * - backup of caller ebx * - Offset of location before start of input (effectively character * position -1). Used to initialize capture registers to a non-position. - * - Boolean at start (if 1, we are starting at the start of the string, - * otherwise 0) * - register 0 ebp[-4] (Only positions must be stored in the first * - register 1 ebp[-8] num_saved_registers_ registers) * - ... @@ -178,8 +176,8 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { Label not_at_start; // Did we start the match at the start of the string at all? - __ cmp(Operand(ebp, kAtStart), Immediate(0)); - BranchOrBacktrack(equal, ¬_at_start); + __ cmp(Operand(ebp, kStartIndex), Immediate(0)); + BranchOrBacktrack(not_equal, ¬_at_start); // If we did, are we still at the start of the input? __ lea(eax, Operand(esi, edi, times_1, 0)); __ cmp(eax, Operand(ebp, kInputStart)); @@ -190,8 +188,8 @@ void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) { void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) { // Did we start the match at the start of the string at all? - __ cmp(Operand(ebp, kAtStart), Immediate(0)); - BranchOrBacktrack(equal, on_not_at_start); + __ cmp(Operand(ebp, kStartIndex), Immediate(0)); + BranchOrBacktrack(not_equal, on_not_at_start); // If we did, are we still at the start of the input? __ lea(eax, Operand(esi, edi, times_1, 0)); __ cmp(eax, Operand(ebp, kInputStart)); @@ -209,6 +207,15 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector str, int cp_offset, Label* on_failure, bool check_end_of_string) { +#ifdef DEBUG + // If input is ASCII, don't even bother calling here if the string to + // match contains a non-ascii character. + if (mode_ == ASCII) { + for (int i = 0; i < str.length(); i++) { + ASSERT(str[i] <= String::kMaxAsciiCharCodeU); + } + } +#endif int byte_length = str.length() * char_size(); int byte_offset = cp_offset * char_size(); if (check_end_of_string) { @@ -222,14 +229,56 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector str, on_failure = &backtrack_label_; } - for (int i = 0; i < str.length(); i++) { + // Do one character test first to minimize loading for the case that + // we don't match at all (loading more than one character introduces that + // chance of reading unaligned and reading across cache boundaries). + // If the first character matches, expect a larger chance of matching the + // string, and start loading more characters at a time. + if (mode_ == ASCII) { + __ cmpb(Operand(esi, edi, times_1, byte_offset), + static_cast(str[0])); + } else { + // Don't use 16-bit immediate. The size changing prefix throws off + // pre-decoding. + __ movzx_w(eax, + Operand(esi, edi, times_1, byte_offset)); + __ cmp(eax, static_cast(str[0])); + } + BranchOrBacktrack(not_equal, on_failure); + + __ lea(ebx, Operand(esi, edi, times_1, 0)); + for (int i = 1, n = str.length(); i < n;) { if (mode_ == ASCII) { - __ cmpb(Operand(esi, edi, times_1, byte_offset + i), - static_cast(str[i])); + if (i <= n - 4) { + int combined_chars = + (static_cast(str[i + 0]) << 0) | + (static_cast(str[i + 1]) << 8) | + (static_cast(str[i + 2]) << 16) | + (static_cast(str[i + 3]) << 24); + __ cmp(Operand(ebx, byte_offset + i), Immediate(combined_chars)); + i += 4; + } else { + __ cmpb(Operand(ebx, byte_offset + i), + static_cast(str[i])); + i += 1; + } } else { ASSERT(mode_ == UC16); - __ cmpw(Operand(esi, edi, times_1, byte_offset + i * sizeof(uc16)), - Immediate(str[i])); + if (i <= n - 2) { + __ cmp(Operand(ebx, byte_offset + i * sizeof(uc16)), + Immediate(*reinterpret_cast(&str[i]))); + i += 2; + } else { + // Avoid a 16-bit immediate operation. It uses the length-changing + // 0x66 prefix which causes pre-decoder misprediction and pipeline + // stalls. See + // "Intel(R) 64 and IA-32 Architectures Optimization Reference Manual" + // (248966.pdf) section 3.4.2.3 "Length-Changing Prefixes (LCP)" + __ movzx_w(eax, + Operand(ebx, byte_offset + i * sizeof(uc16))); + __ cmp(eax, static_cast(str[i])); + i += 1; + } } BranchOrBacktrack(not_equal, on_failure); } @@ -625,7 +674,6 @@ Handle RegExpMacroAssemblerIA32::GetCode(Handle source) { __ push(edi); __ push(ebx); // Callee-save on MacOS. __ push(Immediate(0)); // Make room for "input start - 1" constant. - __ push(Immediate(0)); // Make room for "at start" constant. // Check if we have space on the stack for registers. Label stack_limit_hit; @@ -677,14 +725,6 @@ Handle RegExpMacroAssemblerIA32::GetCode(Handle source) { // position registers. __ mov(Operand(ebp, kInputStartMinusOne), eax); - // Determine whether the start index is zero, that is at the start of the - // string, and store that value in a local variable. - __ xor_(Operand(ecx), ecx); // setcc only operates on cl (lower byte of ecx). - // Register ebx still holds -stringIndex. - __ test(ebx, Operand(ebx)); - __ setcc(zero, ecx); // 1 if 0 (start of string), 0 if positive. - __ mov(Operand(ebp, kAtStart), ecx); - if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. // Fill saved registers with initial value = start offset - 1 // Fill in stack push order, to avoid accessing across an unwritten @@ -712,8 +752,8 @@ Handle RegExpMacroAssemblerIA32::GetCode(Handle source) { __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd)); // Load previous char as initial value of current-character. Label at_start; - __ cmp(Operand(ebp, kAtStart), Immediate(0)); - __ j(not_equal, &at_start); + __ cmp(Operand(ebp, kStartIndex), Immediate(0)); + __ j(equal, &at_start); LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. __ jmp(&start_label_); __ bind(&at_start); diff --git a/src/ia32/regexp-macro-assembler-ia32.h b/src/ia32/regexp-macro-assembler-ia32.h index 823bc03312..8b8eeed6ce 100644 --- a/src/ia32/regexp-macro-assembler-ia32.h +++ b/src/ia32/regexp-macro-assembler-ia32.h @@ -132,9 +132,8 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler { static const int kBackup_edi = kBackup_esi - kPointerSize; static const int kBackup_ebx = kBackup_edi - kPointerSize; static const int kInputStartMinusOne = kBackup_ebx - kPointerSize; - static const int kAtStart = kInputStartMinusOne - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kAtStart - kPointerSize; + static const int kRegisterZero = kInputStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024; diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc index 50b4120a5d..d9b75b1a41 100644 --- a/src/x64/regexp-macro-assembler-x64.cc +++ b/src/x64/regexp-macro-assembler-x64.cc @@ -188,8 +188,8 @@ void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) { void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { Label not_at_start; // Did we start the match at the start of the string at all? - __ cmpb(Operand(rbp, kAtStart), Immediate(0)); - BranchOrBacktrack(equal, ¬_at_start); + __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); + BranchOrBacktrack(not_equal, ¬_at_start); // If we did, are we still at the start of the input? __ lea(rax, Operand(rsi, rdi, times_1, 0)); __ cmpq(rax, Operand(rbp, kInputStart)); @@ -200,8 +200,8 @@ void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) { void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) { // Did we start the match at the start of the string at all? - __ cmpb(Operand(rbp, kAtStart), Immediate(0)); - BranchOrBacktrack(equal, on_not_at_start); + __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); + BranchOrBacktrack(not_equal, on_not_at_start); // If we did, are we still at the start of the input? __ lea(rax, Operand(rsi, rdi, times_1, 0)); __ cmpq(rax, Operand(rbp, kInputStart)); @@ -219,6 +219,15 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector str, int cp_offset, Label* on_failure, bool check_end_of_string) { +#ifdef DEBUG + // If input is ASCII, don't even bother calling here if the string to + // match contains a non-ascii character. + if (mode_ == ASCII) { + for (int i = 0; i < str.length(); i++) { + ASSERT(str[i] <= String::kMaxAsciiCharCodeU); + } + } +#endif int byte_length = str.length() * char_size(); int byte_offset = cp_offset * char_size(); if (check_end_of_string) { @@ -232,16 +241,71 @@ void RegExpMacroAssemblerX64::CheckCharacters(Vector str, on_failure = &backtrack_label_; } - // TODO(lrn): Test multiple characters at a time by loading 4 or 8 bytes - // at a time. - for (int i = 0; i < str.length(); i++) { + // Do one character test first to minimize loading for the case that + // we don't match at all (loading more than one character introduces that + // chance of reading unaligned and reading across cache boundaries). + // If the first character matches, expect a larger chance of matching the + // string, and start loading more characters at a time. + if (mode_ == ASCII) { + __ cmpb(Operand(rsi, rdi, times_1, byte_offset), + Immediate(static_cast(str[0]))); + } else { + // Don't use 16-bit immediate. The size changing prefix throws off + // pre-decoding. + __ movzxwl(rax, + Operand(rsi, rdi, times_1, byte_offset)); + __ cmpl(rax, Immediate(static_cast(str[0]))); + } + BranchOrBacktrack(not_equal, on_failure); + + __ lea(rbx, Operand(rsi, rdi, times_1, 0)); + for (int i = 1, n = str.length(); i < n; ) { if (mode_ == ASCII) { - __ cmpb(Operand(rsi, rdi, times_1, byte_offset + i), - Immediate(static_cast(str[i]))); + if (i + 8 <= n) { + uint64_t combined_chars = + (static_cast(str[i + 0]) << 0) || + (static_cast(str[i + 1]) << 8) || + (static_cast(str[i + 2]) << 16) || + (static_cast(str[i + 3]) << 24) || + (static_cast(str[i + 4]) << 32) || + (static_cast(str[i + 5]) << 40) || + (static_cast(str[i + 6]) << 48) || + (static_cast(str[i + 7]) << 56); + __ movq(rax, combined_chars, RelocInfo::NONE); + __ cmpq(rax, Operand(rbx, byte_offset + i)); + i += 8; + } else if (i + 4 <= n) { + uint32_t combined_chars = + (static_cast(str[i + 0]) << 0) || + (static_cast(str[i + 1]) << 8) || + (static_cast(str[i + 2]) << 16) || + (static_cast(str[i + 3]) << 24); + __ cmpl(Operand(rbx, byte_offset + i), Immediate(combined_chars)); + i += 4; + } else { + __ cmpb(Operand(rbx, byte_offset + i), + Immediate(static_cast(str[i]))); + i++; + } } else { ASSERT(mode_ == UC16); - __ cmpw(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)), - Immediate(str[i])); + if (i + 4 <= n) { + uint64_t combined_chars = *reinterpret_cast(&str[i]); + __ movq(rax, combined_chars, RelocInfo::NONE); + __ cmpq(rax, + Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16))); + i += 4; + } else if (i + 2 <= n) { + uint32_t combined_chars = *reinterpret_cast(&str[i]); + __ cmpl(Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16)), + Immediate(combined_chars)); + i += 2; + } else { + __ movzxwl(rax, + Operand(rsi, rdi, times_1, byte_offset + i * sizeof(uc16))); + __ cmpl(rax, Immediate(str[i])); + i++; + } } BranchOrBacktrack(not_equal, on_failure); } @@ -671,7 +735,6 @@ Handle RegExpMacroAssemblerX64::GetCode(Handle source) { __ push(rbx); // Callee-save #endif - __ push(Immediate(0)); // Make room for "input start - 1" constant. __ push(Immediate(0)); // Make room for "at start" constant. // Check if we have space on the stack for registers. @@ -724,14 +787,6 @@ Handle RegExpMacroAssemblerX64::GetCode(Handle source) { // position registers. __ movq(Operand(rbp, kInputStartMinusOne), rax); - // Determine whether the start index is zero, that is at the start of the - // string, and store that value in a local variable. - __ movq(rbx, Operand(rbp, kStartIndex)); - __ xor_(rcx, rcx); // setcc only operates on cl (lower byte of rcx). - __ testq(rbx, rbx); - __ setcc(zero, rcx); // 1 if 0 (start of string), 0 if positive. - __ movq(Operand(rbp, kAtStart), rcx); - if (num_saved_registers_ > 0) { // Fill saved registers with initial value = start offset - 1 // Fill in stack push order, to avoid accessing across an unwritten @@ -761,8 +816,8 @@ Handle RegExpMacroAssemblerX64::GetCode(Handle source) { __ Move(code_object_pointer(), masm_->CodeObject()); // Load previous char as initial value of current-character. Label at_start; - __ cmpb(Operand(rbp, kAtStart), Immediate(0)); - __ j(not_equal, &at_start); + __ cmpb(Operand(rbp, kStartIndex), Immediate(0)); + __ j(equal, &at_start); LoadCurrentCharacterUnchecked(-1, 1); // Load previous char. __ jmp(&start_label_); __ bind(&at_start); diff --git a/src/x64/regexp-macro-assembler-x64.h b/src/x64/regexp-macro-assembler-x64.h index 490326971f..3bcc3ac165 100644 --- a/src/x64/regexp-macro-assembler-x64.h +++ b/src/x64/regexp-macro-assembler-x64.h @@ -173,10 +173,9 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler { // the frame in GetCode. static const int kInputStartMinusOne = kLastCalleeSaveRegister - kPointerSize; - static const int kAtStart = kInputStartMinusOne - kPointerSize; // First register address. Following registers are below it on the stack. - static const int kRegisterZero = kAtStart - kPointerSize; + static const int kRegisterZero = kInputStartMinusOne - kPointerSize; // Initial size of code buffer. static const size_t kRegExpCodeSize = 1024;