Makes irregexp-ia32 feature complete wrt. regexps.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@920 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2008-12-05 09:18:55 +00:00
parent 30204a0c00
commit ed4e792cd5
4 changed files with 215 additions and 7 deletions

View File

@ -27,6 +27,7 @@
#include <string.h>
#include "v8.h"
#include "unicode.h"
#include "log.h"
#include "ast.h"
#include "macro-assembler.h"
@ -240,22 +241,97 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
// Emits code for a case-insensitive back-reference check. The capture is
// delimited by the values of registers start_reg and start_reg + 1; when the
// comparison fails, control goes to on_no_match via BranchOrBacktrack. A
// zero-length capture skips the comparison entirely.
// NOTE(review): this listing carries both the old and the new form of the
// parameter list and of the "less" branch, plus the UNIMPLEMENTED() stub that
// the new body replaces - confirm which lines are live before building.
// NOTE(review): no check is visible here that ecx bytes of input remain
// before the comparison starts - confirm that over-reads are impossible.
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
int start_reg, Label* on_no_match) {
int start_reg,
Label* on_no_match) {
Label fallthrough;
// eax = value of register start_reg, ecx = value of register start_reg + 1.
__ mov(eax, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
__ sub(ecx, Operand(eax)); // Length to check.
// A negative length is treated as a failed match.
__ j(less, on_no_match);
BranchOrBacktrack(less, on_no_match);
// Zero length: nothing to compare.
__ j(equal, &fallthrough);
UNIMPLEMENTED(); // TODO(lrn): Call runtime function to do test.
if (mode_ == ASCII) {
// ASCII mode: the comparison is done inline, one byte per character.
Label success;
Label fail;
// Save esi and edi; both are modified below and restored on either exit.
__ push(esi);
__ push(edi);
// Presumably turns the two offsets into addresses relative to esi -
// TODO confirm the meaning of esi/edi on entry against the rest of the file.
__ add(edi, Operand(esi));
__ add(esi, Operand(eax));
Label loop;
__ bind(&loop);
// Compares the bytes at esi and edi, at most ecx times. Presumably stops
// after the first mismatching pair, leaving esi/edi one past it (hence the
// -1 displacements below).
__ rep_cmpsb();
__ j(equal, &success);
// Compare lower-case if letters.
__ movzx_b(eax, Operand(edi, -1));
__ or_(eax, 0x20); // To-lower-case
// Unsigned range check: fail unless the folded byte lies in 'a'..'z'.
__ lea(ebx, Operand(eax, -'a'));
__ cmp(ebx, static_cast<int32_t>('z' - 'a'));
__ j(above, &fail);
// Fold the other byte the same way; the two must now be equal.
__ movzx_b(ebx, Operand(esi, -1));
__ or_(ebx, 0x20); // To-lower-case
__ cmp(eax, Operand(ebx));
__ j(not_equal, &fail);
// Resume the block compare if ecx is still non-zero.
__ or_(ecx, Operand(ecx));
__ j(not_equal, &loop);
__ jmp(&success);
__ bind(&fail);
// Mismatch: restore the saved edi and esi, then go to on_no_match.
__ pop(edi);
__ pop(esi);
BranchOrBacktrack(no_condition, on_no_match);
__ bind(&success);
// Match: keep the advanced edi, restore esi, and subtract esi from edi
// again (undoing the add above).
__ pop(eax); // discard original value of edi
__ pop(esi);
__ sub(edi, Operand(esi));
} else {
// Non-ASCII mode: call the C++ helper CaseInsensitiveCompareUC16.
// store state
__ push(esi);
__ push(edi);
__ push(ecx);
// align stack
int frameAlignment = OS::ActivationFrameAlignment();
if (frameAlignment != 0) {
// Keep the unaligned esp in ebx and store it in the fifth stack slot so it
// can be reloaded after the call.
__ mov(ebx, esp);
__ sub(Operand(esp), Immediate(5 * kPointerSize)); // args + esp.
ASSERT(IsPowerOf2(frameAlignment));
__ and_(esp, -frameAlignment);
__ mov(Operand(esp, 4 * kPointerSize), ebx);
} else {
// No alignment requirement: reserve only the four argument slots.
__ sub(Operand(esp), Immediate(4 * kPointerSize));
}
// Put arguments on stack.
// Slots 0..3 receive, in the helper's parameter order: buffer,
// byte_offset1, byte_offset2, byte_length.
__ mov(Operand(esp, 3 * kPointerSize), ecx);
// Both offsets have the value at ebp + kInputEndOffset added before they
// are passed.
__ mov(ebx, Operand(ebp, kInputEndOffset));
__ add(edi, Operand(ebx));
__ mov(Operand(esp, 2 * kPointerSize), edi);
__ add(eax, Operand(ebx));
__ mov(Operand(esp, 1 * kPointerSize), eax);
__ mov(eax, Operand(ebp, kInputBuffer));
__ mov(Operand(esp, 0 * kPointerSize), eax);
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
__ mov(Operand(eax),
Immediate(reinterpret_cast<int32_t>(function_address)));
__ call(Operand(eax));
// Drop the argument area: reload the saved esp, or pop the four slots.
if (frameAlignment != 0) {
__ mov(esp, Operand(esp, 4 * kPointerSize));
} else {
__ add(Operand(esp), Immediate(4 * sizeof(int32_t)));
}
// Restore the state pushed before the call.
__ pop(ecx);
__ pop(edi);
__ pop(esi);
// The helper returns 0 on mismatch; eax == 0 goes to on_no_match.
__ or_(eax, Operand(eax));
BranchOrBacktrack(zero, on_no_match);
// Match: advance edi by the compared length (ecx).
__ add(edi, Operand(ecx));
}
__ bind(&fallthrough);
}
void RegExpMacroAssemblerIA32::CheckNotBackReference(
int start_reg, Label* on_no_match) {
int start_reg,
Label* on_no_match) {
Label fallthrough;
__ mov(eax, register_location(start_reg));
__ mov(ecx, register_location(start_reg + 1));
@ -586,6 +662,37 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
// Private methods:
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
// Compares two UC16 substrings of *buffer, treating characters as equal when
// they are identical or become identical after canonicalization.
//
// buffer       - pointer to the pointer used as the base address.
// byte_offset1 - byte offset from *buffer to the first substring.
// byte_offset2 - byte offset from *buffer to the second substring.
// byte_length  - length of each substring in bytes; must be even.
//
// Returns 1 when every character pair matches, 0 at the first pair that does
// not.
int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(uc16** buffer,
                                                         int byte_offset1,
                                                         int byte_offset2,
                                                         size_t byte_length) {
  ASSERT(byte_length % 2 == 0);
  Address base = reinterpret_cast<Address>(*buffer);
  uc16* lhs = reinterpret_cast<uc16*>(base + byte_offset1);
  uc16* rhs = reinterpret_cast<uc16*>(base + byte_offset2);
  size_t char_count = byte_length / 2;
  for (size_t index = 0; index < char_count; index++) {
    unibrow::uchar left = lhs[index];
    unibrow::uchar right = rhs[index];
    if (left == right) continue;
    // Canonicalize lazily: the left character first, and the right one only
    // if that alone did not make the pair equal.
    canonicalize.get(left, '\0', &left);
    if (left == right) continue;
    canonicalize.get(right, '\0', &right);
    if (left != right) return 0;
  }
  return 1;
}
Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
ASSERT(register_index < (1<<30));
if (num_registers_ <= register_index) {

View File

@ -119,8 +119,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
static const int kRegExpConstantsSize = 256;
// Only unroll loops up to this length.
static const int kMaxInlineStringTests = 8;
// Special "character" marking end of input.
static const uint32_t kEndOfInput = ~0;
// Compares two two-byte (UC16) substrings of *buffer case-insensitively.
// byte_offset1 and byte_offset2 are byte offsets from *buffer to the start
// of each substring; byte_length is their common length in bytes and must
// be even. Returns 1 if the substrings match, 0 otherwise.
static int CaseInsensitiveCompareUC16(uc16** buffer,
int byte_offset1,
int byte_offset2,
size_t byte_length);
// The ebp-relative location of a regexp register.
Operand register_location(int register_index);

View File

@ -817,7 +817,6 @@ TEST(MacroAssemblerIA32BackReference) {
}
TEST(MacroAssemblerIA32AtStart) {
V8::Initialize(NULL);
@ -882,6 +881,65 @@ TEST(MacroAssemblerIA32AtStart) {
// Checks CheckNotBackReferenceIgnoreCase in ASCII mode: the capture "aBc"
// must match "AbC" and "ABC", must not match "xYz", and must not match the
// trailing "ab".
TEST(MacroAssemblerIA32BackRefNoCase) {
  V8::Initialize(NULL);

  // The ia32 regexp macro assembler allocates byte-arrays for its constants,
  // which requires a live handle scope.
  v8::HandleScope handle_scope;

  RegExpMacroAssemblerIA32 masm(RegExpMacroAssemblerIA32::ASCII, 4);

  Label on_failure;
  Label on_success;

  // Registers 2 and 3 delimit the first three characters as the capture.
  masm.WriteCurrentPositionToRegister(0);
  masm.WriteCurrentPositionToRegister(2);
  masm.AdvanceCurrentPosition(3);
  masm.WriteCurrentPositionToRegister(3);

  masm.CheckNotBackReferenceIgnoreCase(2, &on_failure);  // Match "AbC".
  masm.CheckNotBackReferenceIgnoreCase(2, &on_failure);  // Match "ABC".

  // The third comparison runs against "xYz" and is expected to fail.
  Label on_expected_mismatch;
  masm.CheckNotBackReferenceIgnoreCase(2, &on_expected_mismatch);

  masm.Bind(&on_failure);
  masm.Fail();

  masm.Bind(&on_expected_mismatch);
  masm.AdvanceCurrentPosition(3);  // Skip "xYz"
  // The remaining input is expected not to match the capture either.
  masm.CheckNotBackReferenceIgnoreCase(2, &on_success);
  masm.Fail();

  masm.Bind(&on_success);
  masm.WriteCurrentPositionToRegister(1);
  masm.Succeed();

  Handle<Code> code = Handle<Code>::cast(masm.GetCode());

  Handle<String> subject =
      Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
  Handle<SeqAsciiString> seq_subject = Handle<SeqAsciiString>::cast(subject);

  // Start and end of the characters, as offsets from the string object.
  Address chars_start = seq_subject->GetCharsAddress();
  int start_offset = chars_start - reinterpret_cast<Address>(*seq_subject);
  int end_offset = start_offset + seq_subject->length();

  int output[4];
  bool success = RegExpMacroAssemblerIA32::Execute(*code,
                                                   seq_subject.location(),
                                                   start_offset,
                                                   end_offset,
                                                   output,
                                                   true);

  CHECK(success);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
TEST(MacroAssemblerIA32Registers) {
V8::Initialize(NULL);

View File

@ -263,4 +263,43 @@ assertTrue(/foo$(?!bar)/.test("foo"), "football12");
assertFalse(/f(o)\b\1/.test('foo'));
assertTrue(/f(o)\B\1/.test('foo'));
// Back-reference, ignore case:
// Each section covers: a simple match, an input too short for the
// back-reference, an empty capture, an unset (uncaptured) group, a match
// reached through backtracking, and a repeated back-reference.

// ASCII
assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "\\1 ASCII");
assertFalse(/x(...)\1/i.test("xaaaaa"), "\\1 ASCII, string short");
assertTrue(/x((?:))\1\1x/i.test("xx"), "\\1 empty, ASCII");
assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "\\1 uncaptured, ASCII");
assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "\\1 backtrack, ASCII");
assertEquals("xaBcAbCABCx,aBc",
             String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
             "\\1\\1 ASCII");

// For every 7-bit character, pair it with the character that differs only in
// bit 0x20. The pair must match case-insensitively exactly when the character
// is an ASCII letter.
for (var i = 0; i < 128; i++) {
  var testName = "(.)\\1 ~ " + i + "," + (i^0x20);
  var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20));
  var c = String.fromCharCode(i);
  if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
    assertTrue(test, testName);
  } else {
    assertFalse(test, testName);
  }
}

// UC16
// Characters used:
// "\u03a3\u03c2\u03c3\u039b\u03bb" - Sigma, final sigma, sigma, Lambda, lambda
assertEquals("x\u03a3\u03c3x,\u03a3",
             String(/x(.)\1x/i.exec("x\u03a3\u03c3x")), "\\1 UC16");
// This message previously said "ASCII"; it is labelled UC16 so that a failure
// here is reported against the right section.
assertFalse(/x(...)\1/i.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"),
            "\\1 UC16, string short");
assertTrue(/\u03a3((?:))\1\1x/i.test("\u03c2x"), "\\1 empty, UC16");
assertTrue(/x(?:...|(...))\1x/i.test("x\u03a3\u03c2\u03c3x"),
           "\\1 uncaptured, UC16");
assertTrue(/x(?:...|(...))\1x/i.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"),
           "\\1 backtrack, UC16");
var longUC16String = "x\u03a3\u03c2\u039b\u03c2\u03c3\u03bb\u03c3\u03a3\u03bb";
assertEquals(longUC16String + "," + longUC16String.substring(1,4),
             String(/x(...)\1\1/i.exec(longUC16String)),
             "\\1\\1 UC16");
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");