Makes irregexp-ia32 feature complete wrt. regexps.
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@920 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
30204a0c00
commit
ed4e792cd5
@ -27,6 +27,7 @@
|
||||
|
||||
#include <string.h>
|
||||
#include "v8.h"
|
||||
#include "unicode.h"
|
||||
#include "log.h"
|
||||
#include "ast.h"
|
||||
#include "macro-assembler.h"
|
||||
@ -240,22 +241,97 @@ void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
|
||||
int start_reg, Label* on_no_match) {
|
||||
int start_reg,
|
||||
Label* on_no_match) {
|
||||
Label fallthrough;
|
||||
__ mov(eax, register_location(start_reg));
|
||||
__ mov(ecx, register_location(start_reg + 1));
|
||||
__ sub(ecx, Operand(eax)); // Length to check.
|
||||
__ j(less, on_no_match);
|
||||
BranchOrBacktrack(less, on_no_match);
|
||||
__ j(equal, &fallthrough);
|
||||
|
||||
UNIMPLEMENTED(); // TODO(lrn): Call runtime function to do test.
|
||||
if (mode_ == ASCII) {
|
||||
Label success;
|
||||
Label fail;
|
||||
__ push(esi);
|
||||
__ push(edi);
|
||||
__ add(edi, Operand(esi));
|
||||
__ add(esi, Operand(eax));
|
||||
Label loop;
|
||||
__ bind(&loop);
|
||||
__ rep_cmpsb();
|
||||
__ j(equal, &success);
|
||||
// Compare lower-case if letters.
|
||||
__ movzx_b(eax, Operand(edi, -1));
|
||||
__ or_(eax, 0x20); // To-lower-case
|
||||
__ lea(ebx, Operand(eax, -'a'));
|
||||
__ cmp(ebx, static_cast<int32_t>('z' - 'a'));
|
||||
__ j(above, &fail);
|
||||
__ movzx_b(ebx, Operand(esi, -1));
|
||||
__ or_(ebx, 0x20); // To-lower-case
|
||||
__ cmp(eax, Operand(ebx));
|
||||
__ j(not_equal, &fail);
|
||||
__ or_(ecx, Operand(ecx));
|
||||
__ j(not_equal, &loop);
|
||||
__ jmp(&success);
|
||||
|
||||
__ bind(&fail);
|
||||
__ pop(edi);
|
||||
__ pop(esi);
|
||||
BranchOrBacktrack(no_condition, on_no_match);
|
||||
|
||||
__ bind(&success);
|
||||
__ pop(eax); // discard original value of edi
|
||||
__ pop(esi);
|
||||
__ sub(edi, Operand(esi));
|
||||
} else {
|
||||
// store state
|
||||
__ push(esi);
|
||||
__ push(edi);
|
||||
__ push(ecx);
|
||||
// align stack
|
||||
int frameAlignment = OS::ActivationFrameAlignment();
|
||||
if (frameAlignment != 0) {
|
||||
__ mov(ebx, esp);
|
||||
__ sub(Operand(esp), Immediate(5 * kPointerSize)); // args + esp.
|
||||
ASSERT(IsPowerOf2(frameAlignment));
|
||||
__ and_(esp, -frameAlignment);
|
||||
__ mov(Operand(esp, 4 * kPointerSize), ebx);
|
||||
} else {
|
||||
__ sub(Operand(esp), Immediate(4 * kPointerSize));
|
||||
}
|
||||
// Put arguments on stack.
|
||||
__ mov(Operand(esp, 3 * kPointerSize), ecx);
|
||||
__ mov(ebx, Operand(ebp, kInputEndOffset));
|
||||
__ add(edi, Operand(ebx));
|
||||
__ mov(Operand(esp, 2 * kPointerSize), edi);
|
||||
__ add(eax, Operand(ebx));
|
||||
__ mov(Operand(esp, 1 * kPointerSize), eax);
|
||||
__ mov(eax, Operand(ebp, kInputBuffer));
|
||||
__ mov(Operand(esp, 0 * kPointerSize), eax);
|
||||
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
|
||||
__ mov(Operand(eax),
|
||||
Immediate(reinterpret_cast<int32_t>(function_address)));
|
||||
__ call(Operand(eax));
|
||||
if (frameAlignment != 0) {
|
||||
__ mov(esp, Operand(esp, 4 * kPointerSize));
|
||||
} else {
|
||||
__ add(Operand(esp), Immediate(4 * sizeof(int32_t)));
|
||||
}
|
||||
__ pop(ecx);
|
||||
__ pop(edi);
|
||||
__ pop(esi);
|
||||
__ or_(eax, Operand(eax));
|
||||
BranchOrBacktrack(zero, on_no_match);
|
||||
__ add(edi, Operand(ecx));
|
||||
}
|
||||
__ bind(&fallthrough);
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::CheckNotBackReference(
|
||||
int start_reg, Label* on_no_match) {
|
||||
int start_reg,
|
||||
Label* on_no_match) {
|
||||
Label fallthrough;
|
||||
__ mov(eax, register_location(start_reg));
|
||||
__ mov(ecx, register_location(start_reg + 1));
|
||||
@ -586,6 +662,37 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
|
||||
|
||||
// Private methods:
|
||||
|
||||
|
||||
static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
|
||||
|
||||
|
||||
// Compares two equally long runs of two-byte characters inside |*buffer|,
// ignoring case.  |byte_offset1| and |byte_offset2| are byte offsets from the
// address stored in |*buffer|; |byte_length| is the length of each run in
// bytes and must be even.  Returns 1 when every character pair is equal,
// either directly or after mapping through the file-level |canonicalize|
// table, and 0 at the first pair that differs.
int RegExpMacroAssemblerIA32::CaseInsensitiveCompareUC16(uc16** buffer,
                                                         int byte_offset1,
                                                         int byte_offset2,
                                                         size_t byte_length) {
  ASSERT(byte_length % 2 == 0);
  Address base = reinterpret_cast<Address>(*buffer);
  uc16* lhs = reinterpret_cast<uc16*>(base + byte_offset1);
  uc16* rhs = reinterpret_cast<uc16*>(base + byte_offset2);
  size_t char_count = byte_length >> 1;

  for (size_t index = 0; index < char_count; index++) {
    unibrow::uchar left = lhs[index];
    unibrow::uchar right = rhs[index];
    // Identical code units need no canonicalization.
    if (left == right) continue;
    // Canonicalize the left character first; that alone may settle it.
    canonicalize.get(left, '\0', &left);
    if (left == right) continue;
    // Otherwise canonicalize the right character as well.
    canonicalize.get(right, '\0', &right);
    if (left != right) return 0;
  }
  return 1;
}
|
||||
|
||||
|
||||
Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
|
||||
ASSERT(register_index < (1<<30));
|
||||
if (num_registers_ <= register_index) {
|
||||
|
@ -119,8 +119,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
|
||||
static const int kRegExpConstantsSize = 256;
|
||||
// Only unroll loops up to this length.
|
||||
static const int kMaxInlineStringTests = 8;
|
||||
// Special "character" marking end of input.
|
||||
static const uint32_t kEndOfInput = ~0;
|
||||
|
||||
// Compares two-byte strings case insensitively.
|
||||
static int CaseInsensitiveCompareUC16(uc16** buffer,
|
||||
int byte_offset1,
|
||||
int byte_offset2,
|
||||
size_t byte_length);
|
||||
|
||||
// The ebp-relative location of a regexp register.
|
||||
Operand register_location(int register_index);
|
||||
|
@ -817,7 +817,6 @@ TEST(MacroAssemblerIA32BackReference) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
TEST(MacroAssemblerIA32AtStart) {
|
||||
V8::Initialize(NULL);
|
||||
|
||||
@ -882,6 +881,65 @@ TEST(MacroAssemblerIA32AtStart) {
|
||||
|
||||
|
||||
|
||||
|
||||
// Checks case-insensitive back references in ASCII mode.  Registers 2 and 3
// capture the first three characters of the subject; the capture is then
// matched against the following text twice, is expected to mismatch on the
// third attempt, and again after skipping three more characters.
TEST(MacroAssemblerIA32BackRefNoCase) {
  V8::Initialize(NULL);

  // The IA32 regexp macro assembler allocates byte arrays for its constants,
  // which requires a live handle scope.
  v8::HandleScope scope;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);

  Label unexpected_mismatch;
  Label final_mismatch;
  Label third_mismatch;

  // Record the capture [0, 3) in registers 2 and 3.
  m.WriteCurrentPositionToRegister(0);
  m.WriteCurrentPositionToRegister(2);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3);

  // The next two groups of three differ from the capture only in case.
  m.CheckNotBackReferenceIgnoreCase(2, &unexpected_mismatch);  // Match "AbC".
  m.CheckNotBackReferenceIgnoreCase(2, &unexpected_mismatch);  // Match "ABC".
  // The third attempt is expected to mismatch.
  m.CheckNotBackReferenceIgnoreCase(2, &third_mismatch);
  m.Bind(&unexpected_mismatch);
  m.Fail();

  m.Bind(&third_mismatch);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
  // Another mismatch is expected here; it is the path that succeeds.
  m.CheckNotBackReferenceIgnoreCase(2, &final_mismatch);
  m.Fail();

  m.Bind(&final_mismatch);
  m.WriteCurrentPositionToRegister(1);
  m.Succeed();

  Handle<Code> code = Handle<Code>::cast(m.GetCode());

  Handle<SeqAsciiString> subject = Handle<SeqAsciiString>::cast(
      Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab")));
  Address chars_start = subject->GetCharsAddress();
  int start_offset = chars_start - reinterpret_cast<Address>(*subject);
  int end_offset = start_offset + subject->length();

  int output[4];
  bool success = RegExpMacroAssemblerIA32::Execute(*code,
                                                   subject.location(),
                                                   start_offset,
                                                   end_offset,
                                                   output,
                                                   true);

  CHECK(success);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
|
||||
|
||||
|
||||
|
||||
TEST(MacroAssemblerIA32Registers) {
|
||||
V8::Initialize(NULL);
|
||||
|
||||
|
@ -263,4 +263,43 @@ assertTrue(/foo$(?!bar)/.test("foo"), "football12");
|
||||
assertFalse(/f(o)\b\1/.test('foo'));
|
||||
assertTrue(/f(o)\B\1/.test('foo'));
|
||||
|
||||
// Back-reference, ignore case:
// ASCII
assertEquals("xaAx,a", String(/x(a)\1x/i.exec("xaAx")), "\\1 ASCII");
assertFalse(/x(...)\1/i.test("xaaaaa"), "\\1 ASCII, string short");
assertTrue(/x((?:))\1\1x/i.test("xx"), "\\1 empty, ASCII");
assertTrue(/x(?:...|(...))\1x/i.test("xabcx"), "\\1 uncaptured, ASCII");
assertTrue(/x(?:...|(...))\1x/i.test("xabcABCx"), "\\1 backtrack, ASCII");
assertEquals("xaBcAbCABCx,aBc",
             String(/x(...)\1\1x/i.exec("xaBcAbCABCx")),
             "\\1\\1 ASCII");

// For every 7-bit character, pair it with the character that differs only in
// bit 0x20.  The pair must match case-insensitively exactly when the
// character is an ASCII letter.
for (var i = 0; i < 128; i++) {
  var testName = "(.)\\1 ~ " + i + "," + (i^0x20);
  var test = /^(.)\1$/i.test(String.fromCharCode(i, i ^ 0x20));
  var c = String.fromCharCode(i);
  if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
    assertTrue(test, testName);
  } else {
    assertFalse(test, testName);
  }
}

// UC16
// Characters used:
// "\u03a3\u03c2\u03c3\u039b\u03bb" - Sigma, final sigma, sigma, Lambda, lambda
assertEquals("x\u03a3\u03c3x,\u03a3",
             String(/x(.)\1x/i.exec("x\u03a3\u03c3x")), "\\1 UC16");
assertFalse(/x(...)\1/i.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"),
            "\\1 UC16, string short");
assertTrue(/\u03a3((?:))\1\1x/i.test("\u03c2x"), "\\1 empty, UC16");
assertTrue(/x(?:...|(...))\1x/i.test("x\u03a3\u03c2\u03c3x"),
           "\\1 uncaptured, UC16");
assertTrue(/x(?:...|(...))\1x/i.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"),
           "\\1 backtrack, UC16");
var longUC16String = "x\u03a3\u03c2\u039b\u03c2\u03c3\u03bb\u03c3\u03a3\u03bb";
assertEquals(longUC16String + "," + longUC16String.substring(1,4),
             String(/x(...)\1\1/i.exec(longUC16String)),
             "\\1\\1 UC16");

assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");
|
||||
|
Loading…
Reference in New Issue
Block a user