Add external strings support to regexp in generated code.

TEST=test/mjsunit/string-external-cached.js

Review URL: http://codereview.chromium.org/8680010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10070 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2011-11-25 14:04:47 +00:00
parent 04aa022e51
commit d542a2fb75
5 changed files with 206 additions and 62 deletions

View File

@ -4612,8 +4612,13 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
// First check for flat string. None of the following string type tests will
// succeed if kIsNotStringTag is set.
__ and_(r1, r0, Operand(kIsNotStringMask | kStringRepresentationMask), SetCC);
// succeed if subject is not a string or a short external string.
__ and_(r1,
r0,
Operand(kIsNotStringMask |
kStringRepresentationMask |
kShortExternalStringMask),
SetCC);
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
__ b(eq, &seq_string);
@ -4626,17 +4631,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
// In the case of a sliced string its offset has to be taken into account.
Label cons_string, check_encoding;
Label cons_string, external_string, check_encoding;
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
__ cmp(r1, Operand(kExternalStringTag));
__ b(lt, &cons_string);
__ b(eq, &runtime);
__ b(eq, &external_string);
// Catch non-string subject (should already have been guarded against).
STATIC_ASSERT(kNotStringTag != 0);
__ tst(r1, Operand(kIsNotStringMask));
// Catch non-string subject or short external string.
STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0);
__ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask));
__ b(ne, &runtime);
// String is sliced.
@ -4648,8 +4654,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// String is a cons string, check whether it is flat.
__ bind(&cons_string);
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset));
__ LoadRoot(r1, Heap::kEmptyStringRootIndex);
__ cmp(r0, r1);
__ CompareRoot(r0, Heap::kEmptyStringRootIndex);
__ b(ne, &runtime);
__ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
// Is first part of cons or parent of slice a flat string?
@ -4658,7 +4663,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(r0, Operand(kStringRepresentationMask));
__ b(ne, &runtime);
__ b(ne, &external_string);
__ bind(&seq_string);
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
@ -4866,6 +4872,26 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ add(sp, sp, Operand(4 * kPointerSize));
__ Ret();
// External string. Short external strings have already been ruled out.
// r0: scratch
__ bind(&external_string);
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
if (FLAG_debug_code) {
// Assert that we do not have a cons or slice (indirect strings) here.
// Sequential strings have already been ruled out.
__ tst(r0, Operand(kIsIndirectStringMask));
__ Assert(eq, "external string expected, but not found");
}
__ ldr(subject,
FieldMemOperand(subject, ExternalString::kResourceDataOffset));
// Move the pointer so that offset-wise, it looks like a sequential string.
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize);
__ sub(subject,
subject,
Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
__ jmp(&seq_string);
// Do the runtime call to execute the regexp.
__ bind(&runtime);
__ TailCallRuntime(Runtime::kRegExpExec, 4, 1);

View File

@ -3611,13 +3611,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
// First check for flat two byte string.
__ and_(ebx,
kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask);
__ and_(ebx, kIsNotStringMask |
kStringRepresentationMask |
kStringEncodingMask |
kShortExternalStringMask);
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be a flat ascii string. None of the following
// string type tests will succeed if kIsNotStringTag is set.
__ and_(ebx, Immediate(kIsNotStringMask | kStringRepresentationMask));
// string type tests will succeed if subject is not a string or a short
// external string.
__ and_(ebx, Immediate(kIsNotStringMask |
kStringRepresentationMask |
kShortExternalStringMask));
__ j(zero, &seq_ascii_string, Label::kNear);
// ebx: whether subject is a string and if yes, its string representation
@ -3627,17 +3632,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
// In the case of a sliced string its offset has to be taken into account.
Label cons_string, check_encoding;
Label cons_string, external_string, check_encoding;
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
__ cmp(ebx, Immediate(kExternalStringTag));
__ j(less, &cons_string);
__ j(equal, &runtime);
__ j(equal, &external_string);
// Catch non-string subject (should already have been guarded against).
STATIC_ASSERT(kNotStringTag != 0);
__ test(ebx, Immediate(kIsNotStringMask));
// Catch non-string subject or short external string.
STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0);
__ test(ebx, Immediate(kIsNotStringMask | kShortExternalStringTag));
__ j(not_zero, &runtime);
// String is sliced.
@ -3660,10 +3666,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
kStringRepresentationMask | kStringEncodingMask);
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be ascii.
// Any other flat string must be sequential ascii or external.
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
kStringRepresentationMask);
__ j(not_zero, &runtime);
__ j(not_zero, &external_string);
__ bind(&seq_ascii_string);
// eax: subject string (flat ascii)
@ -3884,6 +3890,27 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ mov(eax, Operand(esp, kLastMatchInfoOffset));
__ ret(4 * kPointerSize);
// External string. Short external strings have already been ruled out.
// eax: subject string (expected to be external)
// ebx: scratch
__ bind(&external_string);
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset));
if (FLAG_debug_code) {
// Assert that we do not have a cons or slice (indirect strings) here.
// Sequential strings have already been ruled out.
__ test_b(ebx, kIsIndirectStringMask);
__ Assert(zero, "external string expected, but not found");
}
__ mov(eax, FieldOperand(eax, ExternalString::kResourceDataOffset));
// Move the pointer so that offset-wise, it looks like a sequential string.
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize);
__ sub(eax, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
STATIC_ASSERT(kTwoByteStringTag == 0);
__ test_b(ebx, kStringEncodingMask);
__ j(not_zero, &seq_ascii_string);
__ jmp(&seq_two_byte_string);
// Do the runtime call to execute the regexp.
__ bind(&runtime);
__ TailCallRuntime(Runtime::kRegExpExec, 4, 1);

View File

@ -2658,13 +2658,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
__ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset));
// First check for flat two byte string.
__ andb(rbx, Immediate(
kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask));
__ andb(rbx, Immediate(kIsNotStringMask |
kStringRepresentationMask |
kStringEncodingMask |
kShortExternalStringMask));
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be a flat ascii string. None of the following
// string type tests will succeed if kIsNotStringTag is set.
__ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
// string type tests will succeed if subject is not a string or a short
// external string.
__ andb(rbx, Immediate(kIsNotStringMask |
kStringRepresentationMask |
kShortExternalStringMask));
__ j(zero, &seq_ascii_string, Label::kNear);
// rbx: whether subject is a string and if yes, its string representation
@ -2674,17 +2679,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
// In the case of a sliced string its offset has to be taken into account.
Label cons_string, check_encoding;
Label cons_string, external_string, check_encoding;
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
__ cmpq(rbx, Immediate(kExternalStringTag));
__ j(less, &cons_string, Label::kNear);
__ j(equal, &runtime);
__ j(equal, &external_string);
// Catch non-string subject (should already have been guarded against).
STATIC_ASSERT(kNotStringTag != 0);
__ testb(rbx, Immediate(kIsNotStringMask));
// Catch non-string subject or short external string.
STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0);
__ testb(rbx, Immediate(kIsNotStringMask | kShortExternalStringMask));
__ j(not_zero, &runtime);
// String is sliced.
@ -2709,10 +2715,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
Immediate(kStringRepresentationMask | kStringEncodingMask));
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be ascii.
// Any other flat string must be sequential ascii or external.
__ testb(FieldOperand(rbx, Map::kInstanceTypeOffset),
Immediate(kStringRepresentationMask));
__ j(not_zero, &runtime);
__ j(not_zero, &external_string);
__ bind(&seq_ascii_string);
// rdi: subject string (sequential ascii)
@ -2946,6 +2952,27 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ bind(&termination_exception);
__ ThrowUncatchable(TERMINATION, rax);
// External string. Short external strings have already been ruled out.
// rdi: subject string (expected to be external)
// rbx: scratch
__ bind(&external_string);
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
__ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset));
if (FLAG_debug_code) {
// Assert that we do not have a cons or slice (indirect strings) here.
// Sequential strings have already been ruled out.
__ testb(rbx, Immediate(kIsIndirectStringMask));
__ Assert(zero, "external string expected, but not found");
}
__ movq(rdi, FieldOperand(rdi, ExternalString::kResourceDataOffset));
// Move the pointer so that offset-wise, it looks like a sequential string.
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize);
__ subq(rdi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
STATIC_ASSERT(kTwoByteStringTag == 0);
__ testb(rbx, Immediate(kStringEncodingMask));
__ j(not_zero, &seq_ascii_string);
__ jmp(&seq_two_byte_string);
// Do the runtime call to execute the regexp.
__ bind(&runtime);
__ TailCallRuntime(Runtime::kRegExpExec, 4, 1);

View File

@ -0,0 +1,94 @@
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --expose-externalize-string --expose-gc
// Test data pointer caching of external strings.
/**
 * Checks character access and regexp matching on externalized strings.
 *
 * Relies on the natives enabled by --expose-externalize-string
 * (externalizeString, isAsciiString) and on the mjsunit assertion helpers.
 * Each externalization attempt is wrapped in try/catch because a string can
 * only be externalized once, while this function is called repeatedly.
 */
function test() {
  // Test string.charAt.
  var charat_str = new Array(5);
  charat_str[0] = "0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
  // Build a second long string by repeated doubling of a 16-character seed.
  charat_str[1] = "0123456789ABCDEF";
  for (var i = 0; i < 6; i++) charat_str[1] += charat_str[1];
  try {  // String can only be externalized once
    // NOTE(review): the second argument presumably forces a two-byte
    // external string when true - confirm against the extension.
    externalizeString(charat_str[0], false);
    externalizeString(charat_str[1], true);
  } catch (ex) { }
  // Derive a substring of each external string and a concatenation
  // (presumably exercising sliced and cons strings over external parents).
  charat_str[2] = charat_str[0].slice(0, -1);
  charat_str[3] = charat_str[1].slice(0, -1);
  charat_str[4] = charat_str[0] + charat_str[0];

  // Every variant repeats the 16-character pattern, so the expected
  // character depends only on the index modulo 16.
  for (var i = 0; i < 5; i++) {
    assertEquals('B', charat_str[i].charAt(6*16 + 11));
    assertEquals('C', charat_str[i].charAt(6*16 + 12));
    assertEquals('A', charat_str[i].charAt(3*16 + 10));
    assertEquals('B', charat_str[i].charAt(3*16 + 11));
  }

  // Same check on a very short string. Declared with var so that the test
  // does not create an implicit global.
  var charat_short = "012";
  try {  // String can only be externalized once
    externalizeString(charat_short, true);
  } catch (ex) { }
  assertEquals("1", charat_short.charAt(1));

  // Test regexp.
  var re = /(A|B)/;
  var rere = /(T.{1,2}B)/;
  var ascii = "ABCDEFGHIJKLMNOPQRST";
  var twobyte = "_ABCDEFGHIJKLMNOPQRST";
  try {
    externalizeString(ascii, false);
    externalizeString(twobyte, true);
  } catch (ex) { }
  assertTrue(isAsciiString(ascii));
  assertFalse(isAsciiString(twobyte));

  // Substrings and concatenations built on top of the external strings.
  var ascii_slice = ascii.slice(1,-1);
  var twobyte_slice = twobyte.slice(2,-1);
  var ascii_cons = ascii + ascii;
  var twobyte_cons = twobyte + twobyte;

  // Run each match twice; rere can only match across the seam of the
  // concatenated strings ("...T" followed by "AB..." or "_AB...").
  for (var i = 0; i < 2; i++) {
    assertEquals(["A", "A"], re.exec(ascii));
    assertEquals(["B", "B"], re.exec(ascii_slice));
    assertEquals(["TAB", "TAB"], rere.exec(ascii_cons));
    assertEquals(["A", "A"], re.exec(twobyte));
    assertEquals(["B", "B"], re.exec(twobyte_slice));
    assertEquals(["T_AB", "T_AB"], rere.exec(twobyte_cons));
  }
}
// Call test() ten times in a row to make sure IC-s don't break things.
var i = 0;
while (i < 10) {
  test();
  i++;
}

// Force two garbage collections so the strings get cleaned up, which keeps
// Valgrind happy.
gc();
gc();

View File

@ -87,36 +87,6 @@ function test() {
// Flattened string should still be two-byte.
assertFalse(isAsciiString(str2));
// Test buffered external strings.
var charat_str = new Array(5);
charat_str[0] = "0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\
0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
charat_str[1] = "0123456789ABCDEF";
for (var i = 0; i < 6; i++) charat_str[1] += charat_str[1];
try { // String can only be externalized once
externalizeString(charat_str[0], false);
externalizeString(charat_str[1], true);
} catch (ex) { }
charat_str[2] = charat_str[0].slice(0, -1);
charat_str[3] = charat_str[1].slice(0, -1);
charat_str[4] = charat_str[0] + charat_str[0];
for (var i = 0; i < 5; i++) {
assertEquals('B', charat_str[i].charAt(6*16 + 11));
assertEquals('C', charat_str[i].charAt(6*16 + 12));
assertEquals('A', charat_str[i].charAt(3*16 + 10));
assertEquals('B', charat_str[i].charAt(3*16 + 11));
}
charat_short = "012";
try { // String can only be externalized once
externalizeString(charat_short, true);
} catch (ex) { }
assertEquals("1", charat_short.charAt(1));
}
// Run the test many times to ensure IC-s don't break things.