Tentative implementation of string slices (hidden under the flag --string-slices).

TEST=test/mjsunit/string-slices.js

Review URL: http://codereview.chromium.org/7477045

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@9027 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2011-08-26 13:03:30 +00:00
parent 92b9bdfec5
commit 77141f78ff
31 changed files with 1146 additions and 375 deletions

View File

@ -4367,6 +4367,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ cmp(r2, Operand(r0, ASR, kSmiTagSize));
__ b(gt, &runtime);
// Reset offset for possibly sliced string.
__ mov(r9, Operand(0));
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
// Check the representation and encoding of the subject string.
@ -4374,33 +4376,45 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
// First check for flat string.
__ tst(r0, Operand(kIsNotStringMask | kStringRepresentationMask));
__ and_(r1, r0, Operand(kIsNotStringMask | kStringRepresentationMask), SetCC);
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
__ b(eq, &seq_string);
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
// Check for flat cons string.
// Check for flat cons string or sliced string.
// A flat cons string is a cons string where the second part is the empty
// string. In that case the subject string is just the first part of the cons
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
STATIC_ASSERT(kExternalStringTag !=0);
STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
__ tst(r0, Operand(kIsNotStringMask | kExternalStringTag));
__ b(ne, &runtime);
// In the case of a sliced string its offset has to be taken into account.
Label cons_string, check_encoding;
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
__ cmp(r1, Operand(kExternalStringTag));
__ b(lt, &cons_string);
__ b(eq, &runtime);
// String is sliced.
__ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset));
__ mov(r9, Operand(r9, ASR, kSmiTagSize));
__ ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset));
// r9: offset of sliced string, untagged (the smi tag was shifted out above).
__ jmp(&check_encoding);
// String is a cons string, check whether it is flat.
__ bind(&cons_string);
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset));
__ LoadRoot(r1, Heap::kEmptyStringRootIndex);
__ cmp(r0, r1);
__ b(ne, &runtime);
__ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
// Is first part of cons or parent of slice a flat string?
__ bind(&check_encoding);
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
// Is first part a flat string?
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(r0, Operand(kStringRepresentationMask));
__ b(ne, &runtime);
__ bind(&seq_string);
// subject: Subject string
// regexp_data: RegExp data (FixedArray)
@ -4466,21 +4480,30 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// For arguments 4 and 3 get string length, calculate start of string data and
// calculate the shift of the index (0 for ASCII and 1 for two byte).
__ ldr(r0, FieldMemOperand(subject, String::kLengthOffset));
__ mov(r0, Operand(r0, ASR, kSmiTagSize));
STATIC_ASSERT(SeqAsciiString::kHeaderSize == SeqTwoByteString::kHeaderSize);
__ add(r9, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
__ add(r8, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
__ eor(r3, r3, Operand(1));
// Argument 4 (r3): End of string data
// Argument 3 (r2): Start of string data
// Load the length from the original subject string from the previous stack
// frame. Therefore we have to use fp, which points exactly to two pointer
// sizes below the previous sp. (Because creating a new stack frame pushes
// the previous fp onto the stack and moves up sp by 2 * kPointerSize.)
__ ldr(r0, MemOperand(fp, kSubjectOffset + 2 * kPointerSize));
// If slice offset is not 0, load the length from the original sliced string.
// Argument 4, r3: End of string data
// Argument 3, r2: Start of string data
// Prepare start and end index of the input.
__ add(r9, r8, Operand(r9, LSL, r3));
__ add(r2, r9, Operand(r1, LSL, r3));
__ add(r3, r9, Operand(r0, LSL, r3));
__ ldr(r8, FieldMemOperand(r0, String::kLengthOffset));
__ mov(r8, Operand(r8, ASR, kSmiTagSize));
__ add(r3, r9, Operand(r8, LSL, r3));
// Argument 2 (r1): Previous index.
// Already there
// Argument 1 (r0): Subject string.
__ mov(r0, subject);
// Already there
// Locate the code entry and call it.
__ add(r7, r7, Operand(Code::kHeaderSize - kHeapObjectTag));
@ -4497,12 +4520,12 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// Check the result.
Label success;
__ cmp(r0, Operand(NativeRegExpMacroAssembler::SUCCESS));
__ cmp(subject, Operand(NativeRegExpMacroAssembler::SUCCESS));
__ b(eq, &success);
Label failure;
__ cmp(r0, Operand(NativeRegExpMacroAssembler::FAILURE));
__ cmp(subject, Operand(NativeRegExpMacroAssembler::FAILURE));
__ b(eq, &failure);
__ cmp(r0, Operand(NativeRegExpMacroAssembler::EXCEPTION));
__ cmp(subject, Operand(NativeRegExpMacroAssembler::EXCEPTION));
// If not exception it can only be retry. Handle that in the runtime system.
__ b(ne, &runtime);
// Result must now be exception. If there is no pending exception already a
@ -4514,18 +4537,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ mov(r2, Operand(ExternalReference(Isolate::k_pending_exception_address,
isolate)));
__ ldr(r0, MemOperand(r2, 0));
__ cmp(r0, r1);
__ cmp(subject, r1);
__ b(eq, &runtime);
__ str(r1, MemOperand(r2, 0)); // Clear pending exception.
// Check if the exception is a termination. If so, throw as uncatchable.
__ LoadRoot(ip, Heap::kTerminationExceptionRootIndex);
__ cmp(r0, ip);
__ cmp(subject, ip);
Label termination_exception;
__ b(eq, &termination_exception);
__ Throw(r0); // Expects thrown value in r0.
__ Throw(subject); // Expects thrown value in r0.
__ bind(&termination_exception);
__ ThrowUncatchable(TERMINATION, r0); // Expects thrown value in r0.
@ -4803,6 +4826,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
Label flat_string;
Label ascii_string;
Label got_char_code;
Label sliced_string;
// If the receiver is a smi trigger the non-string case.
__ JumpIfSmi(object_, receiver_not_string_);
@ -4832,7 +4856,11 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
__ b(eq, &flat_string);
// Handle non-flat strings.
__ tst(result_, Operand(kIsConsStringMask));
__ and_(result_, result_, Operand(kStringRepresentationMask));
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
__ cmp(result_, Operand(kExternalStringTag));
__ b(gt, &sliced_string);
__ b(eq, &call_runtime_);
// ConsString.
@ -4840,15 +4868,26 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
Label assure_seq_string;
__ ldr(result_, FieldMemOperand(object_, ConsString::kSecondOffset));
__ LoadRoot(ip, Heap::kEmptyStringRootIndex);
__ cmp(result_, Operand(ip));
__ b(ne, &call_runtime_);
// Get the first of the two strings and load its instance type.
__ ldr(object_, FieldMemOperand(object_, ConsString::kFirstOffset));
__ jmp(&assure_seq_string);
// SlicedString, unpack and add offset.
__ bind(&sliced_string);
__ ldr(result_, FieldMemOperand(object_, SlicedString::kOffsetOffset));
__ add(scratch_, scratch_, result_);
__ ldr(object_, FieldMemOperand(object_, SlicedString::kParentOffset));
// Assure that we are dealing with a sequential string. Go to runtime if not.
__ bind(&assure_seq_string);
__ ldr(result_, FieldMemOperand(object_, HeapObject::kMapOffset));
__ ldrb(result_, FieldMemOperand(result_, Map::kInstanceTypeOffset));
// If the first cons component is also non-flat, then go to runtime.
// Check that parent is not an external string. Go to runtime otherwise.
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(result_, Operand(kStringRepresentationMask));
__ b(ne, &call_runtime_);
@ -5428,10 +5467,17 @@ void SubStringStub::Generate(MacroAssembler* masm) {
// Check bounds and smi-ness.
Register to = r6;
Register from = r7;
if (FLAG_string_slices) {
__ nop(0); // Jumping as first instruction would crash the code generation.
__ jmp(&runtime);
}
__ Ldrd(to, from, MemOperand(sp, kToOffset));
STATIC_ASSERT(kFromOffset == kToOffset + 4);
STATIC_ASSERT(kSmiTag == 0);
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
// I.e., arithmetic shift right by one un-smi-tags.
__ mov(r2, Operand(to, ASR, 1), SetCC);
__ mov(r3, Operand(from, ASR, 1), SetCC, cc);
@ -5440,7 +5486,6 @@ void SubStringStub::Generate(MacroAssembler* masm) {
__ b(mi, &runtime); // From is negative.
// Both to and from are smis.
__ sub(r2, r2, Operand(r3), SetCC);
__ b(mi, &runtime); // Fail if from > to.
// Special handling of sub-strings of length 1 and 2. One character strings

View File

@ -1999,8 +1999,8 @@ LInstruction* LChunkBuilder::DoStringAdd(HStringAdd* instr) {
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
LOperand* string = UseRegister(instr->string());
LOperand* index = UseRegisterOrConstant(instr->index());
LOperand* string = UseTempRegister(instr->string());
LOperand* index = UseTempRegister(instr->index());
LStringCharCodeAt* result = new LStringCharCodeAt(string, index);
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
}

View File

@ -3455,97 +3455,83 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) {
LStringCharCodeAt* instr_;
};
Register scratch = scratch0();
Register string = ToRegister(instr->string());
Register index = no_reg;
int const_index = -1;
if (instr->index()->IsConstantOperand()) {
const_index = ToInteger32(LConstantOperand::cast(instr->index()));
STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
if (!Smi::IsValid(const_index)) {
// Guaranteed to be out of bounds because of the assert above.
// So the bounds check that must dominate this instruction must
// have deoptimized already.
if (FLAG_debug_code) {
__ Abort("StringCharCodeAt: out of bounds index.");
}
// No code needs to be generated.
return;
}
} else {
index = ToRegister(instr->index());
}
Register index = ToRegister(instr->index());
Register result = ToRegister(instr->result());
DeferredStringCharCodeAt* deferred =
new DeferredStringCharCodeAt(this, instr);
Label flat_string, ascii_string, done;
// Fetch the instance type of the receiver into result register.
__ ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
__ ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
// We need special handling for non-flat strings.
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(result, Operand(kStringRepresentationMask));
__ b(eq, &flat_string);
// We need special handling for indirect strings.
Label check_sequential;
__ tst(result, Operand(kIsIndirectStringMask));
__ b(eq, &check_sequential);
// Handle non-flat strings.
__ tst(result, Operand(kIsConsStringMask));
__ b(eq, deferred->entry());
// Dispatch on the indirect string shape: slice or cons.
Label cons_string;
const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
__ tst(result, Operand(kSlicedNotConsMask));
__ b(eq, &cons_string);
// ConsString.
// Handle slices.
Label indirect_string_loaded;
__ ldr(result, FieldMemOperand(string, SlicedString::kOffsetOffset));
__ add(index, index, Operand(result, ASR, kSmiTagSize));
__ ldr(string, FieldMemOperand(string, SlicedString::kParentOffset));
__ jmp(&indirect_string_loaded);
// Handle conses.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
__ ldr(scratch, FieldMemOperand(string, ConsString::kSecondOffset));
__ bind(&cons_string);
__ ldr(result, FieldMemOperand(string, ConsString::kSecondOffset));
__ LoadRoot(ip, Heap::kEmptyStringRootIndex);
__ cmp(scratch, ip);
__ cmp(result, ip);
__ b(ne, deferred->entry());
// Get the first of the two strings and load its instance type.
__ ldr(string, FieldMemOperand(string, ConsString::kFirstOffset));
__ bind(&indirect_string_loaded);
__ ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
__ ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
// If the first cons component is also non-flat, then go to runtime.
// Check whether the string is sequential. The only non-sequential
// shapes we support have just been unwrapped above.
__ bind(&check_sequential);
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(result, Operand(kStringRepresentationMask));
__ b(ne, deferred->entry());
// Check for 1-byte or 2-byte string.
__ bind(&flat_string);
// Dispatch on the encoding: ASCII or two-byte.
Label ascii_string;
STATIC_ASSERT(kAsciiStringTag != 0);
__ tst(result, Operand(kStringEncodingMask));
__ b(ne, &ascii_string);
// 2-byte string.
// Load the 2-byte character code into the result register.
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
if (instr->index()->IsConstantOperand()) {
__ ldrh(result,
FieldMemOperand(string,
SeqTwoByteString::kHeaderSize + 2 * const_index));
} else {
__ add(scratch,
// Two-byte string.
// Load the two-byte character code into the result register.
Label done;
__ add(result,
string,
Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
__ ldrh(result, MemOperand(scratch, index, LSL, 1));
}
__ ldrh(result, MemOperand(result, index, LSL, 1));
__ jmp(&done);
// ASCII string.
// Load the byte into the result register.
__ bind(&ascii_string);
if (instr->index()->IsConstantOperand()) {
__ ldrb(result, FieldMemOperand(string,
SeqAsciiString::kHeaderSize + const_index));
} else {
__ add(scratch,
__ add(result,
string,
Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
__ ldrb(result, MemOperand(scratch, index));
}
__ ldrb(result, MemOperand(result, index));
__ bind(&done);
__ bind(deferred->exit());
}

View File

@ -1034,12 +1034,13 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
}
// Prepare for possible GC.
HandleScope handles;
HandleScope handles(isolate);
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
// Current string.
bool is_ascii = subject->IsAsciiRepresentation();
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
@ -1057,8 +1058,20 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
return EXCEPTION;
}
Handle<String> subject_tmp = subject;
int slice_offset = 0;
// Extract the underlying string and the slice offset.
if (StringShape(*subject_tmp).IsCons()) {
subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
} else if (StringShape(*subject_tmp).IsSliced()) {
SlicedString* slice = SlicedString::cast(*subject_tmp);
subject_tmp = Handle<String>(slice->parent());
slice_offset = slice->offset();
}
// String might have changed.
if (subject->IsAsciiRepresentation() != is_ascii) {
if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
@ -1069,8 +1082,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
ASSERT(StringShape(*subject).IsSequential() ||
StringShape(*subject).IsExternal());
ASSERT(StringShape(*subject_tmp).IsSequential() ||
StringShape(*subject_tmp).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
@ -1078,13 +1091,14 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
const byte* new_address = StringCharacterPosition(*subject, start_index);
const byte* new_address = StringCharacterPosition(*subject_tmp,
start_index + slice_offset);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
// addresses in the RegExp stack frame to match the new value.
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
int byte_length = end_address - start_address;
int byte_length = static_cast<int>(end_address - start_address);
frame_entry<const String*>(re_frame, kInputString) = *subject;
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;

View File

@ -104,6 +104,7 @@ DEFINE_bool(harmony_block_scoping, false, "enable harmony block scoping")
// Flags for experimental implementation features.
DEFINE_bool(unbox_double_arrays, true, "automatically unbox arrays of doubles")
DEFINE_bool(string_slices, false, "use string slices")
// Flags for Crankshaft.
#ifdef V8_TARGET_ARCH_MIPS

View File

@ -323,10 +323,10 @@ AllocationSpace Heap::TargetSpaceId(InstanceType type) {
ASSERT(type != JS_GLOBAL_PROPERTY_CELL_TYPE);
if (type < FIRST_NONSTRING_TYPE) {
// There are three string representations: sequential strings, cons
// strings, and external strings. Only cons strings contain
// non-map-word pointers to heap objects.
return ((type & kStringRepresentationMask) == kConsStringTag)
// There are four string representations: sequential strings, external
// strings, cons strings, and sliced strings.
// Only the latter two contain non-map-word pointers to heap objects.
return ((type & kIsIndirectStringMask) == kIsIndirectStringTag)
? OLD_POINTER_SPACE
: OLD_DATA_SPACE;
} else {

View File

@ -1290,6 +1290,10 @@ class ScavengingVisitor : public StaticVisitorBase {
&ObjectEvacuationStrategy<POINTER_OBJECT>::
template VisitSpecialized<ConsString::kSize>);
table_.Register(kVisitSlicedString,
&ObjectEvacuationStrategy<POINTER_OBJECT>::
template VisitSpecialized<SlicedString::kSize>);
table_.Register(kVisitSharedFunctionInfo,
&ObjectEvacuationStrategy<POINTER_OBJECT>::
template VisitSpecialized<SharedFunctionInfo::kSize>);
@ -2564,6 +2568,8 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
// If the resulting string is small make a flat string.
if (length < String::kMinNonFlatLength) {
// Note that neither of the two inputs can be a slice because:
STATIC_ASSERT(String::kMinNonFlatLength <= SlicedString::kMinLength);
ASSERT(first->IsFlat());
ASSERT(second->IsFlat());
if (is_ascii) {
@ -2655,6 +2661,16 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
// Make an attempt to flatten the buffer to reduce access time.
buffer = buffer->TryFlattenGetString();
// TODO(1626): For now slicing external strings is not supported. However,
// a flat cons string can have an external string as first part in some cases.
// Therefore we have to single out this case as well.
if (!FLAG_string_slices ||
(buffer->IsConsString() &&
(!buffer->IsFlat() ||
!ConsString::cast(buffer)->first()->IsSeqString())) ||
buffer->IsExternalString() ||
length < SlicedString::kMinLength ||
pretenure == TENURED) {
Object* result;
{ MaybeObject* maybe_result = buffer->IsAsciiRepresentation()
? AllocateRawAsciiString(length, pretenure)
@ -2672,7 +2688,42 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
uc16* dest = SeqTwoByteString::cast(string_result)->GetChars();
String::WriteToFlat(buffer, dest, start, end);
}
return result;
}
ASSERT(buffer->IsFlat());
ASSERT(!buffer->IsExternalString());
#if DEBUG
buffer->StringVerify();
#endif
Object* result;
{ Map* map = buffer->IsAsciiRepresentation()
? sliced_ascii_string_map()
: sliced_string_map();
MaybeObject* maybe_result = Allocate(map, NEW_SPACE);
if (!maybe_result->ToObject(&result)) return maybe_result;
}
AssertNoAllocation no_gc;
SlicedString* sliced_string = SlicedString::cast(result);
sliced_string->set_length(length);
sliced_string->set_hash_field(String::kEmptyHashField);
if (buffer->IsConsString()) {
ConsString* cons = ConsString::cast(buffer);
ASSERT(cons->second()->length() == 0);
sliced_string->set_parent(cons->first());
sliced_string->set_offset(start);
} else if (buffer->IsSlicedString()) {
// Prevent nesting sliced strings.
SlicedString* parent_slice = SlicedString::cast(buffer);
sliced_string->set_parent(parent_slice->parent());
sliced_string->set_offset(start + parent_slice->offset());
} else {
sliced_string->set_parent(buffer);
sliced_string->set_offset(start);
}
ASSERT(sliced_string->parent()->IsSeqString());
return result;
}

View File

@ -88,6 +88,8 @@ inline Heap* _inline_get_heap_();
V(Map, symbol_map, SymbolMap) \
V(Map, cons_string_map, ConsStringMap) \
V(Map, cons_ascii_string_map, ConsAsciiStringMap) \
V(Map, sliced_string_map, SlicedStringMap) \
V(Map, sliced_ascii_string_map, SlicedAsciiStringMap) \
V(Map, ascii_symbol_map, AsciiSymbolMap) \
V(Map, cons_symbol_map, ConsSymbolMap) \
V(Map, cons_ascii_symbol_map, ConsAsciiSymbolMap) \

View File

@ -3371,6 +3371,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ cmp(edx, Operand(eax));
__ j(greater, &runtime);
// Reset offset for possibly sliced string.
__ Set(edi, Immediate(0));
// ecx: RegExp data (FixedArray)
// Check the representation and encoding of the subject string.
Label seq_ascii_string, seq_two_byte_string, check_code;
@ -3381,36 +3383,45 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ and_(ebx,
kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask);
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be a flat ascii string.
__ test(Operand(ebx),
__ and_(Operand(ebx),
Immediate(kIsNotStringMask | kStringRepresentationMask));
__ j(zero, &seq_ascii_string);
__ j(zero, &seq_ascii_string, Label::kNear);
// Check for flat cons string.
// Check for flat cons string or sliced string.
// A flat cons string is a cons string where the second part is the empty
// string. In that case the subject string is just the first part of the cons
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
STATIC_ASSERT(kExternalStringTag != 0);
STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
__ test(Operand(ebx),
Immediate(kIsNotStringMask | kExternalStringTag));
__ j(not_zero, &runtime);
// String is a cons string.
__ mov(edx, FieldOperand(eax, ConsString::kSecondOffset));
__ cmp(Operand(edx), factory->empty_string());
// In the case of a sliced string its offset has to be taken into account.
Label cons_string, check_encoding;
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
__ cmp(Operand(ebx), Immediate(kExternalStringTag));
__ j(less, &cons_string);
__ j(equal, &runtime);
// String is sliced.
__ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset));
__ mov(eax, FieldOperand(eax, SlicedString::kParentOffset));
// edi: offset of sliced string, smi-tagged.
// eax: parent string.
__ jmp(&check_encoding, Label::kNear);
// String is a cons string, check whether it is flat.
__ bind(&cons_string);
__ cmp(FieldOperand(eax, ConsString::kSecondOffset), factory->empty_string());
__ j(not_equal, &runtime);
__ mov(eax, FieldOperand(eax, ConsString::kFirstOffset));
__ bind(&check_encoding);
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
// String is a cons string with empty second part.
// eax: first part of cons string.
// ebx: map of first part of cons string.
// Is first part a flat two byte string?
// eax: first part of cons string or parent of sliced string.
// ebx: map of first part of cons string or map of parent of sliced string.
// Is first part of cons or parent of slice a flat two byte string?
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
kStringRepresentationMask | kStringEncodingMask);
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be ascii.
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
kStringRepresentationMask);
@ -3420,14 +3431,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// eax: subject string (flat ascii)
// ecx: RegExp data (FixedArray)
__ mov(edx, FieldOperand(ecx, JSRegExp::kDataAsciiCodeOffset));
__ Set(edi, Immediate(1)); // Type is ascii.
__ jmp(&check_code);
__ Set(ecx, Immediate(1)); // Type is ascii.
__ jmp(&check_code, Label::kNear);
__ bind(&seq_two_byte_string);
// eax: subject string (flat two byte)
// ecx: RegExp data (FixedArray)
__ mov(edx, FieldOperand(ecx, JSRegExp::kDataUC16CodeOffset));
__ Set(edi, Immediate(0)); // Type is two byte.
__ Set(ecx, Immediate(0)); // Type is two byte.
__ bind(&check_code);
// Check that the irregexp code has been generated for the actual string
@ -3437,7 +3448,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// eax: subject string
// edx: code
// edi: encoding of subject string (1 if ascii, 0 if two_byte);
// ecx: encoding of subject string (1 if ascii, 0 if two_byte);
// Load used arguments before starting to push arguments for call to native
// RegExp code to avoid handling changing stack height.
__ mov(ebx, Operand(esp, kPreviousIndexOffset));
@ -3446,7 +3457,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// eax: subject string
// ebx: previous index
// edx: code
// edi: encoding of subject string (1 if ascii 0 if two_byte);
// ecx: encoding of subject string (1 if ascii 0 if two_byte);
// All checks done. Now push arguments for native regexp code.
Counters* counters = masm->isolate()->counters();
__ IncrementCounter(counters->regexp_entry_native(), 1);
@ -3463,23 +3474,47 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ mov(Operand(esp, 6 * kPointerSize), Immediate(1));
// Argument 6: Start (high end) of backtracking stack memory area.
__ mov(ecx, Operand::StaticVariable(address_of_regexp_stack_memory_address));
__ add(ecx, Operand::StaticVariable(address_of_regexp_stack_memory_size));
__ mov(Operand(esp, 5 * kPointerSize), ecx);
__ mov(esi, Operand::StaticVariable(address_of_regexp_stack_memory_address));
__ add(esi, Operand::StaticVariable(address_of_regexp_stack_memory_size));
__ mov(Operand(esp, 5 * kPointerSize), esi);
// Argument 5: static offsets vector buffer.
__ mov(Operand(esp, 4 * kPointerSize),
Immediate(ExternalReference::address_of_static_offsets_vector(
masm->isolate())));
// Argument 2: Previous index.
__ mov(Operand(esp, 1 * kPointerSize), ebx);
// Argument 1: Original subject string.
// The original subject is in the previous stack frame. Therefore we have to
// use ebp, which points exactly to one pointer size below the previous esp.
// (Because creating a new stack frame pushes the previous ebp onto the stack
// and thereby moves up esp by one kPointerSize.)
__ mov(esi, Operand(ebp, kSubjectOffset + kPointerSize));
__ mov(Operand(esp, 0 * kPointerSize), esi);
// esi: original subject string
// eax: underlying subject string
// ebx: previous index
// ecx: encoding of subject string (1 if ascii 0 if two_byte);
// edx: code
// Argument 4: End of string data
// Argument 3: Start of string data
Label setup_two_byte, setup_rest;
__ test(edi, Operand(edi));
__ mov(edi, FieldOperand(eax, String::kLengthOffset));
__ j(zero, &setup_two_byte, Label::kNear);
// Prepare start and end index of the input.
// Load the length from the original sliced string if that is the case.
__ mov(esi, FieldOperand(esi, String::kLengthOffset));
__ add(esi, Operand(edi)); // Calculate input end wrt offset.
__ SmiUntag(edi);
__ lea(ecx, FieldOperand(eax, edi, times_1, SeqAsciiString::kHeaderSize));
__ add(ebx, Operand(edi)); // Calculate input start wrt offset.
// ebx: start index of the input string
// esi: end index of the input string
Label setup_two_byte, setup_rest;
__ test(ecx, Operand(ecx));
__ j(zero, &setup_two_byte, Label::kNear);
__ SmiUntag(esi);
__ lea(ecx, FieldOperand(eax, esi, times_1, SeqAsciiString::kHeaderSize));
__ mov(Operand(esp, 3 * kPointerSize), ecx); // Argument 4.
__ lea(ecx, FieldOperand(eax, ebx, times_1, SeqAsciiString::kHeaderSize));
__ mov(Operand(esp, 2 * kPointerSize), ecx); // Argument 3.
@ -3487,20 +3522,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ bind(&setup_two_byte);
STATIC_ASSERT(kSmiTag == 0);
STATIC_ASSERT(kSmiTagSize == 1); // edi is smi (powered by 2).
__ lea(ecx, FieldOperand(eax, edi, times_1, SeqTwoByteString::kHeaderSize));
STATIC_ASSERT(kSmiTagSize == 1); // esi is smi (powered by 2).
__ lea(ecx, FieldOperand(eax, esi, times_1, SeqTwoByteString::kHeaderSize));
__ mov(Operand(esp, 3 * kPointerSize), ecx); // Argument 4.
__ lea(ecx, FieldOperand(eax, ebx, times_2, SeqTwoByteString::kHeaderSize));
__ mov(Operand(esp, 2 * kPointerSize), ecx); // Argument 3.
__ bind(&setup_rest);
// Argument 2: Previous index.
__ mov(Operand(esp, 1 * kPointerSize), ebx);
// Argument 1: Subject string.
__ mov(Operand(esp, 0 * kPointerSize), eax);
// Locate the code entry and call it.
__ add(Operand(edx), Immediate(Code::kHeaderSize - kHeapObjectTag));
__ call(Operand(edx));
@ -3539,7 +3568,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// by javascript code.
__ cmp(eax, factory->termination_exception());
Label throw_termination_exception;
__ j(equal, &throw_termination_exception);
__ j(equal, &throw_termination_exception, Label::kNear);
// Handle normal exception by following handler chain.
__ Throw(eax);
@ -4811,6 +4840,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
Label flat_string;
Label ascii_string;
Label got_char_code;
Label sliced_string;
// If the receiver is a smi trigger the non-string case.
STATIC_ASSERT(kSmiTag == 0);
@ -4841,31 +4871,45 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
__ j(zero, &flat_string);
// Handle non-flat strings.
__ test(result_, Immediate(kIsConsStringMask));
__ j(zero, &call_runtime_);
__ and_(result_, kStringRepresentationMask);
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
__ cmp(result_, kExternalStringTag);
__ j(greater, &sliced_string, Label::kNear);
__ j(equal, &call_runtime_);
// ConsString.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
Label assure_seq_string;
__ cmp(FieldOperand(object_, ConsString::kSecondOffset),
Immediate(masm->isolate()->factory()->empty_string()));
__ j(not_equal, &call_runtime_);
// Get the first of the two strings and load its instance type.
__ mov(object_, FieldOperand(object_, ConsString::kFirstOffset));
__ jmp(&assure_seq_string, Label::kNear);
// SlicedString, unpack and add offset.
__ bind(&sliced_string);
__ add(scratch_, FieldOperand(object_, SlicedString::kOffsetOffset));
__ mov(object_, FieldOperand(object_, SlicedString::kParentOffset));
// Assure that we are dealing with a sequential string. Go to runtime if not.
__ bind(&assure_seq_string);
__ mov(result_, FieldOperand(object_, HeapObject::kMapOffset));
__ movzx_b(result_, FieldOperand(result_, Map::kInstanceTypeOffset));
// If the first cons component is also non-flat, then go to runtime.
STATIC_ASSERT(kSeqStringTag == 0);
__ test(result_, Immediate(kStringRepresentationMask));
__ j(not_zero, &call_runtime_);
__ jmp(&flat_string, Label::kNear);
// Check for 1-byte or 2-byte string.
__ bind(&flat_string);
STATIC_ASSERT(kAsciiStringTag != 0);
__ test(result_, Immediate(kStringEncodingMask));
__ j(not_zero, &ascii_string);
__ j(not_zero, &ascii_string, Label::kNear);
// 2-byte string.
// Load the 2-byte character code into the result register.
@ -4873,7 +4917,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
__ movzx_w(result_, FieldOperand(object_,
scratch_, times_1, // Scratch is smi-tagged.
SeqTwoByteString::kHeaderSize));
__ jmp(&got_char_code);
__ jmp(&got_char_code, Label::kNear);
// ASCII string.
// Load the byte into the result register.
@ -5185,6 +5229,8 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ and_(ecx, kStringRepresentationMask);
__ cmp(ecx, kExternalStringTag);
__ j(equal, &string_add_runtime);
// We cannot encounter sliced strings here since:
STATIC_ASSERT(SlicedString::kMinLength >= String::kMinNonFlatLength);
// Now check if both strings are ascii strings.
// eax: first string
// ebx: length of resulting flat string as a smi
@ -5596,6 +5642,9 @@ void StringHelper::GenerateHashGetHash(MacroAssembler* masm,
void SubStringStub::Generate(MacroAssembler* masm) {
Label runtime;
if (FLAG_string_slices) {
__ jmp(&runtime);
}
// Stack frame on entry.
// esp[0]: return address
// esp[4]: to

View File

@ -3217,95 +3217,81 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) {
};
Register string = ToRegister(instr->string());
Register index = no_reg;
int const_index = -1;
if (instr->index()->IsConstantOperand()) {
const_index = ToInteger32(LConstantOperand::cast(instr->index()));
STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
if (!Smi::IsValid(const_index)) {
// Guaranteed to be out of bounds because of the assert above.
// So the bounds check that must dominate this instruction must
// have deoptimized already.
if (FLAG_debug_code) {
__ Abort("StringCharCodeAt: out of bounds index.");
}
// No code needs to be generated.
return;
}
} else {
index = ToRegister(instr->index());
}
Register index = ToRegister(instr->index());
Register result = ToRegister(instr->result());
DeferredStringCharCodeAt* deferred =
new DeferredStringCharCodeAt(this, instr);
Label flat_string, ascii_string, done;
// Fetch the instance type of the receiver into result register.
__ mov(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzx_b(result, FieldOperand(result, Map::kInstanceTypeOffset));
// We need special handling for non-flat strings.
STATIC_ASSERT(kSeqStringTag == 0);
__ test(result, Immediate(kStringRepresentationMask));
__ j(zero, &flat_string, Label::kNear);
// We need special handling for indirect strings.
Label check_sequential;
__ test(result, Immediate(kIsIndirectStringMask));
__ j(zero, &check_sequential, Label::kNear);
// Handle non-flat strings.
__ test(result, Immediate(kIsConsStringMask));
__ j(zero, deferred->entry());
// Dispatch on the indirect string shape: slice or cons.
Label cons_string;
const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
__ test(result, Immediate(kSlicedNotConsMask));
__ j(zero, &cons_string, Label::kNear);
// ConsString.
// Handle slices.
Label indirect_string_loaded;
__ mov(result, FieldOperand(string, SlicedString::kOffsetOffset));
__ SmiUntag(result);
__ add(index, Operand(result));
__ mov(string, FieldOperand(string, SlicedString::kParentOffset));
__ jmp(&indirect_string_loaded, Label::kNear);
// Handle conses.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
__ bind(&cons_string);
__ cmp(FieldOperand(string, ConsString::kSecondOffset),
Immediate(factory()->empty_string()));
__ j(not_equal, deferred->entry());
// Get the first of the two strings and load its instance type.
__ mov(string, FieldOperand(string, ConsString::kFirstOffset));
__ bind(&indirect_string_loaded);
__ mov(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzx_b(result, FieldOperand(result, Map::kInstanceTypeOffset));
// If the first cons component is also non-flat, then go to runtime.
// Check whether the string is sequential. The only non-sequential
// shapes we support have just been unwrapped above.
__ bind(&check_sequential);
STATIC_ASSERT(kSeqStringTag == 0);
__ test(result, Immediate(kStringRepresentationMask));
__ j(not_zero, deferred->entry());
// Check for ASCII or two-byte string.
__ bind(&flat_string);
// Dispatch on the encoding: ASCII or two-byte.
Label ascii_string;
STATIC_ASSERT(kAsciiStringTag != 0);
__ test(result, Immediate(kStringEncodingMask));
__ j(not_zero, &ascii_string, Label::kNear);
// Two-byte string.
// Load the two-byte character code into the result register.
Label done;
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
if (instr->index()->IsConstantOperand()) {
__ movzx_w(result,
FieldOperand(string,
SeqTwoByteString::kHeaderSize +
(kUC16Size * const_index)));
} else {
__ movzx_w(result, FieldOperand(string,
index,
times_2,
SeqTwoByteString::kHeaderSize));
}
__ jmp(&done, Label::kNear);
// ASCII string.
// Load the byte into the result register.
__ bind(&ascii_string);
if (instr->index()->IsConstantOperand()) {
__ movzx_b(result, FieldOperand(string,
SeqAsciiString::kHeaderSize + const_index));
} else {
__ movzx_b(result, FieldOperand(string,
index,
times_1,
SeqAsciiString::kHeaderSize));
}
__ bind(&done);
__ bind(deferred->exit());
}

View File

@ -2058,8 +2058,8 @@ LInstruction* LChunkBuilder::DoStringAdd(HStringAdd* instr) {
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
LOperand* string = UseRegister(instr->string());
LOperand* index = UseRegisterOrConstant(instr->index());
LOperand* string = UseTempRegister(instr->string());
LOperand* index = UseTempRegister(instr->index());
LOperand* context = UseAny(instr->context());
LStringCharCodeAt* result = new LStringCharCodeAt(context, string, index);
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));

View File

@ -1065,12 +1065,13 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
}
// Prepare for possible GC.
HandleScope handles;
HandleScope handles(isolate);
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
// Current string.
bool is_ascii = subject->IsAsciiRepresentation();
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
@ -1088,8 +1089,20 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
return EXCEPTION;
}
Handle<String> subject_tmp = subject;
int slice_offset = 0;
// Extract the underlying string and the slice offset.
if (StringShape(*subject_tmp).IsCons()) {
subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
} else if (StringShape(*subject_tmp).IsSliced()) {
SlicedString* slice = SlicedString::cast(*subject_tmp);
subject_tmp = Handle<String>(slice->parent());
slice_offset = slice->offset();
}
// String might have changed.
if (subject->IsAsciiRepresentation() != is_ascii) {
if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
@ -1100,8 +1113,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
ASSERT(StringShape(*subject).IsSequential() ||
StringShape(*subject).IsExternal());
ASSERT(StringShape(*subject_tmp).IsSequential() ||
StringShape(*subject_tmp).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
@ -1109,13 +1122,14 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
const byte* new_address = StringCharacterPosition(*subject, start_index);
const byte* new_address = StringCharacterPosition(*subject_tmp,
start_index + slice_offset);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end
// addresses in the RegExp stack frame to match the new value.
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
int byte_length = end_address - start_address;
int byte_length = static_cast<int>(end_address - start_address);
frame_entry<const String*>(re_frame, kInputString) = *subject;
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;

View File

@ -224,9 +224,9 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
if (!subject->IsFlat()) FlattenString(subject);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
ASSERT(StringShape(needle).IsSequential());
int needle_len = needle->length();
ASSERT(needle->IsFlat());
@ -347,10 +347,7 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
JSRegExp::Flags flags = re->GetFlags();
Handle<String> pattern(re->Pattern());
if (!pattern->IsFlat()) {
FlattenString(pattern);
}
if (!pattern->IsFlat()) FlattenString(pattern);
RegExpCompileData compile_data;
FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
@ -434,22 +431,12 @@ void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject) {
if (!subject->IsFlat()) {
FlattenString(subject);
}
if (!subject->IsFlat()) FlattenString(subject);
// Check the asciiness of the underlying storage.
bool is_ascii;
{
AssertNoAllocation no_gc;
String* sequential_string = *subject;
if (subject->IsConsString()) {
sequential_string = ConsString::cast(*subject)->first();
}
is_ascii = sequential_string->IsAsciiRepresentation();
}
if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
return -1;
}
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1;
#ifdef V8_INTERPRETED_REGEXP
// Byte-code regexp needs space allocated for all its registers.
return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
@ -474,15 +461,11 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
ASSERT(index <= subject->length());
ASSERT(subject->IsFlat());
// A flat ASCII string might have a two-byte first part.
if (subject->IsConsString()) {
subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
}
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
#ifndef V8_INTERPRETED_REGEXP
ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
bool is_ascii = subject->IsAsciiRepresentation();
EnsureCompiledIrregexp(regexp, is_ascii);
Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
NativeRegExpMacroAssembler::Result res =
@ -510,13 +493,13 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
// being internal and external, and even between being ASCII and UC16,
// but the characters are always the same).
IrregexpPrepare(regexp, subject);
is_ascii = subject->IsAsciiRepresentationUnderneath();
} while (true);
UNREACHABLE();
return RE_EXCEPTION;
#else // V8_INTERPRETED_REGEXP
ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
bool is_ascii = subject->IsAsciiRepresentation();
// We must have done EnsureCompiledIrregexp, so we can get the number of
// registers.
int* register_vector = output.start();

View File

@ -394,6 +394,10 @@ class StaticMarkingVisitor : public StaticVisitorBase {
ConsString::BodyDescriptor,
void>::Visit);
table_.Register(kVisitSlicedString,
&FixedBodyVisitor<StaticMarkingVisitor,
SlicedString::BodyDescriptor,
void>::Visit);
table_.Register(kVisitFixedArray,
&FlexibleBodyVisitor<StaticMarkingVisitor,

View File

@ -352,6 +352,31 @@ void String::StringVerify() {
if (IsSymbol()) {
CHECK(!HEAP->InNewSpace(this));
}
if (IsConsString()) {
ConsString::cast(this)->ConsStringVerify();
} else if (IsSlicedString()) {
SlicedString::cast(this)->SlicedStringVerify();
}
}
// Heap-verification helper: checks the structural invariants of a cons
// string and aborts via CHECK if one of them is violated.
void ConsString::ConsStringVerify() {
// The first component must be a string.
CHECK(this->first()->IsString());
// The second component is either the canonical empty string or a string.
CHECK(this->second() == GetHeap()->empty_string() ||
this->second()->IsString());
// A cons string is at least String::kMinNonFlatLength characters long.
CHECK(this->length() >= String::kMinNonFlatLength);
if (this->IsFlat()) {
// A flat cons can only be created by String::SlowTryFlatten.
// Afterwards, the first part may be externalized.
CHECK(this->first()->IsSeqString() || this->first()->IsExternalString());
}
}
// Heap-verification helper: checks the structural invariants of a sliced
// string and aborts via CHECK if one of them is violated.
void SlicedString::SlicedStringVerify() {
// The parent must be a direct string: slices never point at another
// indirect (cons or sliced) string.
CHECK(!this->parent()->IsConsString());
CHECK(!this->parent()->IsSlicedString());
// A sliced string is at least SlicedString::kMinLength characters long.
CHECK(this->length() >= SlicedString::kMinLength);
}

View File

@ -178,10 +178,14 @@ bool Object::IsSymbol() {
bool Object::IsConsString() {
if (!this->IsHeapObject()) return false;
uint32_t type = HeapObject::cast(this)->map()->instance_type();
return (type & (kIsNotStringMask | kStringRepresentationMask)) ==
(kStringTag | kConsStringTag);
if (!IsString()) return false;
return StringShape(String::cast(this)).IsCons();
}
// Returns true iff this object is a string whose representation tag marks
// it as a slice of another string.
bool Object::IsSlicedString() {
  // The shape may only be inspected once we know this is a string at all.
  return IsString() && StringShape(String::cast(this)).IsSliced();
}
@ -269,6 +273,38 @@ bool String::IsTwoByteRepresentation() {
}
bool String::IsAsciiRepresentationUnderneath() {
uint32_t type = map()->instance_type();
STATIC_ASSERT(kIsIndirectStringTag != 0);
STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
ASSERT(IsFlat());
switch (type & (kIsIndirectStringMask | kStringEncodingMask)) {
case kAsciiStringTag:
return true;
case kTwoByteStringTag:
return false;
default: // Cons or sliced string. Need to go deeper.
return GetUnderlying()->IsAsciiRepresentation();
}
}
bool String::IsTwoByteRepresentationUnderneath() {
uint32_t type = map()->instance_type();
STATIC_ASSERT(kIsIndirectStringTag != 0);
STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
ASSERT(IsFlat());
switch (type & (kIsIndirectStringMask | kStringEncodingMask)) {
case kAsciiStringTag:
return false;
case kTwoByteStringTag:
return true;
default: // Cons or sliced string. Need to go deeper.
return GetUnderlying()->IsTwoByteRepresentation();
}
}
bool String::HasOnlyAsciiChars() {
uint32_t type = map()->instance_type();
return (type & kStringEncodingMask) == kAsciiStringTag ||
@ -281,6 +317,16 @@ bool StringShape::IsCons() {
}
// True when the representation bits of the instance type select the
// sliced-string representation.
bool StringShape::IsSliced() {
  return kSlicedStringTag == (type_ & kStringRepresentationMask);
}
// True when the "indirect" bit of the instance type is set, i.e. for the
// representations (cons and sliced) that wrap another string.
bool StringShape::IsIndirect() {
  return kIsIndirectStringTag == (type_ & kIsIndirectStringMask);
}
// True when the representation bits of the instance type select the
// external-string representation.
bool StringShape::IsExternal() {
  return kExternalStringTag == (type_ & kStringRepresentationMask);
}
@ -2075,6 +2121,7 @@ CAST_ACCESSOR(String)
CAST_ACCESSOR(SeqString)
CAST_ACCESSOR(SeqAsciiString)
CAST_ACCESSOR(SeqTwoByteString)
CAST_ACCESSOR(SlicedString)
CAST_ACCESSOR(ConsString)
CAST_ACCESSOR(ExternalString)
CAST_ACCESSOR(ExternalAsciiString)
@ -2156,7 +2203,7 @@ bool String::Equals(String* other) {
MaybeObject* String::TryFlatten(PretenureFlag pretenure) {
if (!StringShape(this).IsCons()) return this;
ConsString* cons = ConsString::cast(this);
if (cons->second()->length() == 0) return cons->first();
if (cons->IsFlat()) return cons->first();
return SlowTryFlatten(pretenure);
}
@ -2164,11 +2211,9 @@ MaybeObject* String::TryFlatten(PretenureFlag pretenure) {
String* String::TryFlattenGetString(PretenureFlag pretenure) {
MaybeObject* flat = TryFlatten(pretenure);
Object* successfully_flattened;
if (flat->ToObject(&successfully_flattened)) {
if (!flat->ToObject(&successfully_flattened)) return this;
return String::cast(successfully_flattened);
}
return this;
}
uint16_t String::Get(int index) {
@ -2185,6 +2230,9 @@ uint16_t String::Get(int index) {
return ExternalAsciiString::cast(this)->ExternalAsciiStringGet(index);
case kExternalStringTag | kTwoByteStringTag:
return ExternalTwoByteString::cast(this)->ExternalTwoByteStringGet(index);
case kSlicedStringTag | kAsciiStringTag:
case kSlicedStringTag | kTwoByteStringTag:
return SlicedString::cast(this)->SlicedStringGet(index);
default:
break;
}
@ -2205,15 +2253,19 @@ void String::Set(int index, uint16_t value) {
bool String::IsFlat() {
switch (StringShape(this).representation_tag()) {
case kConsStringTag: {
String* second = ConsString::cast(this)->second();
// Only flattened strings have second part empty.
return second->length() == 0;
}
default:
return true;
if (!StringShape(this).IsCons()) return true;
return ConsString::cast(this)->second()->length() == 0;
}
// Returns the string wrapped by an indirect string: the parent of a sliced
// string or the first part of a flat cons string.
String* String::GetUnderlying() {
  // Giving direct access to underlying string only makes sense if the
  // wrapping string is already flattened.
  ASSERT(this->IsFlat());
  ASSERT(StringShape(this).IsIndirect());
  // Both indirect representations keep the wrapped string at the same field
  // offset, so a single read serves cons and sliced strings alike.
  STATIC_ASSERT(ConsString::kFirstOffset == SlicedString::kParentOffset);
  return String::cast(READ_FIELD(this, SlicedString::kParentOffset));
}
@ -2272,6 +2324,20 @@ int SeqAsciiString::SeqAsciiStringSize(InstanceType instance_type) {
}
// Reads the parent string, i.e. the string this slice is a substring of.
String* SlicedString::parent() {
  return String::cast(READ_FIELD(this, SlicedString::kParentOffset));
}
// Stores |parent| as the string this slice is a substring of.
void SlicedString::set_parent(String* parent) {
// Only sequential strings are accepted as parents here.
ASSERT(parent->IsSeqString());
// NOTE(review): this is a plain field store with no write barrier visible
// in this function -- confirm that callers only use it where no barrier is
// required (e.g. on a freshly allocated slice).
WRITE_FIELD(this, kParentOffset, parent);
}
SMI_ACCESSORS(SlicedString, offset, kOffsetOffset)
// Reads the first (left-hand) component of this cons string.
String* ConsString::first() {
  return String::cast(READ_FIELD(this, ConsString::kFirstOffset));
}

View File

@ -58,6 +58,9 @@ StaticVisitorBase::VisitorId StaticVisitorBase::GetVisitorId(
return kVisitConsString;
}
case kSlicedStringTag:
return kVisitSlicedString;
case kExternalStringTag:
return GetVisitorIdForSize(kVisitDataObject,
kVisitDataObjectGeneric,

View File

@ -115,6 +115,7 @@ class StaticVisitorBase : public AllStatic {
kVisitStructGeneric,
kVisitConsString,
kVisitSlicedString,
kVisitOddball,
kVisitCode,
kVisitMap,
@ -299,6 +300,11 @@ class StaticNewSpaceVisitor : public StaticVisitorBase {
ConsString::BodyDescriptor,
int>::Visit);
table_.Register(kVisitSlicedString,
&FixedBodyVisitor<StaticVisitor,
SlicedString::BodyDescriptor,
int>::Visit);
table_.Register(kVisitFixedArray,
&FlexibleBodyVisitor<StaticVisitor,
FixedArray::BodyDescriptor,

View File

@ -1208,6 +1208,9 @@ void HeapObject::IterateBody(InstanceType type, int object_size,
case kConsStringTag:
ConsString::BodyDescriptor::IterateBody(this, v);
break;
case kSlicedStringTag:
SlicedString::BodyDescriptor::IterateBody(this, v);
break;
case kExternalStringTag:
if ((type & kStringEncodingMask) == kAsciiStringTag) {
reinterpret_cast<ExternalAsciiString*>(this)->
@ -5042,6 +5045,7 @@ String::FlatContent String::GetFlatContent() {
int length = this->length();
StringShape shape(this);
String* string = this;
int offset = 0;
if (shape.representation_tag() == kConsStringTag) {
ConsString* cons = ConsString::cast(string);
if (cons->second()->length() != 0) {
@ -5050,6 +5054,14 @@ String::FlatContent String::GetFlatContent() {
string = cons->first();
shape = StringShape(string);
}
if (shape.representation_tag() == kSlicedStringTag) {
SlicedString* slice = SlicedString::cast(string);
offset = slice->offset();
string = slice->parent();
shape = StringShape(string);
ASSERT(shape.representation_tag() != kConsStringTag &&
shape.representation_tag() != kSlicedStringTag);
}
if (shape.encoding_tag() == kAsciiStringTag) {
const char* start;
if (shape.representation_tag() == kSeqStringTag) {
@ -5057,7 +5069,7 @@ String::FlatContent String::GetFlatContent() {
} else {
start = ExternalAsciiString::cast(string)->resource()->data();
}
return FlatContent(Vector<const char>(start, length));
return FlatContent(Vector<const char>(start + offset, length));
} else {
ASSERT(shape.encoding_tag() == kTwoByteStringTag);
const uc16* start;
@ -5066,7 +5078,7 @@ String::FlatContent String::GetFlatContent() {
} else {
start = ExternalTwoByteString::cast(string)->resource()->data();
}
return FlatContent(Vector<const uc16>(start, length));
return FlatContent(Vector<const uc16>(start + offset, length));
}
}
@ -5138,13 +5150,17 @@ const uc16* String::GetTwoByteData() {
const uc16* String::GetTwoByteData(unsigned start) {
ASSERT(!IsAsciiRepresentation());
ASSERT(!IsAsciiRepresentationUnderneath());
switch (StringShape(this).representation_tag()) {
case kSeqStringTag:
return SeqTwoByteString::cast(this)->SeqTwoByteStringGetData(start);
case kExternalStringTag:
return ExternalTwoByteString::cast(this)->
ExternalTwoByteStringGetData(start);
case kSlicedStringTag: {
SlicedString* slice = SlicedString::cast(this);
return slice->parent()->GetTwoByteData(start + slice->offset());
}
case kConsStringTag:
UNREACHABLE();
return NULL;
@ -5435,6 +5451,10 @@ const unibrow::byte* String::ReadBlock(String* input,
max_chars);
return rbb->util_buffer;
}
case kSlicedStringTag:
return SlicedString::cast(input)->SlicedStringReadBlock(rbb,
offset_ptr,
max_chars);
default:
break;
}
@ -5578,6 +5598,11 @@ void String::ReadBlockIntoBuffer(String* input,
max_chars);
}
return;
case kSlicedStringTag:
SlicedString::cast(input)->SlicedStringReadBlockIntoBuffer(rbb,
offset_ptr,
max_chars);
return;
default:
break;
}
@ -5712,6 +5737,31 @@ uint16_t ConsString::ConsStringGet(int index) {
}
// Returns the character at |index| of this slice by translating the index
// into the parent's coordinates and reading it from the parent.
uint16_t SlicedString::SlicedStringGet(int index) {
  const int index_in_parent = offset() + index;
  return parent()->Get(index_in_parent);
}
// Reads a block of characters from this slice by delegating to the parent
// string.  |*offset_ptr| is expressed relative to the slice; it is shifted
// into parent coordinates for the duration of the delegated read and shifted
// back afterwards, so the caller again sees a slice-relative position.
const unibrow::byte* SlicedString::SlicedStringReadBlock(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();
  String* backing_string = String::cast(parent());
  *offset_ptr += slice_start;
  const unibrow::byte* block =
      String::ReadBlock(backing_string, buffer, offset_ptr, chars);
  *offset_ptr -= slice_start;
  return block;
}
// Copies a block of characters from this slice into |buffer| by delegating
// to the parent string.  |*offset_ptr| is expressed relative to the slice;
// it is shifted into parent coordinates for the duration of the delegated
// read and shifted back afterwards.
void SlicedString::SlicedStringReadBlockIntoBuffer(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();
  String* backing_string = String::cast(parent());
  *offset_ptr += slice_start;
  String::ReadBlockIntoBuffer(backing_string, buffer, offset_ptr, chars);
  *offset_ptr -= slice_start;
}
template <typename sinkchar>
void String::WriteToFlat(String* src,
sinkchar* sink,
@ -5779,6 +5829,13 @@ void String::WriteToFlat(String* src,
}
break;
}
case kAsciiStringTag | kSlicedStringTag:
case kTwoByteStringTag | kSlicedStringTag: {
SlicedString* slice = SlicedString::cast(source);
unsigned offset = slice->offset();
WriteToFlat(slice->parent(), sink, from + offset, to + offset);
return;
}
}
}
}

View File

@ -89,6 +89,7 @@
// - SeqString
// - SeqAsciiString
// - SeqTwoByteString
// - SlicedString
// - ConsString
// - ExternalString
// - ExternalAsciiString
@ -283,6 +284,7 @@ static const int kVariableSizeSentinel = 0;
V(ASCII_STRING_TYPE) \
V(CONS_STRING_TYPE) \
V(CONS_ASCII_STRING_TYPE) \
V(SLICED_STRING_TYPE) \
V(EXTERNAL_STRING_TYPE) \
V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE) \
V(EXTERNAL_ASCII_STRING_TYPE) \
@ -401,6 +403,14 @@ static const int kVariableSizeSentinel = 0;
ConsString::kSize, \
cons_ascii_string, \
ConsAsciiString) \
V(SLICED_STRING_TYPE, \
SlicedString::kSize, \
sliced_string, \
SlicedString) \
V(SLICED_ASCII_STRING_TYPE, \
SlicedString::kSize, \
sliced_ascii_string, \
SlicedAsciiString) \
V(EXTERNAL_STRING_TYPE, \
ExternalTwoByteString::kSize, \
external_string, \
@ -474,9 +484,17 @@ const uint32_t kStringRepresentationMask = 0x03;
enum StringRepresentationTag {
kSeqStringTag = 0x0,
kConsStringTag = 0x1,
kExternalStringTag = 0x2
kExternalStringTag = 0x2,
kSlicedStringTag = 0x3
};
const uint32_t kIsConsStringMask = 0x1;
const uint32_t kIsIndirectStringMask = 0x1;
const uint32_t kIsIndirectStringTag = 0x1;
STATIC_ASSERT((kSeqStringTag & kIsIndirectStringMask) == 0);
STATIC_ASSERT((kExternalStringTag & kIsIndirectStringMask) == 0);
STATIC_ASSERT(
(kConsStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
STATIC_ASSERT(
(kSlicedStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
// If bit 7 is clear, then bit 3 indicates whether this two-byte
// string actually contains ascii data.
@ -511,6 +529,8 @@ enum InstanceType {
ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
SLICED_STRING_TYPE = kTwoByteStringTag | kSlicedStringTag,
SLICED_ASCII_STRING_TYPE = kAsciiStringTag | kSlicedStringTag,
EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
@ -718,6 +738,7 @@ class MaybeObject BASE_EMBEDDED {
V(SeqString) \
V(ExternalString) \
V(ConsString) \
V(SlicedString) \
V(ExternalTwoByteString) \
V(ExternalAsciiString) \
V(SeqTwoByteString) \
@ -5783,6 +5804,8 @@ class StringShape BASE_EMBEDDED {
inline bool IsSequential();
inline bool IsExternal();
inline bool IsCons();
inline bool IsSliced();
inline bool IsIndirect();
inline bool IsExternalAscii();
inline bool IsExternalTwoByte();
inline bool IsSequentialAscii();
@ -5874,14 +5897,19 @@ class String: public HeapObject {
inline uint32_t hash_field();
inline void set_hash_field(uint32_t value);
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
// Returns whether this string has only ASCII chars, i.e. all of them can
// be ASCII encoded. This might be the case even if the string is
// two-byte. Such strings may appear when the embedder prefers
// two-byte external representations even for ASCII data.
//
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
// Cons and slices have an encoding flag that may not represent the actual
// encoding of the underlying string. This is taken into account here.
// Requires: this->IsFlat()
inline bool IsAsciiRepresentationUnderneath();
inline bool IsTwoByteRepresentationUnderneath();
// NOTE: this should be considered only a hint. False negatives are
// possible.
inline bool HasOnlyAsciiChars();
@ -5921,6 +5949,10 @@ class String: public HeapObject {
// kind.
FlatContent GetFlatContent();
// Returns the parent of a sliced string or first part of a flat cons string.
// Requires: StringShape(this).IsIndirect() && this->IsFlat()
inline String* GetUnderlying();
// Mark the string as an undetectable object. It only applies to
// ascii and two byte string types.
bool MarkAsUndetectable();
@ -6349,11 +6381,69 @@ class ConsString: public String {
typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize>
BodyDescriptor;
#ifdef DEBUG
void ConsStringVerify();
#endif
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString);
};
// The Sliced String class describes strings that are substrings of another
// sequential string.  The motivation is to save time and memory when creating
// a substring.  A Sliced String is described as a pointer to the parent,
// the offset from the start of the parent string and the length.  Using
// a Sliced String therefore requires unpacking of the parent string and
// adding the offset to the start address.  Sliced Strings are never nested:
// when a substring of a Sliced String is created, the double indirection is
// simplified.
// Currently missing features are:
// - handling externalized parent strings
// - external strings as parent
// - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
class SlicedString: public String {
public:
// The string this slice is a substring of.
inline String* parent();
inline void set_parent(String* parent);
// Position of the slice's first character within the parent string.
inline int offset();
inline void set_offset(int offset);
// Dispatched behavior.
// Returns the character at the given slice-relative index.
uint16_t SlicedStringGet(int index);
// Casting.
static inline SlicedString* cast(Object* obj);
// Layout description.
// The parent pointer follows the (pointer-aligned) String header and is
// itself followed by the offset field.
static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize);
static const int kOffsetOffset = kParentOffset + kPointerSize;
static const int kSize = kOffsetOffset + kPointerSize;
// Support for StringInputBuffer
// Both functions take and return *offset_ptr relative to the slice.
inline const unibrow::byte* SlicedStringReadBlock(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
inline void SlicedStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
// Minimum length for a sliced string.
static const int kMinLength = 13;
// Body layout for object visitors: spans the parent and offset fields.
typedef FixedBodyDescriptor<kParentOffset,
kOffsetOffset + kPointerSize, kSize>
BodyDescriptor;
#ifdef DEBUG
// Checks the sliced-string invariants (debug builds only).
void SlicedStringVerify();
#endif
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
};
// The ExternalString class describes string values that are backed by
// a string resource that lies outside the V8 heap. ExternalStrings
// consist of the length field common to all strings, a pointer to the

View File

@ -120,27 +120,31 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
String* subject_ptr = *subject;
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject_ptr->length();
int char_length = subject_ptr->length() - start_offset;
int slice_offset = 0;
// The string has been flattened, so it it is a cons string it contains the
// The string has been flattened, so if it is a cons string it contains the
// full string in the first part.
if (StringShape(subject_ptr).IsCons()) {
ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
subject_ptr = ConsString::cast(subject_ptr)->first();
} else if (StringShape(subject_ptr).IsSliced()) {
SlicedString* slice = SlicedString::cast(subject_ptr);
subject_ptr = slice->parent();
slice_offset = slice->offset();
}
// Ensure that an underlying string has the same ascii-ness.
bool is_ascii = subject_ptr->IsAsciiRepresentation();
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// String is now either Sequential or External
int char_size_shift = is_ascii ? 0 : 1;
int char_length = end_offset - start_offset;
const byte* input_start =
StringCharacterPosition(subject_ptr, start_offset);
StringCharacterPosition(subject_ptr, start_offset + slice_offset);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
Result res = Execute(*regexp_code,
subject_ptr,
*subject,
start_offset,
input_start,
input_end,
@ -152,7 +156,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
Code* code,
String* input,
String* input, // The original subject; it may still be a (flat) cons or sliced string that has to be unpacked.
int start_offset,
const byte* input_start,
const byte* input_end,

View File

@ -3674,7 +3674,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
HandleScope handles(isolate);
CONVERT_ARG_CHECKED(String, subject, 1);
if (!subject->IsFlat()) { FlattenString(subject); }
if (!subject->IsFlat()) FlattenString(subject);
CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
CONVERT_ARG_CHECKED(JSArray, result_array, 3);

View File

@ -2374,7 +2374,6 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ testq(kScratchRegister, kScratchRegister);
__ j(zero, &runtime);
// Check that the first argument is a JSRegExp object.
__ movq(rax, Operand(rsp, kJSRegExpOffset));
__ JumpIfSmi(rax, &runtime);
@ -2445,10 +2444,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ cmpl(rdx, rdi);
__ j(greater, &runtime);
// Reset offset for possibly sliced string.
__ Set(r14, 0);
// rax: RegExp data (FixedArray)
// Check the representation and encoding of the subject string.
Label seq_ascii_string, seq_two_byte_string, check_code;
__ movq(rdi, Operand(rsp, kSubjectOffset));
// Make a copy of the original subject string.
__ movq(r15, rdi);
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
__ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset));
// First check for flat two byte string.
@ -2457,28 +2460,40 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
__ j(zero, &seq_two_byte_string, Label::kNear);
// Any other flat string must be a flat ascii string.
__ testb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
__ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
__ j(zero, &seq_ascii_string, Label::kNear);
// Check for flat cons string.
// Check for flat cons string or sliced string.
// A flat cons string is a cons string where the second part is the empty
// string. In that case the subject string is just the first part of the cons
// string. Also in this case the first part of the cons string is known to be
// a sequential string or an external string.
STATIC_ASSERT(kExternalStringTag !=0);
STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
__ testb(rbx, Immediate(kIsNotStringMask | kExternalStringTag));
__ j(not_zero, &runtime);
// String is a cons string.
// In the case of a sliced string its offset has to be taken into account.
Label cons_string, check_encoding;
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
__ cmpq(rbx, Immediate(kExternalStringTag));
__ j(less, &cons_string, Label::kNear);
__ j(equal, &runtime);
// String is sliced.
__ SmiToInteger32(r14, FieldOperand(rdi, SlicedString::kOffsetOffset));
__ movq(rdi, FieldOperand(rdi, SlicedString::kParentOffset));
// r14: slice offset
// r15: original subject string
// rdi: parent string
__ jmp(&check_encoding, Label::kNear);
// String is a cons string, check whether it is flat.
__ bind(&cons_string);
__ CompareRoot(FieldOperand(rdi, ConsString::kSecondOffset),
Heap::kEmptyStringRootIndex);
__ j(not_equal, &runtime);
__ movq(rdi, FieldOperand(rdi, ConsString::kFirstOffset));
// rdi: first part of cons string or parent of sliced string.
// rbx: map of first part of cons string or map of parent of sliced string.
// Is first part of cons or parent of slice a flat two byte string?
__ bind(&check_encoding);
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
// String is a cons string with empty second part.
// rdi: first part of cons string.
// rbx: map of first part of cons string.
// Is first part a flat two byte string?
__ testb(FieldOperand(rbx, Map::kInstanceTypeOffset),
Immediate(kStringRepresentationMask | kStringEncodingMask));
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
@ -2575,33 +2590,40 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// rbx: previous index
// rcx: encoding of subject string (1 if ascii 0 if two_byte);
// r11: code
// r14: slice offset
// r15: original subject string
// Argument 4: End of string data
// Argument 3: Start of string data
Label setup_two_byte, setup_rest;
__ testb(rcx, rcx); // Last use of rcx as encoding of subject string.
__ j(zero, &setup_two_byte, Label::kNear);
__ SmiToInteger32(rcx, FieldOperand(rdi, String::kLengthOffset));
__ lea(arg4, FieldOperand(rdi, rcx, times_1, SeqAsciiString::kHeaderSize));
__ lea(arg3, FieldOperand(rdi, rbx, times_1, SeqAsciiString::kHeaderSize));
__ jmp(&setup_rest, Label::kNear);
__ bind(&setup_two_byte);
__ SmiToInteger32(rcx, FieldOperand(rdi, String::kLengthOffset));
__ lea(arg4, FieldOperand(rdi, rcx, times_2, SeqTwoByteString::kHeaderSize));
__ lea(arg3, FieldOperand(rdi, rbx, times_2, SeqTwoByteString::kHeaderSize));
__ bind(&setup_rest);
// Argument 2: Previous index.
__ movq(arg2, rbx);
// Argument 1: Subject string.
#ifdef _WIN64
__ movq(arg1, rdi);
#else
// Already there in AMD64 calling convention.
ASSERT(arg1.is(rdi));
USE(arg1);
#endif
// Argument 4: End of string data
// Argument 3: Start of string data
Label setup_two_byte, setup_rest, got_length, length_not_from_slice;
// Prepare start and end index of the input.
// Load the length from the original sliced string if that is the case.
__ addq(rbx, r14);
__ SmiToInteger32(arg3, FieldOperand(r15, String::kLengthOffset));
__ addq(r14, arg3); // Using arg3 as scratch.
// rbx: start index of the input
// r14: end index of the input
// r15: original subject string
__ testb(rcx, rcx); // Last use of rcx as encoding of subject string.
__ j(zero, &setup_two_byte, Label::kNear);
__ lea(arg4, FieldOperand(rdi, r14, times_1, SeqAsciiString::kHeaderSize));
__ lea(arg3, FieldOperand(rdi, rbx, times_1, SeqAsciiString::kHeaderSize));
__ jmp(&setup_rest, Label::kNear);
__ bind(&setup_two_byte);
__ lea(arg4, FieldOperand(rdi, r14, times_2, SeqTwoByteString::kHeaderSize));
__ lea(arg3, FieldOperand(rdi, rbx, times_2, SeqTwoByteString::kHeaderSize));
__ bind(&setup_rest);
// Argument 1: Original subject string.
// A copy of the original (possibly cons or sliced) subject string was saved
// in r15 before the string was unpacked, so it can be passed on directly.
__ movq(arg1, r15);
// Locate the code entry and call it.
__ addq(r11, Immediate(Code::kHeaderSize - kHeapObjectTag));
@ -3851,6 +3873,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
Label flat_string;
Label ascii_string;
Label got_char_code;
Label sliced_string;
// If the receiver is a smi trigger the non-string case.
__ JumpIfSmi(object_, receiver_not_string_);
@ -3879,25 +3902,39 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
__ j(zero, &flat_string);
// Handle non-flat strings.
__ testb(result_, Immediate(kIsConsStringMask));
__ j(zero, &call_runtime_);
__ and_(result_, Immediate(kStringRepresentationMask));
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
__ cmpb(result_, Immediate(kExternalStringTag));
__ j(greater, &sliced_string);
__ j(equal, &call_runtime_);
// ConsString.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
Label assure_seq_string;
__ CompareRoot(FieldOperand(object_, ConsString::kSecondOffset),
Heap::kEmptyStringRootIndex);
__ j(not_equal, &call_runtime_);
// Get the first of the two strings and load its instance type.
__ movq(object_, FieldOperand(object_, ConsString::kFirstOffset));
__ jmp(&assure_seq_string, Label::kNear);
// SlicedString, unpack and add offset.
__ bind(&sliced_string);
__ addq(scratch_, FieldOperand(object_, SlicedString::kOffsetOffset));
__ movq(object_, FieldOperand(object_, SlicedString::kParentOffset));
__ bind(&assure_seq_string);
__ movq(result_, FieldOperand(object_, HeapObject::kMapOffset));
__ movzxbl(result_, FieldOperand(result_, Map::kInstanceTypeOffset));
// If the first cons component or the parent of the slice is also non-flat,
// then go to runtime.
STATIC_ASSERT(kSeqStringTag == 0);
__ testb(result_, Immediate(kStringRepresentationMask));
__ j(not_zero, &call_runtime_);
__ jmp(&flat_string);
// Check for 1-byte or 2-byte string.
__ bind(&flat_string);
@ -4208,6 +4245,8 @@ void StringAddStub::Generate(MacroAssembler* masm) {
__ and_(rcx, Immediate(kStringRepresentationMask));
__ cmpl(rcx, Immediate(kExternalStringTag));
__ j(equal, &string_add_runtime);
// We cannot encounter sliced strings here since:
STATIC_ASSERT(SlicedString::kMinLength >= String::kMinNonFlatLength);
// Now check if both strings are ascii strings.
// rax: first string
// rbx: length of resulting flat string
@ -4600,6 +4639,9 @@ void StringHelper::GenerateHashGetHash(MacroAssembler* masm,
void SubStringStub::Generate(MacroAssembler* masm) {
Label runtime;
if (FLAG_string_slices) {
__ jmp(&runtime);
}
// Stack frame on entry.
// rsp[0]: return address
// rsp[8]: to

View File

@ -3200,95 +3200,80 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) {
};
Register string = ToRegister(instr->string());
Register index = no_reg;
int const_index = -1;
if (instr->index()->IsConstantOperand()) {
const_index = ToInteger32(LConstantOperand::cast(instr->index()));
STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
if (!Smi::IsValid(const_index)) {
// Guaranteed to be out of bounds because of the assert above.
// So the bounds check that must dominate this instruction must
// have deoptimized already.
if (FLAG_debug_code) {
__ Abort("StringCharCodeAt: out of bounds index.");
}
// No code needs to be generated.
return;
}
} else {
index = ToRegister(instr->index());
}
Register index = ToRegister(instr->index());
Register result = ToRegister(instr->result());
DeferredStringCharCodeAt* deferred =
new DeferredStringCharCodeAt(this, instr);
Label flat_string, ascii_string, done;
// Fetch the instance type of the receiver into result register.
__ movq(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzxbl(result, FieldOperand(result, Map::kInstanceTypeOffset));
// We need special handling for non-sequential strings.
STATIC_ASSERT(kSeqStringTag == 0);
__ testb(result, Immediate(kStringRepresentationMask));
__ j(zero, &flat_string, Label::kNear);
// We need special handling for indirect strings.
Label check_sequential;
__ testb(result, Immediate(kIsIndirectStringMask));
__ j(zero, &check_sequential, Label::kNear);
// Handle cons strings and go to deferred code for the rest.
__ testb(result, Immediate(kIsConsStringMask));
__ j(zero, deferred->entry());
// Dispatch on the indirect string shape: slice or cons.
Label cons_string;
const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
__ testb(result, Immediate(kSlicedNotConsMask));
__ j(zero, &cons_string, Label::kNear);
// ConsString.
// Handle slices.
Label indirect_string_loaded;
__ SmiToInteger32(result, FieldOperand(string, SlicedString::kOffsetOffset));
__ addq(index, result);
__ movq(string, FieldOperand(string, SlicedString::kParentOffset));
__ jmp(&indirect_string_loaded, Label::kNear);
// Handle conses.
// Check whether the right hand side is the empty string (i.e. if
// this is really a flat string in a cons string). If that is not
// the case we would rather go to the runtime system now to flatten
// the string.
__ bind(&cons_string);
__ CompareRoot(FieldOperand(string, ConsString::kSecondOffset),
Heap::kEmptyStringRootIndex);
__ j(not_equal, deferred->entry());
// Get the first of the two strings and load its instance type.
__ movq(string, FieldOperand(string, ConsString::kFirstOffset));
__ bind(&indirect_string_loaded);
__ movq(result, FieldOperand(string, HeapObject::kMapOffset));
__ movzxbl(result, FieldOperand(result, Map::kInstanceTypeOffset));
// If the first cons component is also non-flat, then go to runtime.
// Check whether the string is sequential. The only non-sequential
// shapes we support have just been unwrapped above.
__ bind(&check_sequential);
STATIC_ASSERT(kSeqStringTag == 0);
__ testb(result, Immediate(kStringRepresentationMask));
__ j(not_zero, deferred->entry());
// Check for ASCII or two-byte string.
__ bind(&flat_string);
// Dispatch on the encoding: ASCII or two-byte.
Label ascii_string;
STATIC_ASSERT(kAsciiStringTag != 0);
__ testb(result, Immediate(kStringEncodingMask));
__ j(not_zero, &ascii_string, Label::kNear);
// Two-byte string.
// Load the two-byte character code into the result register.
Label done;
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
if (instr->index()->IsConstantOperand()) {
__ movzxwl(result,
FieldOperand(string,
SeqTwoByteString::kHeaderSize +
(kUC16Size * const_index)));
} else {
__ movzxwl(result, FieldOperand(string,
index,
times_2,
SeqTwoByteString::kHeaderSize));
}
__ jmp(&done, Label::kNear);
// ASCII string.
// Load the byte into the result register.
__ bind(&ascii_string);
if (instr->index()->IsConstantOperand()) {
__ movzxbl(result, FieldOperand(string,
SeqAsciiString::kHeaderSize + const_index));
} else {
__ movzxbl(result, FieldOperand(string,
index,
times_1,
SeqAsciiString::kHeaderSize));
}
__ bind(&done);
__ bind(deferred->exit());
}

View File

@ -1984,8 +1984,8 @@ LInstruction* LChunkBuilder::DoStringAdd(HStringAdd* instr) {
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
LOperand* string = UseRegister(instr->string());
LOperand* index = UseRegisterOrConstant(instr->index());
LOperand* string = UseTempRegister(instr->string());
LOperand* index = UseTempRegister(instr->index());
LStringCharCodeAt* result = new LStringCharCodeAt(string, index);
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
}

View File

@ -1170,12 +1170,13 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
}
// Prepare for possible GC.
HandleScope handles;
HandleScope handles(isolate);
Handle<Code> code_handle(re_code);
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
// Current string.
bool is_ascii = subject->IsAsciiRepresentation();
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
ASSERT(re_code->instruction_start() <= *return_address);
ASSERT(*return_address <=
@ -1184,7 +1185,7 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
MaybeObject* result = Execution::HandleStackGuardInterrupt();
if (*code_handle != re_code) { // Return address no longer valid
intptr_t delta = *code_handle - re_code;
int delta = *code_handle - re_code;
// Overwrite the return address on the stack.
*return_address += delta;
}
@ -1193,8 +1194,20 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
return EXCEPTION;
}
Handle<String> subject_tmp = subject;
int slice_offset = 0;
// Extract the underlying string and the slice offset.
if (StringShape(*subject_tmp).IsCons()) {
subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
} else if (StringShape(*subject_tmp).IsSliced()) {
SlicedString* slice = SlicedString::cast(*subject_tmp);
subject_tmp = Handle<String>(slice->parent());
slice_offset = slice->offset();
}
// String might have changed.
if (subject->IsAsciiRepresentation() != is_ascii) {
if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
// If we changed between an ASCII and an UC16 string, the specialized
// code cannot be used, and we need to restart regexp matching from
// scratch (including, potentially, compiling a new version of the code).
@ -1205,8 +1218,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
// be a sequential or external string with the same content.
// Update the start and end pointers in the stack frame to the current
// location (whether it has actually moved or not).
ASSERT(StringShape(*subject).IsSequential() ||
StringShape(*subject).IsExternal());
ASSERT(StringShape(*subject_tmp).IsSequential() ||
StringShape(*subject_tmp).IsExternal());
// The original start address of the characters to match.
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
@ -1214,7 +1227,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
// Find the current start address of the same character at the current string
// position.
int start_index = frame_entry<int>(re_frame, kStartIndex);
const byte* new_address = StringCharacterPosition(*subject, start_index);
const byte* new_address = StringCharacterPosition(*subject_tmp,
start_index + slice_offset);
if (start_address != new_address) {
// If there is a difference, update the object pointer and start and end

View File

@ -2174,7 +2174,7 @@ TEST(ScriptBreakPointLine) {
f = v8::Local<v8::Function>::Cast(env->Global()->Get(v8::String::New("f")));
g = v8::Local<v8::Function>::Cast(env->Global()->Get(v8::String::New("g")));
// Chesk that a break point was hit when the script was run.
// Check that a break point was hit when the script was run.
CHECK_EQ(1, break_point_hit_count);
CHECK_EQ(0, StrLength(last_function_hit));

View File

@ -430,8 +430,7 @@ TEST(ExternalShortStringAdd) {
" return 0;"
"};"
"test()";
CHECK_EQ(0,
v8::Script::Compile(v8::String::New(source))->Run()->Int32Value());
CHECK_EQ(0, CompileRun(source)->Int32Value());
}
@ -481,3 +480,52 @@ TEST(CachedHashOverflow) {
}
}
}
// Verifies that taking a substring of a non-flat cons string leaves the cons
// string flat and yields a sliced string whose parent is the first component
// of that cons string, and that this parent is a sequential string.
TEST(SliceFromCons) {
  FLAG_string_slices = true;
  InitializeVM();
  v8::HandleScope scope;

  // Concatenate a flat string with itself to obtain a non-flat cons string.
  Handle<String> half =
      FACTORY->NewStringFromAscii(CStrVector("parentparentparent"));
  Handle<String> cons = FACTORY->NewConsString(half, half);
  CHECK(cons->IsConsString());
  CHECK(!cons->IsFlat());

  Handle<String> slice = FACTORY->NewSubString(cons, 1, 25);

  // Taking the substring is expected to have flattened the cons string.
  CHECK(cons->IsFlat());

  // The slice must point at the flattened content, not at the cons wrapper.
  CHECK(slice->IsSlicedString());
  CHECK_EQ(SlicedString::cast(*slice)->parent(),
           ConsString::cast(*cons)->first());
  CHECK(SlicedString::cast(*slice)->parent()->IsSeqString());
  CHECK(slice->IsFlat());
}
// Verifies that a slice covering the entire parent string does not produce a
// sliced string, whether requested from script or through the factory, while
// a proper sub-range of the same string does.
TEST(TrivialSlice) {
  FLAG_string_slices = true;
  InitializeVM();
  HandleScope scope;

  const char* init = "var str = 'abcdefghijklmnopqrstuvwxyz';";
  const char* check = "str.slice(0,26)";
  const char* crosscheck = "str.slice(1,25)";
  CompileRun(init);

  // Slicing the full range from script must not yield a sliced string.
  v8::Local<v8::Value> whole_value = CompileRun(check);
  CHECK(whole_value->IsString());
  Handle<String> whole =
      v8::Utils::OpenHandle(v8::String::Cast(*whole_value));
  CHECK(!whole->IsSlicedString());

  // The same holds when the full range is requested through the factory.
  Handle<String> whole_again = FACTORY->NewSubString(whole, 0, 26);
  CHECK(!whole_again->IsSlicedString());

  // Cross-check: a proper sub-range is represented as a sliced string.
  v8::Local<v8::Value> part_value = CompileRun(crosscheck);
  CHECK(part_value->IsString());
  Handle<String> part = v8::Utils::OpenHandle(v8::String::Cast(*part_value));
  CHECK(part->IsSlicedString());
  CHECK_EQ("bcdefghijklmnopqrstuvwxy", *(part->ToCString()));
}

View File

@ -0,0 +1,81 @@
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Flags: --string-slices
//assertEquals('345"12345 6"1234567"123',
// '12345""12345 6""1234567""1234'.slice(2,-1).replace(/""/g, '"'));
// Run an anchored regexp against a substring that starts in the middle of a
// longer string. With --string-slices the substring is presumably represented
// as a slice of foo (TODO confirm), so the match has to start at the slice's
// offset for '^' and the first capture to come out right.
var foo = "lsdfj sldkfj sdklfj læsdfjl sdkfjlsdk fjsdl fjsdljskdj flsj flsdkj flskd regexp: /foobar/\nldkfj sdlkfj sdkl";
// Repeat the match 1000 times and check the first capture after each run.
for(var i = 0; i < 1000; i++) {
assertTrue(/^([a-z]+): (.*)/.test(foo.substring(foo.indexOf("regexp:"))));
assertEquals("regexp", RegExp.$1, "RegExp.$1");
}
var re = /^(((N({)?)|(R)|(U)|(V)|(B)|(H)|(n((n)|(r)|(v)|(h))?)|(r(r)?)|(v)|(b((n)|(b))?)|(h))|((Y)|(A)|(E)|(o(u)?)|(p(u)?)|(q(u)?)|(s)|(t)|(u)|(w)|(x(u)?)|(y)|(z)|(a((T)|(A)|(L))?)|(c)|(e)|(f(u)?)|(g(u)?)|(i)|(j)|(l)|(m(u)?)))+/;
var r = new RegExp(re)
var str = "_Avtnennan gunzvmu pubExnY nEvln vaTxh rmuhguhaTxnY_".slice(1,-1);
str = str + str;
assertTrue(r.test(str));
assertTrue(r.test(str));
var re = /x/;
assertEquals("a.yb", "_axyb_".slice(1,-1).replace(re, "."));
re.compile("y");
assertEquals("ax.b", "_axyb_".slice(1,-1).replace(re, "."));
re.compile("(x)");
assertEquals(["x", "x"], re.exec("_axyb_".slice(1,-1)));
re.compile("(y)");
assertEquals(["y", "y"], re.exec("_axyb_".slice(1,-1)));
for(var i = 0; i < 100; i++) {
var a = "aaaaaaaaaaaaaaaaaaaaaaaabbaacabbabaaaaabbaaaabbac".slice(24,-1);
var b = "bbaacabbabaaaaabbaaaabba" + a;
// The first time, the cons string will be flattened and handled by the
// runtime system.
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(b));
// The second time, the cons string is already flattened and will be
// handled by generated code.
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(b));
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(a));
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(a));
}
// Anchored matches on substrings taken with String.prototype.slice:
//   c: characters dropped at both ends of an all-ASCII string.
//   d: same range, but the source contains \u1234 (a non-ASCII character,
//      presumably forcing a two-byte representation - TODO confirm).
//   e: starts at index 0 and only drops characters at the end.
var c = "ABCDEFGHIJKLMN".slice(2,-2);
var d = "ABCDEF\u1234GHIJKLMN".slice(2,-2);
var e = "ABCDEFGHIJKLMN".slice(0,-2);
// Every check is executed twice.
// NOTE(review): presumably so that the second run goes through a different
// (already compiled) regexp code path than the first - confirm.
assertTrue(/^C.*L$/.test(c));
assertTrue(/^C.*L$/.test(c));
assertTrue(/^C.*L$/.test(d));
assertTrue(/^C.*L$/.test(d));
assertTrue(/^A\w{10}L$/.test(e));
assertTrue(/^A\w{10}L$/.test(e));
var e = "qui-opIasd-fghjklzx-cvbn-mqwer-tyuio-pasdf-ghIjkl-zx".slice(6,-6);
var e_split = e.split("-");
assertEquals(e_split[0], "Iasd");
assertEquals(e_split[1], "fghjklzx");
assertEquals(e_split[6], "ghI");

198
test/mjsunit/string-slices.js Executable file
View File

@ -0,0 +1,198 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Flags: --string-slices --expose-externalize-string
var s = 'abcdefghijklmn';
assertEquals(s, s.substr());
assertEquals(s, s.substr(0));
assertEquals(s, s.substr('0'));
assertEquals(s, s.substr(void 0));
assertEquals(s, s.substr(null));
assertEquals(s, s.substr(false));
assertEquals(s, s.substr(0.9));
assertEquals(s, s.substr({ valueOf: function() { return 0; } }));
assertEquals(s, s.substr({ toString: function() { return '0'; } }));
var s1 = s.substring(1);
assertEquals(s1, s.substr(1));
assertEquals(s1, s.substr('1'));
assertEquals(s1, s.substr(true));
assertEquals(s1, s.substr(1.1));
assertEquals(s1, s.substr({ valueOf: function() { return 1; } }));
assertEquals(s1, s.substr({ toString: function() { return '1'; } }));
assertEquals(s.substring(s.length - 1), s.substr(-1));
assertEquals(s.substring(s.length - 1), s.substr(-1.2));
assertEquals(s.substring(s.length - 1), s.substr(-1.7));
assertEquals(s.substring(s.length - 2), s.substr(-2));
assertEquals(s.substring(s.length - 2), s.substr(-2.3));
assertEquals(s.substring(s.length - 2, s.length - 1), s.substr(-2, 1));
assertEquals(s, s.substr(-100));
assertEquals('abc', s.substr(-100, 3));
assertEquals(s1, s.substr(-s.length + 1));
// assertEquals('', s.substr(0, void 0)); // smjs and rhino
assertEquals('abcdefghijklmn', s.substr(0, void 0)); // kjs and v8
assertEquals('', s.substr(0, null));
assertEquals(s, s.substr(0, String(s.length)));
assertEquals('a', s.substr(0, true));
// Test substrings of different lengths and alignments.
// First ASCII.
// Build a 55-character ASCII string: "ASCII" followed by the two-digit hex
// encodings of 0..24.
var x = "ASCII";
for (var i = 0; i < 25; i++) {
x += (i >> 4).toString(16) + (i & 0x0f).toString(16);
}
/x/.exec(x); // Try to force a flatten.
// Take substrings at start offsets 5..24 with lengths 12..24 and compare each
// one character by character against the source string.
// NOTE(review): the lengths start at 12 here but at 0 in the two-byte variant
// of this loop; presumably related to a minimum slice length - confirm.
for (var i = 5; i < 25; i++) {
for (var j = 12; j < 25; j++) {
var z = x.substring(i, i+j);
var w = Math.random() * 42; // Allocate something new in new-space.
assertEquals(j, z.length);
for (var k = 0; k < j; k++) {
assertEquals(x.charAt(i+k), z.charAt(k));
}
}
}
// Then two-byte strings.
x = "UC16\u2028"; // Non-ascii char forces two-byte string.
for (var i = 0; i < 25; i++) {
x += (i >> 4).toString(16) + (i & 0x0f).toString(16);
}
/x/.exec(x); // Try to force a flatten.
for (var i = 5; i < 25; i++) {
for (var j = 0; j < 25; j++) {
var z = x.substring(i, i + j);
var w = Math.random() * 42; // Allocate something new in new-space.
assertEquals(j, z.length);
for (var k = 0; k < j; k++) {
assertEquals(x.charAt(i+k), z.charAt(k));
}
}
}
// Keep creating strings to force allocation failure on substring creation.
var x = "0123456789ABCDEF";
x += x; // 2^5
x += x;
x += x;
x += x;
x += x;
x += x; // 2^10
x += x;
x += x;
var xl = x.length;
var cache = [];
for (var i = 0; i < 10000; i++) {
var z = x.substring(i % xl);
assertEquals(xl - (i % xl), z.length);
cache.push(z);
}
// Same with two-byte strings
var x = "\u2028123456789ABCDEF";
x += x; // 2^5
x += x;
x += x;
x += x;
x += x;
x += x; // 2^10
x += x;
x += x;
var xl = x.length;
var cache = [];
for (var i = 0; i < 10000; i++) {
var z = x.substring(i % xl);
assertEquals(xl - (i % xl), z.length);
cache.push(z);
}
// Substring of substring.
// Build a chain of 64 substrings in which each one is taken from the previous
// one, dropping i more leading characters at step i. 'offset' accumulates the
// total number of characters dropped relative to x.
var cache = [];
var last = x;
var offset = 0;
for (var i = 0; i < 64; i++) {
var z = last.substring(i);
last = z;
cache.push(z);
offset += i;
}
// Walk the chain from the most deeply nested substring back to the first one.
// Each entry must still match the repeating 16-character pattern and have the
// length implied by the number of characters dropped up to that step.
for (var i = 63; i >= 0; i--) {
var z = cache.pop();
assertTrue(/\u2028123456789ABCDEF/.test(z));
assertEquals(xl - offset, z.length);
offset -= i;
}
// Test charAt for different strings.
// Checks charAt at position i % 11 on three strings. s1 and s2 are expected
// to start with "abcdefghijk"; s3 is expected to be shifted by one character,
// i.e. to start with "bcdefghijkl". Additionally s3.charAt(3) must be the
// character with code 101.
function f(s1, s2, s3, i) {
  var pos = i % 11;
  var expected = String.fromCharCode(97 + pos);
  assertEquals(expected, s1.charAt(pos));
  assertEquals(expected, s2.charAt(pos));
  assertEquals(String.fromCharCode(98 + pos), s3.charAt(pos));
  assertEquals(String.fromCharCode(101), s3.charAt(3));
}
// Exercise f() with three differently constructed strings: a literal, a
// concatenation, and the result of slice(1, -1), which drops the first and
// last character of its source. flat, cons and slice are assigned without
// 'var'. The call is repeated 1000 times.
// NOTE(review): the repetition is presumably there to get f optimized so that
// the generated charAt code is exercised as well - confirm.
flat = "abcdefghijkl12345";
cons = flat + flat.toUpperCase();
slice = "abcdefghijklmn12345".slice(1, -1);
for ( var i = 0; i < 1000; i++) {
f(flat, cons, slice, i);
}
// Same again with a \u2023 character inserted behind the part of each string
// that f() inspects.
flat = "abcdefghijkl1\u20232345";
cons = flat + flat.toUpperCase();
slice = "abcdefghijklmn1\u20232345".slice(1, -1);
for ( var i = 0; i < 1000; i++) {
f(flat, cons, slice, i);
}
// Concatenate substrings.
var ascii = 'abcdefghijklmnop';
var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB';
assertEquals("klmno", ascii.substring(10,15) + ascii.substring(16));
assertEquals("\u03B4\u03B7", utf.substring(3,4) + utf.substring(6,7));
assertEquals("klp", ascii.substring(10,12) + ascii.substring(15,16));
assertEquals("\u03B1\u03B4\u03B5", utf.substring(0,1) + utf.substring(5,3));
assertEquals("", ascii.substring(16) + utf.substring(16));
assertEquals("bcdef\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9",
ascii.substring(1,6) + utf.substring(3,9));
assertEquals("\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9abcdefghijklmnop",
utf.substring(3,9) + ascii);
assertEquals("\u03B2\u03B3\u03B4\u03B5\u03B4\u03B5\u03B6\u03B7",
utf.substring(5,1) + utf.substring(3,7));
/*
// Externalizing strings.
var a = "123456789qwertyuiopasdfghjklzxcvbnm";
var b = a.slice(1,-1);
assertEquals(a.slice(1,-1), b);
externalizeString(a);
assertEquals(a.slice(1,-1), b);
*/

View File

@ -135,3 +135,20 @@ for (var i = 0; i < 10000; i++) {
assertEquals(xl - (i % xl), z.length);
cache.push(z);
}
// Substring of substring.
var cache = [];
var last = x;
var offset = 0;
for (var i = 0; i < 64; i++) {
var z = last.substring(i);
last = z;
cache.push(z);
offset += i;
}
for (var i = 63; i >= 0; i--) {
var z = cache.pop();
assertTrue(/\u2028123456789ABCDEF/.test(z));
assertEquals(xl - offset, z.length);
offset -= i;
}