Tentative implementation of string slices (hidden under the flag --string-slices).
TEST=test/mjsunit/string-slices.js Review URL: http://codereview.chromium.org/7477045 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@9027 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
92b9bdfec5
commit
77141f78ff
@ -4367,6 +4367,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ cmp(r2, Operand(r0, ASR, kSmiTagSize));
|
||||
__ b(gt, &runtime);
|
||||
|
||||
// Reset offset for possibly sliced string.
|
||||
__ mov(r9, Operand(0));
|
||||
// subject: Subject string
|
||||
// regexp_data: RegExp data (FixedArray)
|
||||
// Check the representation and encoding of the subject string.
|
||||
@ -4374,33 +4376,45 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
|
||||
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
|
||||
// First check for flat string.
|
||||
__ tst(r0, Operand(kIsNotStringMask | kStringRepresentationMask));
|
||||
__ and_(r1, r0, Operand(kIsNotStringMask | kStringRepresentationMask), SetCC);
|
||||
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
|
||||
__ b(eq, &seq_string);
|
||||
|
||||
// subject: Subject string
|
||||
// regexp_data: RegExp data (FixedArray)
|
||||
// Check for flat cons string.
|
||||
// Check for flat cons string or sliced string.
|
||||
// A flat cons string is a cons string where the second part is the empty
|
||||
// string. In that case the subject string is just the first part of the cons
|
||||
// string. Also in this case the first part of the cons string is known to be
|
||||
// a sequential string or an external string.
|
||||
STATIC_ASSERT(kExternalStringTag !=0);
|
||||
STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
|
||||
__ tst(r0, Operand(kIsNotStringMask | kExternalStringTag));
|
||||
__ b(ne, &runtime);
|
||||
// In the case of a sliced string its offset has to be taken into account.
|
||||
Label cons_string, check_encoding;
|
||||
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
|
||||
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
|
||||
__ cmp(r1, Operand(kExternalStringTag));
|
||||
__ b(lt, &cons_string);
|
||||
__ b(eq, &runtime);
|
||||
|
||||
// String is sliced.
|
||||
__ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset));
|
||||
__ mov(r9, Operand(r9, ASR, kSmiTagSize));
|
||||
__ ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset));
|
||||
// r9: offset of sliced string, untagged (smi tag shifted out above).
|
||||
__ jmp(&check_encoding);
|
||||
// String is a cons string, check whether it is flat.
|
||||
__ bind(&cons_string);
|
||||
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset));
|
||||
__ LoadRoot(r1, Heap::kEmptyStringRootIndex);
|
||||
__ cmp(r0, r1);
|
||||
__ b(ne, &runtime);
|
||||
__ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
|
||||
// Is first part of cons or parent of slice a flat string?
|
||||
__ bind(&check_encoding);
|
||||
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
|
||||
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
|
||||
// Is first part a flat string?
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ tst(r0, Operand(kStringRepresentationMask));
|
||||
__ b(ne, &runtime);
|
||||
|
||||
__ bind(&seq_string);
|
||||
// subject: Subject string
|
||||
// regexp_data: RegExp data (FixedArray)
|
||||
@ -4466,21 +4480,30 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
// For arguments 4 and 3 get string length, calculate start of string data and
|
||||
// calculate the shift of the index (0 for ASCII and 1 for two byte).
|
||||
__ ldr(r0, FieldMemOperand(subject, String::kLengthOffset));
|
||||
__ mov(r0, Operand(r0, ASR, kSmiTagSize));
|
||||
STATIC_ASSERT(SeqAsciiString::kHeaderSize == SeqTwoByteString::kHeaderSize);
|
||||
__ add(r9, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
|
||||
__ add(r8, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
|
||||
__ eor(r3, r3, Operand(1));
|
||||
// Argument 4 (r3): End of string data
|
||||
// Argument 3 (r2): Start of string data
|
||||
// Load the length from the original subject string from the previous stack
|
||||
// frame. Therefore we have to use fp, which points exactly to two pointer
|
||||
// sizes below the previous sp. (Because creating a new stack frame pushes
|
||||
// the previous fp onto the stack and moves up sp by 2 * kPointerSize.)
|
||||
__ ldr(r0, MemOperand(fp, kSubjectOffset + 2 * kPointerSize));
|
||||
// If slice offset is not 0, load the length from the original sliced string.
|
||||
// Argument 4, r3: End of string data
|
||||
// Argument 3, r2: Start of string data
|
||||
// Prepare start and end index of the input.
|
||||
__ add(r9, r8, Operand(r9, LSL, r3));
|
||||
__ add(r2, r9, Operand(r1, LSL, r3));
|
||||
__ add(r3, r9, Operand(r0, LSL, r3));
|
||||
|
||||
__ ldr(r8, FieldMemOperand(r0, String::kLengthOffset));
|
||||
__ mov(r8, Operand(r8, ASR, kSmiTagSize));
|
||||
__ add(r3, r9, Operand(r8, LSL, r3));
|
||||
|
||||
// Argument 2 (r1): Previous index.
|
||||
// Already there
|
||||
|
||||
// Argument 1 (r0): Subject string.
|
||||
__ mov(r0, subject);
|
||||
// Already there
|
||||
|
||||
// Locate the code entry and call it.
|
||||
__ add(r7, r7, Operand(Code::kHeaderSize - kHeapObjectTag));
|
||||
@ -4497,12 +4520,12 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
// Check the result.
|
||||
Label success;
|
||||
|
||||
__ cmp(r0, Operand(NativeRegExpMacroAssembler::SUCCESS));
|
||||
__ cmp(subject, Operand(NativeRegExpMacroAssembler::SUCCESS));
|
||||
__ b(eq, &success);
|
||||
Label failure;
|
||||
__ cmp(r0, Operand(NativeRegExpMacroAssembler::FAILURE));
|
||||
__ cmp(subject, Operand(NativeRegExpMacroAssembler::FAILURE));
|
||||
__ b(eq, &failure);
|
||||
__ cmp(r0, Operand(NativeRegExpMacroAssembler::EXCEPTION));
|
||||
__ cmp(subject, Operand(NativeRegExpMacroAssembler::EXCEPTION));
|
||||
// If not exception it can only be retry. Handle that in the runtime system.
|
||||
__ b(ne, &runtime);
|
||||
// Result must now be exception. If there is no pending exception already a
|
||||
@ -4514,18 +4537,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ mov(r2, Operand(ExternalReference(Isolate::k_pending_exception_address,
|
||||
isolate)));
|
||||
__ ldr(r0, MemOperand(r2, 0));
|
||||
__ cmp(r0, r1);
|
||||
__ cmp(subject, r1);
|
||||
__ b(eq, &runtime);
|
||||
|
||||
__ str(r1, MemOperand(r2, 0)); // Clear pending exception.
|
||||
|
||||
// Check if the exception is a termination. If so, throw as uncatchable.
|
||||
__ LoadRoot(ip, Heap::kTerminationExceptionRootIndex);
|
||||
__ cmp(r0, ip);
|
||||
__ cmp(subject, ip);
|
||||
Label termination_exception;
|
||||
__ b(eq, &termination_exception);
|
||||
|
||||
__ Throw(r0); // Expects thrown value in r0.
|
||||
__ Throw(subject); // Expects thrown value in r0.
|
||||
|
||||
__ bind(&termination_exception);
|
||||
__ ThrowUncatchable(TERMINATION, r0); // Expects thrown value in r0.
|
||||
@ -4803,6 +4826,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
Label flat_string;
|
||||
Label ascii_string;
|
||||
Label got_char_code;
|
||||
Label sliced_string;
|
||||
|
||||
// If the receiver is a smi trigger the non-string case.
|
||||
__ JumpIfSmi(object_, receiver_not_string_);
|
||||
@ -4832,7 +4856,11 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
__ b(eq, &flat_string);
|
||||
|
||||
// Handle non-flat strings.
|
||||
__ tst(result_, Operand(kIsConsStringMask));
|
||||
__ and_(result_, result_, Operand(kStringRepresentationMask));
|
||||
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
|
||||
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
|
||||
__ cmp(result_, Operand(kExternalStringTag));
|
||||
__ b(gt, &sliced_string);
|
||||
__ b(eq, &call_runtime_);
|
||||
|
||||
// ConsString.
|
||||
@ -4840,15 +4868,26 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
// this is really a flat string in a cons string). If that is not
|
||||
// the case we would rather go to the runtime system now to flatten
|
||||
// the string.
|
||||
Label assure_seq_string;
|
||||
__ ldr(result_, FieldMemOperand(object_, ConsString::kSecondOffset));
|
||||
__ LoadRoot(ip, Heap::kEmptyStringRootIndex);
|
||||
__ cmp(result_, Operand(ip));
|
||||
__ b(ne, &call_runtime_);
|
||||
// Get the first of the two strings and load its instance type.
|
||||
__ ldr(object_, FieldMemOperand(object_, ConsString::kFirstOffset));
|
||||
__ jmp(&assure_seq_string);
|
||||
|
||||
// SlicedString, unpack and add offset.
|
||||
__ bind(&sliced_string);
|
||||
__ ldr(result_, FieldMemOperand(object_, SlicedString::kOffsetOffset));
|
||||
__ add(scratch_, scratch_, result_);
|
||||
__ ldr(object_, FieldMemOperand(object_, SlicedString::kParentOffset));
|
||||
|
||||
// Assure that we are dealing with a sequential string. Go to runtime if not.
|
||||
__ bind(&assure_seq_string);
|
||||
__ ldr(result_, FieldMemOperand(object_, HeapObject::kMapOffset));
|
||||
__ ldrb(result_, FieldMemOperand(result_, Map::kInstanceTypeOffset));
|
||||
// If the first cons component is also non-flat, then go to runtime.
|
||||
// Check that parent is not an external string. Go to runtime otherwise.
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ tst(result_, Operand(kStringRepresentationMask));
|
||||
__ b(ne, &call_runtime_);
|
||||
@ -5428,10 +5467,17 @@ void SubStringStub::Generate(MacroAssembler* masm) {
|
||||
// Check bounds and smi-ness.
|
||||
Register to = r6;
|
||||
Register from = r7;
|
||||
|
||||
if (FLAG_string_slices) {
|
||||
__ nop(0); // Jumping as first instruction would crash the code generation.
|
||||
__ jmp(&runtime);
|
||||
}
|
||||
|
||||
__ Ldrd(to, from, MemOperand(sp, kToOffset));
|
||||
STATIC_ASSERT(kFromOffset == kToOffset + 4);
|
||||
STATIC_ASSERT(kSmiTag == 0);
|
||||
STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1);
|
||||
|
||||
// I.e., arithmetic shift right by one un-smi-tags.
|
||||
__ mov(r2, Operand(to, ASR, 1), SetCC);
|
||||
__ mov(r3, Operand(from, ASR, 1), SetCC, cc);
|
||||
@ -5440,7 +5486,6 @@ void SubStringStub::Generate(MacroAssembler* masm) {
|
||||
__ b(mi, &runtime); // From is negative.
|
||||
|
||||
// Both to and from are smis.
|
||||
|
||||
__ sub(r2, r2, Operand(r3), SetCC);
|
||||
__ b(mi, &runtime); // Fail if from > to.
|
||||
// Special handling of sub-strings of length 1 and 2. One character strings
|
||||
|
@ -1999,8 +1999,8 @@ LInstruction* LChunkBuilder::DoStringAdd(HStringAdd* instr) {
|
||||
|
||||
|
||||
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
|
||||
LOperand* string = UseRegister(instr->string());
|
||||
LOperand* index = UseRegisterOrConstant(instr->index());
|
||||
LOperand* string = UseTempRegister(instr->string());
|
||||
LOperand* index = UseTempRegister(instr->index());
|
||||
LStringCharCodeAt* result = new LStringCharCodeAt(string, index);
|
||||
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
|
||||
}
|
||||
|
@ -3455,97 +3455,83 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) {
|
||||
LStringCharCodeAt* instr_;
|
||||
};
|
||||
|
||||
Register scratch = scratch0();
|
||||
Register string = ToRegister(instr->string());
|
||||
Register index = no_reg;
|
||||
int const_index = -1;
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
const_index = ToInteger32(LConstantOperand::cast(instr->index()));
|
||||
STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
|
||||
if (!Smi::IsValid(const_index)) {
|
||||
// Guaranteed to be out of bounds because of the assert above.
|
||||
// So the bounds check that must dominate this instruction must
|
||||
// have deoptimized already.
|
||||
if (FLAG_debug_code) {
|
||||
__ Abort("StringCharCodeAt: out of bounds index.");
|
||||
}
|
||||
// No code needs to be generated.
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
index = ToRegister(instr->index());
|
||||
}
|
||||
Register index = ToRegister(instr->index());
|
||||
Register result = ToRegister(instr->result());
|
||||
|
||||
DeferredStringCharCodeAt* deferred =
|
||||
new DeferredStringCharCodeAt(this, instr);
|
||||
|
||||
Label flat_string, ascii_string, done;
|
||||
|
||||
// Fetch the instance type of the receiver into result register.
|
||||
__ ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
|
||||
__ ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
|
||||
|
||||
// We need special handling for non-flat strings.
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ tst(result, Operand(kStringRepresentationMask));
|
||||
__ b(eq, &flat_string);
|
||||
// We need special handling for indirect strings.
|
||||
Label check_sequential;
|
||||
__ tst(result, Operand(kIsIndirectStringMask));
|
||||
__ b(eq, &check_sequential);
|
||||
|
||||
// Handle non-flat strings.
|
||||
__ tst(result, Operand(kIsConsStringMask));
|
||||
__ b(eq, deferred->entry());
|
||||
// Dispatch on the indirect string shape: slice or cons.
|
||||
Label cons_string;
|
||||
const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
|
||||
ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
|
||||
__ tst(result, Operand(kSlicedNotConsMask));
|
||||
__ b(eq, &cons_string);
|
||||
|
||||
// ConsString.
|
||||
// Handle slices.
|
||||
Label indirect_string_loaded;
|
||||
__ ldr(result, FieldMemOperand(string, SlicedString::kOffsetOffset));
|
||||
__ add(index, index, Operand(result, ASR, kSmiTagSize));
|
||||
__ ldr(string, FieldMemOperand(string, SlicedString::kParentOffset));
|
||||
__ jmp(&indirect_string_loaded);
|
||||
|
||||
// Handle conses.
|
||||
// Check whether the right hand side is the empty string (i.e. if
|
||||
// this is really a flat string in a cons string). If that is not
|
||||
// the case we would rather go to the runtime system now to flatten
|
||||
// the string.
|
||||
__ ldr(scratch, FieldMemOperand(string, ConsString::kSecondOffset));
|
||||
__ bind(&cons_string);
|
||||
__ ldr(result, FieldMemOperand(string, ConsString::kSecondOffset));
|
||||
__ LoadRoot(ip, Heap::kEmptyStringRootIndex);
|
||||
__ cmp(scratch, ip);
|
||||
__ cmp(result, ip);
|
||||
__ b(ne, deferred->entry());
|
||||
// Get the first of the two strings and load its instance type.
|
||||
__ ldr(string, FieldMemOperand(string, ConsString::kFirstOffset));
|
||||
|
||||
__ bind(&indirect_string_loaded);
|
||||
__ ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
|
||||
__ ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
|
||||
// If the first cons component is also non-flat, then go to runtime.
|
||||
|
||||
// Check whether the string is sequential. The only non-sequential
|
||||
// shapes we support have just been unwrapped above.
|
||||
__ bind(&check_sequential);
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ tst(result, Operand(kStringRepresentationMask));
|
||||
__ b(ne, deferred->entry());
|
||||
|
||||
// Check for 1-byte or 2-byte string.
|
||||
__ bind(&flat_string);
|
||||
// Dispatch on the encoding: ASCII or two-byte.
|
||||
Label ascii_string;
|
||||
STATIC_ASSERT(kAsciiStringTag != 0);
|
||||
__ tst(result, Operand(kStringEncodingMask));
|
||||
__ b(ne, &ascii_string);
|
||||
|
||||
// 2-byte string.
|
||||
// Load the 2-byte character code into the result register.
|
||||
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
__ ldrh(result,
|
||||
FieldMemOperand(string,
|
||||
SeqTwoByteString::kHeaderSize + 2 * const_index));
|
||||
} else {
|
||||
__ add(scratch,
|
||||
string,
|
||||
Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
|
||||
__ ldrh(result, MemOperand(scratch, index, LSL, 1));
|
||||
}
|
||||
// Two-byte string.
|
||||
// Load the two-byte character code into the result register.
|
||||
Label done;
|
||||
__ add(result,
|
||||
string,
|
||||
Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
|
||||
__ ldrh(result, MemOperand(result, index, LSL, 1));
|
||||
__ jmp(&done);
|
||||
|
||||
// ASCII string.
|
||||
// Load the byte into the result register.
|
||||
__ bind(&ascii_string);
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
__ ldrb(result, FieldMemOperand(string,
|
||||
SeqAsciiString::kHeaderSize + const_index));
|
||||
} else {
|
||||
__ add(scratch,
|
||||
string,
|
||||
Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
|
||||
__ ldrb(result, MemOperand(scratch, index));
|
||||
}
|
||||
__ add(result,
|
||||
string,
|
||||
Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
|
||||
__ ldrb(result, MemOperand(result, index));
|
||||
|
||||
__ bind(&done);
|
||||
__ bind(deferred->exit());
|
||||
}
|
||||
|
@ -1034,12 +1034,13 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
|
||||
}
|
||||
|
||||
// Prepare for possible GC.
|
||||
HandleScope handles;
|
||||
HandleScope handles(isolate);
|
||||
Handle<Code> code_handle(re_code);
|
||||
|
||||
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
|
||||
|
||||
// Current string.
|
||||
bool is_ascii = subject->IsAsciiRepresentation();
|
||||
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
|
||||
|
||||
ASSERT(re_code->instruction_start() <= *return_address);
|
||||
ASSERT(*return_address <=
|
||||
@ -1057,8 +1058,20 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
|
||||
return EXCEPTION;
|
||||
}
|
||||
|
||||
Handle<String> subject_tmp = subject;
|
||||
int slice_offset = 0;
|
||||
|
||||
// Extract the underlying string and the slice offset.
|
||||
if (StringShape(*subject_tmp).IsCons()) {
|
||||
subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
|
||||
} else if (StringShape(*subject_tmp).IsSliced()) {
|
||||
SlicedString* slice = SlicedString::cast(*subject_tmp);
|
||||
subject_tmp = Handle<String>(slice->parent());
|
||||
slice_offset = slice->offset();
|
||||
}
|
||||
|
||||
// String might have changed.
|
||||
if (subject->IsAsciiRepresentation() != is_ascii) {
|
||||
if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
|
||||
// If we changed between an ASCII and an UC16 string, the specialized
|
||||
// code cannot be used, and we need to restart regexp matching from
|
||||
// scratch (including, potentially, compiling a new version of the code).
|
||||
@ -1069,8 +1082,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
|
||||
// be a sequential or external string with the same content.
|
||||
// Update the start and end pointers in the stack frame to the current
|
||||
// location (whether it has actually moved or not).
|
||||
ASSERT(StringShape(*subject).IsSequential() ||
|
||||
StringShape(*subject).IsExternal());
|
||||
ASSERT(StringShape(*subject_tmp).IsSequential() ||
|
||||
StringShape(*subject_tmp).IsExternal());
|
||||
|
||||
// The original start address of the characters to match.
|
||||
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
|
||||
@ -1078,13 +1091,14 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
|
||||
// Find the current start address of the same character at the current string
|
||||
// position.
|
||||
int start_index = frame_entry<int>(re_frame, kStartIndex);
|
||||
const byte* new_address = StringCharacterPosition(*subject, start_index);
|
||||
const byte* new_address = StringCharacterPosition(*subject_tmp,
|
||||
start_index + slice_offset);
|
||||
|
||||
if (start_address != new_address) {
|
||||
// If there is a difference, update the object pointer and start and end
|
||||
// addresses in the RegExp stack frame to match the new value.
|
||||
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
|
||||
int byte_length = end_address - start_address;
|
||||
int byte_length = static_cast<int>(end_address - start_address);
|
||||
frame_entry<const String*>(re_frame, kInputString) = *subject;
|
||||
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
|
||||
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
|
||||
|
@ -104,6 +104,7 @@ DEFINE_bool(harmony_block_scoping, false, "enable harmony block scoping")
|
||||
|
||||
// Flags for experimental implementation features.
|
||||
DEFINE_bool(unbox_double_arrays, true, "automatically unbox arrays of doubles")
|
||||
DEFINE_bool(string_slices, false, "use string slices")
|
||||
|
||||
// Flags for Crankshaft.
|
||||
#ifdef V8_TARGET_ARCH_MIPS
|
||||
|
@ -323,10 +323,10 @@ AllocationSpace Heap::TargetSpaceId(InstanceType type) {
|
||||
ASSERT(type != JS_GLOBAL_PROPERTY_CELL_TYPE);
|
||||
|
||||
if (type < FIRST_NONSTRING_TYPE) {
|
||||
// There are three string representations: sequential strings, cons
|
||||
// strings, and external strings. Only cons strings contain
|
||||
// non-map-word pointers to heap objects.
|
||||
return ((type & kStringRepresentationMask) == kConsStringTag)
|
||||
// There are four string representations: sequential strings, external
|
||||
// strings, cons strings, and sliced strings.
|
||||
// Only the latter two contain non-map-word pointers to heap objects.
|
||||
return ((type & kIsIndirectStringMask) == kIsIndirectStringTag)
|
||||
? OLD_POINTER_SPACE
|
||||
: OLD_DATA_SPACE;
|
||||
} else {
|
||||
|
83
src/heap.cc
83
src/heap.cc
@ -1290,6 +1290,10 @@ class ScavengingVisitor : public StaticVisitorBase {
|
||||
&ObjectEvacuationStrategy<POINTER_OBJECT>::
|
||||
template VisitSpecialized<ConsString::kSize>);
|
||||
|
||||
table_.Register(kVisitSlicedString,
|
||||
&ObjectEvacuationStrategy<POINTER_OBJECT>::
|
||||
template VisitSpecialized<SlicedString::kSize>);
|
||||
|
||||
table_.Register(kVisitSharedFunctionInfo,
|
||||
&ObjectEvacuationStrategy<POINTER_OBJECT>::
|
||||
template VisitSpecialized<SharedFunctionInfo::kSize>);
|
||||
@ -2564,6 +2568,8 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
|
||||
|
||||
// If the resulting string is small make a flat string.
|
||||
if (length < String::kMinNonFlatLength) {
|
||||
// Note that neither of the two inputs can be a slice because:
|
||||
STATIC_ASSERT(String::kMinNonFlatLength <= SlicedString::kMinLength);
|
||||
ASSERT(first->IsFlat());
|
||||
ASSERT(second->IsFlat());
|
||||
if (is_ascii) {
|
||||
@ -2655,24 +2661,69 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
|
||||
// Make an attempt to flatten the buffer to reduce access time.
|
||||
buffer = buffer->TryFlattenGetString();
|
||||
|
||||
Object* result;
|
||||
{ MaybeObject* maybe_result = buffer->IsAsciiRepresentation()
|
||||
? AllocateRawAsciiString(length, pretenure )
|
||||
: AllocateRawTwoByteString(length, pretenure);
|
||||
if (!maybe_result->ToObject(&result)) return maybe_result;
|
||||
}
|
||||
String* string_result = String::cast(result);
|
||||
// Copy the characters into the new object.
|
||||
if (buffer->IsAsciiRepresentation()) {
|
||||
ASSERT(string_result->IsAsciiRepresentation());
|
||||
char* dest = SeqAsciiString::cast(string_result)->GetChars();
|
||||
String::WriteToFlat(buffer, dest, start, end);
|
||||
} else {
|
||||
ASSERT(string_result->IsTwoByteRepresentation());
|
||||
uc16* dest = SeqTwoByteString::cast(string_result)->GetChars();
|
||||
String::WriteToFlat(buffer, dest, start, end);
|
||||
// TODO(1626): For now slicing external strings is not supported. However,
|
||||
// a flat cons string can have an external string as first part in some cases.
|
||||
// Therefore we have to single out this case as well.
|
||||
if (!FLAG_string_slices ||
|
||||
(buffer->IsConsString() &&
|
||||
(!buffer->IsFlat() ||
|
||||
!ConsString::cast(buffer)->first()->IsSeqString())) ||
|
||||
buffer->IsExternalString() ||
|
||||
length < SlicedString::kMinLength ||
|
||||
pretenure == TENURED) {
|
||||
Object* result;
|
||||
{ MaybeObject* maybe_result = buffer->IsAsciiRepresentation()
|
||||
? AllocateRawAsciiString(length, pretenure)
|
||||
: AllocateRawTwoByteString(length, pretenure);
|
||||
if (!maybe_result->ToObject(&result)) return maybe_result;
|
||||
}
|
||||
String* string_result = String::cast(result);
|
||||
// Copy the characters into the new object.
|
||||
if (buffer->IsAsciiRepresentation()) {
|
||||
ASSERT(string_result->IsAsciiRepresentation());
|
||||
char* dest = SeqAsciiString::cast(string_result)->GetChars();
|
||||
String::WriteToFlat(buffer, dest, start, end);
|
||||
} else {
|
||||
ASSERT(string_result->IsTwoByteRepresentation());
|
||||
uc16* dest = SeqTwoByteString::cast(string_result)->GetChars();
|
||||
String::WriteToFlat(buffer, dest, start, end);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
ASSERT(buffer->IsFlat());
|
||||
ASSERT(!buffer->IsExternalString());
|
||||
#if DEBUG
|
||||
buffer->StringVerify();
|
||||
#endif
|
||||
|
||||
Object* result;
|
||||
{ Map* map = buffer->IsAsciiRepresentation()
|
||||
? sliced_ascii_string_map()
|
||||
: sliced_string_map();
|
||||
MaybeObject* maybe_result = Allocate(map, NEW_SPACE);
|
||||
if (!maybe_result->ToObject(&result)) return maybe_result;
|
||||
}
|
||||
|
||||
AssertNoAllocation no_gc;
|
||||
SlicedString* sliced_string = SlicedString::cast(result);
|
||||
sliced_string->set_length(length);
|
||||
sliced_string->set_hash_field(String::kEmptyHashField);
|
||||
if (buffer->IsConsString()) {
|
||||
ConsString* cons = ConsString::cast(buffer);
|
||||
ASSERT(cons->second()->length() == 0);
|
||||
sliced_string->set_parent(cons->first());
|
||||
sliced_string->set_offset(start);
|
||||
} else if (buffer->IsSlicedString()) {
|
||||
// Prevent nesting sliced strings.
|
||||
SlicedString* parent_slice = SlicedString::cast(buffer);
|
||||
sliced_string->set_parent(parent_slice->parent());
|
||||
sliced_string->set_offset(start + parent_slice->offset());
|
||||
} else {
|
||||
sliced_string->set_parent(buffer);
|
||||
sliced_string->set_offset(start);
|
||||
}
|
||||
ASSERT(sliced_string->parent()->IsSeqString());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -88,6 +88,8 @@ inline Heap* _inline_get_heap_();
|
||||
V(Map, symbol_map, SymbolMap) \
|
||||
V(Map, cons_string_map, ConsStringMap) \
|
||||
V(Map, cons_ascii_string_map, ConsAsciiStringMap) \
|
||||
V(Map, sliced_string_map, SlicedStringMap) \
|
||||
V(Map, sliced_ascii_string_map, SlicedAsciiStringMap) \
|
||||
V(Map, ascii_symbol_map, AsciiSymbolMap) \
|
||||
V(Map, cons_symbol_map, ConsSymbolMap) \
|
||||
V(Map, cons_ascii_symbol_map, ConsAsciiSymbolMap) \
|
||||
|
@ -3371,6 +3371,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ cmp(edx, Operand(eax));
|
||||
__ j(greater, &runtime);
|
||||
|
||||
// Reset offset for possibly sliced string.
|
||||
__ Set(edi, Immediate(0));
|
||||
// ecx: RegExp data (FixedArray)
|
||||
// Check the representation and encoding of the subject string.
|
||||
Label seq_ascii_string, seq_two_byte_string, check_code;
|
||||
@ -3381,36 +3383,45 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ and_(ebx,
|
||||
kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask);
|
||||
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
|
||||
__ j(zero, &seq_two_byte_string);
|
||||
__ j(zero, &seq_two_byte_string, Label::kNear);
|
||||
// Any other flat string must be a flat ascii string.
|
||||
__ test(Operand(ebx),
|
||||
__ and_(Operand(ebx),
|
||||
Immediate(kIsNotStringMask | kStringRepresentationMask));
|
||||
__ j(zero, &seq_ascii_string);
|
||||
__ j(zero, &seq_ascii_string, Label::kNear);
|
||||
|
||||
// Check for flat cons string.
|
||||
// Check for flat cons string or sliced string.
|
||||
// A flat cons string is a cons string where the second part is the empty
|
||||
// string. In that case the subject string is just the first part of the cons
|
||||
// string. Also in this case the first part of the cons string is known to be
|
||||
// a sequential string or an external string.
|
||||
STATIC_ASSERT(kExternalStringTag != 0);
|
||||
STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
|
||||
__ test(Operand(ebx),
|
||||
Immediate(kIsNotStringMask | kExternalStringTag));
|
||||
__ j(not_zero, &runtime);
|
||||
// String is a cons string.
|
||||
__ mov(edx, FieldOperand(eax, ConsString::kSecondOffset));
|
||||
__ cmp(Operand(edx), factory->empty_string());
|
||||
// In the case of a sliced string its offset has to be taken into account.
|
||||
Label cons_string, check_encoding;
|
||||
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
|
||||
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
|
||||
__ cmp(Operand(ebx), Immediate(kExternalStringTag));
|
||||
__ j(less, &cons_string);
|
||||
__ j(equal, &runtime);
|
||||
|
||||
// String is sliced.
|
||||
__ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset));
|
||||
__ mov(eax, FieldOperand(eax, SlicedString::kParentOffset));
|
||||
// edi: offset of sliced string, smi-tagged.
|
||||
// eax: parent string.
|
||||
__ jmp(&check_encoding, Label::kNear);
|
||||
// String is a cons string, check whether it is flat.
|
||||
__ bind(&cons_string);
|
||||
__ cmp(FieldOperand(eax, ConsString::kSecondOffset), factory->empty_string());
|
||||
__ j(not_equal, &runtime);
|
||||
__ mov(eax, FieldOperand(eax, ConsString::kFirstOffset));
|
||||
__ bind(&check_encoding);
|
||||
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
|
||||
// String is a cons string with empty second part.
|
||||
// eax: first part of cons string.
|
||||
// ebx: map of first part of cons string.
|
||||
// Is first part a flat two byte string?
|
||||
// eax: first part of cons string or parent of sliced string.
|
||||
// ebx: map of first part of cons string or map of parent of sliced string.
|
||||
// Is first part of cons or parent of slice a flat two byte string?
|
||||
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
|
||||
kStringRepresentationMask | kStringEncodingMask);
|
||||
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
|
||||
__ j(zero, &seq_two_byte_string);
|
||||
__ j(zero, &seq_two_byte_string, Label::kNear);
|
||||
// Any other flat string must be ascii.
|
||||
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset),
|
||||
kStringRepresentationMask);
|
||||
@ -3420,14 +3431,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
// eax: subject string (flat ascii)
|
||||
// ecx: RegExp data (FixedArray)
|
||||
__ mov(edx, FieldOperand(ecx, JSRegExp::kDataAsciiCodeOffset));
|
||||
__ Set(edi, Immediate(1)); // Type is ascii.
|
||||
__ jmp(&check_code);
|
||||
__ Set(ecx, Immediate(1)); // Type is ascii.
|
||||
__ jmp(&check_code, Label::kNear);
|
||||
|
||||
__ bind(&seq_two_byte_string);
|
||||
// eax: subject string (flat two byte)
|
||||
// ecx: RegExp data (FixedArray)
|
||||
__ mov(edx, FieldOperand(ecx, JSRegExp::kDataUC16CodeOffset));
|
||||
__ Set(edi, Immediate(0)); // Type is two byte.
|
||||
__ Set(ecx, Immediate(0)); // Type is two byte.
|
||||
|
||||
__ bind(&check_code);
|
||||
// Check that the irregexp code has been generated for the actual string
|
||||
@ -3437,7 +3448,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
// eax: subject string
|
||||
// edx: code
|
||||
// edi: encoding of subject string (1 if ascii, 0 if two_byte);
|
||||
// ecx: encoding of subject string (1 if ascii, 0 if two_byte);
|
||||
// Load used arguments before starting to push arguments for call to native
|
||||
// RegExp code to avoid handling changing stack height.
|
||||
__ mov(ebx, Operand(esp, kPreviousIndexOffset));
|
||||
@ -3446,7 +3457,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
// eax: subject string
|
||||
// ebx: previous index
|
||||
// edx: code
|
||||
// edi: encoding of subject string (1 if ascii 0 if two_byte);
|
||||
// ecx: encoding of subject string (1 if ascii 0 if two_byte);
|
||||
// All checks done. Now push arguments for native regexp code.
|
||||
Counters* counters = masm->isolate()->counters();
|
||||
__ IncrementCounter(counters->regexp_entry_native(), 1);
|
||||
@ -3463,23 +3474,47 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ mov(Operand(esp, 6 * kPointerSize), Immediate(1));
|
||||
|
||||
// Argument 6: Start (high end) of backtracking stack memory area.
|
||||
__ mov(ecx, Operand::StaticVariable(address_of_regexp_stack_memory_address));
|
||||
__ add(ecx, Operand::StaticVariable(address_of_regexp_stack_memory_size));
|
||||
__ mov(Operand(esp, 5 * kPointerSize), ecx);
|
||||
__ mov(esi, Operand::StaticVariable(address_of_regexp_stack_memory_address));
|
||||
__ add(esi, Operand::StaticVariable(address_of_regexp_stack_memory_size));
|
||||
__ mov(Operand(esp, 5 * kPointerSize), esi);
|
||||
|
||||
// Argument 5: static offsets vector buffer.
|
||||
__ mov(Operand(esp, 4 * kPointerSize),
|
||||
Immediate(ExternalReference::address_of_static_offsets_vector(
|
||||
masm->isolate())));
|
||||
|
||||
// Argument 2: Previous index.
|
||||
__ mov(Operand(esp, 1 * kPointerSize), ebx);
|
||||
|
||||
// Argument 1: Original subject string.
|
||||
// The original subject is in the previous stack frame. Therefore we have to
|
||||
// use ebp, which points exactly to one pointer size below the previous esp.
|
||||
// (Because creating a new stack frame pushes the previous ebp onto the stack
|
||||
// and thereby moves up esp by one kPointerSize.)
|
||||
__ mov(esi, Operand(ebp, kSubjectOffset + kPointerSize));
|
||||
__ mov(Operand(esp, 0 * kPointerSize), esi);
|
||||
|
||||
// esi: original subject string
|
||||
// eax: underlying subject string
|
||||
// ebx: previous index
|
||||
// ecx: encoding of subject string (1 if ascii 0 if two_byte);
|
||||
// edx: code
|
||||
// Argument 4: End of string data
|
||||
// Argument 3: Start of string data
|
||||
Label setup_two_byte, setup_rest;
|
||||
__ test(edi, Operand(edi));
|
||||
__ mov(edi, FieldOperand(eax, String::kLengthOffset));
|
||||
__ j(zero, &setup_two_byte, Label::kNear);
|
||||
// Prepare start and end index of the input.
|
||||
// Load the length from the original sliced string if that is the case.
|
||||
__ mov(esi, FieldOperand(esi, String::kLengthOffset));
|
||||
__ add(esi, Operand(edi)); // Calculate input end wrt offset.
|
||||
__ SmiUntag(edi);
|
||||
__ lea(ecx, FieldOperand(eax, edi, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ add(ebx, Operand(edi)); // Calculate input start wrt offset.
|
||||
|
||||
// ebx: start index of the input string
|
||||
// esi: end index of the input string
|
||||
Label setup_two_byte, setup_rest;
|
||||
__ test(ecx, Operand(ecx));
|
||||
__ j(zero, &setup_two_byte, Label::kNear);
|
||||
__ SmiUntag(esi);
|
||||
__ lea(ecx, FieldOperand(eax, esi, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ mov(Operand(esp, 3 * kPointerSize), ecx); // Argument 4.
|
||||
__ lea(ecx, FieldOperand(eax, ebx, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ mov(Operand(esp, 2 * kPointerSize), ecx); // Argument 3.
|
||||
@ -3487,20 +3522,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
|
||||
__ bind(&setup_two_byte);
|
||||
STATIC_ASSERT(kSmiTag == 0);
|
||||
STATIC_ASSERT(kSmiTagSize == 1); // edi is smi (powered by 2).
|
||||
__ lea(ecx, FieldOperand(eax, edi, times_1, SeqTwoByteString::kHeaderSize));
|
||||
STATIC_ASSERT(kSmiTagSize == 1); // esi is smi (powered by 2).
|
||||
__ lea(ecx, FieldOperand(eax, esi, times_1, SeqTwoByteString::kHeaderSize));
|
||||
__ mov(Operand(esp, 3 * kPointerSize), ecx); // Argument 4.
|
||||
__ lea(ecx, FieldOperand(eax, ebx, times_2, SeqTwoByteString::kHeaderSize));
|
||||
__ mov(Operand(esp, 2 * kPointerSize), ecx); // Argument 3.
|
||||
|
||||
__ bind(&setup_rest);
|
||||
|
||||
// Argument 2: Previous index.
|
||||
__ mov(Operand(esp, 1 * kPointerSize), ebx);
|
||||
|
||||
// Argument 1: Subject string.
|
||||
__ mov(Operand(esp, 0 * kPointerSize), eax);
|
||||
|
||||
// Locate the code entry and call it.
|
||||
__ add(Operand(edx), Immediate(Code::kHeaderSize - kHeapObjectTag));
|
||||
__ call(Operand(edx));
|
||||
@ -3539,7 +3568,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
// by javascript code.
|
||||
__ cmp(eax, factory->termination_exception());
|
||||
Label throw_termination_exception;
|
||||
__ j(equal, &throw_termination_exception);
|
||||
__ j(equal, &throw_termination_exception, Label::kNear);
|
||||
|
||||
// Handle normal exception by following handler chain.
|
||||
__ Throw(eax);
|
||||
@ -4811,6 +4840,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
Label flat_string;
|
||||
Label ascii_string;
|
||||
Label got_char_code;
|
||||
Label sliced_string;
|
||||
|
||||
// If the receiver is a smi trigger the non-string case.
|
||||
STATIC_ASSERT(kSmiTag == 0);
|
||||
@ -4841,31 +4871,45 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
__ j(zero, &flat_string);
|
||||
|
||||
// Handle non-flat strings.
|
||||
__ test(result_, Immediate(kIsConsStringMask));
|
||||
__ j(zero, &call_runtime_);
|
||||
__ and_(result_, kStringRepresentationMask);
|
||||
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
|
||||
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
|
||||
__ cmp(result_, kExternalStringTag);
|
||||
__ j(greater, &sliced_string, Label::kNear);
|
||||
__ j(equal, &call_runtime_);
|
||||
|
||||
// ConsString.
|
||||
// Check whether the right hand side is the empty string (i.e. if
|
||||
// this is really a flat string in a cons string). If that is not
|
||||
// the case we would rather go to the runtime system now to flatten
|
||||
// the string.
|
||||
Label assure_seq_string;
|
||||
__ cmp(FieldOperand(object_, ConsString::kSecondOffset),
|
||||
Immediate(masm->isolate()->factory()->empty_string()));
|
||||
__ j(not_equal, &call_runtime_);
|
||||
// Get the first of the two strings and load its instance type.
|
||||
__ mov(object_, FieldOperand(object_, ConsString::kFirstOffset));
|
||||
__ jmp(&assure_seq_string, Label::kNear);
|
||||
|
||||
// SlicedString, unpack and add offset.
|
||||
__ bind(&sliced_string);
|
||||
__ add(scratch_, FieldOperand(object_, SlicedString::kOffsetOffset));
|
||||
__ mov(object_, FieldOperand(object_, SlicedString::kParentOffset));
|
||||
|
||||
// Assure that we are dealing with a sequential string. Go to runtime if not.
|
||||
__ bind(&assure_seq_string);
|
||||
__ mov(result_, FieldOperand(object_, HeapObject::kMapOffset));
|
||||
__ movzx_b(result_, FieldOperand(result_, Map::kInstanceTypeOffset));
|
||||
// If the first cons component is also non-flat, then go to runtime.
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ test(result_, Immediate(kStringRepresentationMask));
|
||||
__ j(not_zero, &call_runtime_);
|
||||
__ jmp(&flat_string, Label::kNear);
|
||||
|
||||
// Check for 1-byte or 2-byte string.
|
||||
__ bind(&flat_string);
|
||||
STATIC_ASSERT(kAsciiStringTag != 0);
|
||||
__ test(result_, Immediate(kStringEncodingMask));
|
||||
__ j(not_zero, &ascii_string);
|
||||
__ j(not_zero, &ascii_string, Label::kNear);
|
||||
|
||||
// 2-byte string.
|
||||
// Load the 2-byte character code into the result register.
|
||||
@ -4873,7 +4917,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
__ movzx_w(result_, FieldOperand(object_,
|
||||
scratch_, times_1, // Scratch is smi-tagged.
|
||||
SeqTwoByteString::kHeaderSize));
|
||||
__ jmp(&got_char_code);
|
||||
__ jmp(&got_char_code, Label::kNear);
|
||||
|
||||
// ASCII string.
|
||||
// Load the byte into the result register.
|
||||
@ -5185,6 +5229,8 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ and_(ecx, kStringRepresentationMask);
|
||||
__ cmp(ecx, kExternalStringTag);
|
||||
__ j(equal, &string_add_runtime);
|
||||
// We cannot encounter sliced strings here since:
|
||||
STATIC_ASSERT(SlicedString::kMinLength >= String::kMinNonFlatLength);
|
||||
// Now check if both strings are ascii strings.
|
||||
// eax: first string
|
||||
// ebx: length of resulting flat string as a smi
|
||||
@ -5596,6 +5642,9 @@ void StringHelper::GenerateHashGetHash(MacroAssembler* masm,
|
||||
void SubStringStub::Generate(MacroAssembler* masm) {
|
||||
Label runtime;
|
||||
|
||||
if (FLAG_string_slices) {
|
||||
__ jmp(&runtime);
|
||||
}
|
||||
// Stack frame on entry.
|
||||
// esp[0]: return address
|
||||
// esp[4]: to
|
||||
|
@ -3217,95 +3217,81 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) {
|
||||
};
|
||||
|
||||
Register string = ToRegister(instr->string());
|
||||
Register index = no_reg;
|
||||
int const_index = -1;
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
const_index = ToInteger32(LConstantOperand::cast(instr->index()));
|
||||
STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
|
||||
if (!Smi::IsValid(const_index)) {
|
||||
// Guaranteed to be out of bounds because of the assert above.
|
||||
// So the bounds check that must dominate this instruction must
|
||||
// have deoptimized already.
|
||||
if (FLAG_debug_code) {
|
||||
__ Abort("StringCharCodeAt: out of bounds index.");
|
||||
}
|
||||
// No code needs to be generated.
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
index = ToRegister(instr->index());
|
||||
}
|
||||
Register index = ToRegister(instr->index());
|
||||
Register result = ToRegister(instr->result());
|
||||
|
||||
DeferredStringCharCodeAt* deferred =
|
||||
new DeferredStringCharCodeAt(this, instr);
|
||||
|
||||
Label flat_string, ascii_string, done;
|
||||
|
||||
// Fetch the instance type of the receiver into result register.
|
||||
__ mov(result, FieldOperand(string, HeapObject::kMapOffset));
|
||||
__ movzx_b(result, FieldOperand(result, Map::kInstanceTypeOffset));
|
||||
|
||||
// We need special handling for non-flat strings.
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ test(result, Immediate(kStringRepresentationMask));
|
||||
__ j(zero, &flat_string, Label::kNear);
|
||||
// We need special handling for indirect strings.
|
||||
Label check_sequential;
|
||||
__ test(result, Immediate(kIsIndirectStringMask));
|
||||
__ j(zero, &check_sequential, Label::kNear);
|
||||
|
||||
// Handle non-flat strings.
|
||||
__ test(result, Immediate(kIsConsStringMask));
|
||||
__ j(zero, deferred->entry());
|
||||
// Dispatch on the indirect string shape: slice or cons.
|
||||
Label cons_string;
|
||||
const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
|
||||
ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
|
||||
__ test(result, Immediate(kSlicedNotConsMask));
|
||||
__ j(zero, &cons_string, Label::kNear);
|
||||
|
||||
// ConsString.
|
||||
// Handle slices.
|
||||
Label indirect_string_loaded;
|
||||
__ mov(result, FieldOperand(string, SlicedString::kOffsetOffset));
|
||||
__ SmiUntag(result);
|
||||
__ add(index, Operand(result));
|
||||
__ mov(string, FieldOperand(string, SlicedString::kParentOffset));
|
||||
__ jmp(&indirect_string_loaded, Label::kNear);
|
||||
|
||||
// Handle conses.
|
||||
// Check whether the right hand side is the empty string (i.e. if
|
||||
// this is really a flat string in a cons string). If that is not
|
||||
// the case we would rather go to the runtime system now to flatten
|
||||
// the string.
|
||||
__ bind(&cons_string);
|
||||
__ cmp(FieldOperand(string, ConsString::kSecondOffset),
|
||||
Immediate(factory()->empty_string()));
|
||||
__ j(not_equal, deferred->entry());
|
||||
// Get the first of the two strings and load its instance type.
|
||||
__ mov(string, FieldOperand(string, ConsString::kFirstOffset));
|
||||
|
||||
__ bind(&indirect_string_loaded);
|
||||
__ mov(result, FieldOperand(string, HeapObject::kMapOffset));
|
||||
__ movzx_b(result, FieldOperand(result, Map::kInstanceTypeOffset));
|
||||
// If the first cons component is also non-flat, then go to runtime.
|
||||
|
||||
// Check whether the string is sequential. The only non-sequential
|
||||
// shapes we support have just been unwrapped above.
|
||||
__ bind(&check_sequential);
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ test(result, Immediate(kStringRepresentationMask));
|
||||
__ j(not_zero, deferred->entry());
|
||||
|
||||
// Check for ASCII or two-byte string.
|
||||
__ bind(&flat_string);
|
||||
// Dispatch on the encoding: ASCII or two-byte.
|
||||
Label ascii_string;
|
||||
STATIC_ASSERT(kAsciiStringTag != 0);
|
||||
__ test(result, Immediate(kStringEncodingMask));
|
||||
__ j(not_zero, &ascii_string, Label::kNear);
|
||||
|
||||
// Two-byte string.
|
||||
// Load the two-byte character code into the result register.
|
||||
Label done;
|
||||
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
__ movzx_w(result,
|
||||
FieldOperand(string,
|
||||
SeqTwoByteString::kHeaderSize +
|
||||
(kUC16Size * const_index)));
|
||||
} else {
|
||||
__ movzx_w(result, FieldOperand(string,
|
||||
index,
|
||||
times_2,
|
||||
SeqTwoByteString::kHeaderSize));
|
||||
}
|
||||
__ movzx_w(result, FieldOperand(string,
|
||||
index,
|
||||
times_2,
|
||||
SeqTwoByteString::kHeaderSize));
|
||||
__ jmp(&done, Label::kNear);
|
||||
|
||||
// ASCII string.
|
||||
// Load the byte into the result register.
|
||||
__ bind(&ascii_string);
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
__ movzx_b(result, FieldOperand(string,
|
||||
SeqAsciiString::kHeaderSize + const_index));
|
||||
} else {
|
||||
__ movzx_b(result, FieldOperand(string,
|
||||
index,
|
||||
times_1,
|
||||
SeqAsciiString::kHeaderSize));
|
||||
}
|
||||
__ movzx_b(result, FieldOperand(string,
|
||||
index,
|
||||
times_1,
|
||||
SeqAsciiString::kHeaderSize));
|
||||
__ bind(&done);
|
||||
__ bind(deferred->exit());
|
||||
}
|
||||
|
@ -2058,8 +2058,8 @@ LInstruction* LChunkBuilder::DoStringAdd(HStringAdd* instr) {
|
||||
|
||||
|
||||
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
|
||||
LOperand* string = UseRegister(instr->string());
|
||||
LOperand* index = UseRegisterOrConstant(instr->index());
|
||||
LOperand* string = UseTempRegister(instr->string());
|
||||
LOperand* index = UseTempRegister(instr->index());
|
||||
LOperand* context = UseAny(instr->context());
|
||||
LStringCharCodeAt* result = new LStringCharCodeAt(context, string, index);
|
||||
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
|
||||
|
@ -1065,12 +1065,13 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
|
||||
}
|
||||
|
||||
// Prepare for possible GC.
|
||||
HandleScope handles;
|
||||
HandleScope handles(isolate);
|
||||
Handle<Code> code_handle(re_code);
|
||||
|
||||
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
|
||||
|
||||
// Current string.
|
||||
bool is_ascii = subject->IsAsciiRepresentation();
|
||||
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
|
||||
|
||||
ASSERT(re_code->instruction_start() <= *return_address);
|
||||
ASSERT(*return_address <=
|
||||
@ -1088,8 +1089,20 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
|
||||
return EXCEPTION;
|
||||
}
|
||||
|
||||
Handle<String> subject_tmp = subject;
|
||||
int slice_offset = 0;
|
||||
|
||||
// Extract the underlying string and the slice offset.
|
||||
if (StringShape(*subject_tmp).IsCons()) {
|
||||
subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
|
||||
} else if (StringShape(*subject_tmp).IsSliced()) {
|
||||
SlicedString* slice = SlicedString::cast(*subject_tmp);
|
||||
subject_tmp = Handle<String>(slice->parent());
|
||||
slice_offset = slice->offset();
|
||||
}
|
||||
|
||||
// String might have changed.
|
||||
if (subject->IsAsciiRepresentation() != is_ascii) {
|
||||
if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
|
||||
// If we changed between an ASCII and an UC16 string, the specialized
|
||||
// code cannot be used, and we need to restart regexp matching from
|
||||
// scratch (including, potentially, compiling a new version of the code).
|
||||
@ -1100,8 +1113,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
|
||||
// be a sequential or external string with the same content.
|
||||
// Update the start and end pointers in the stack frame to the current
|
||||
// location (whether it has actually moved or not).
|
||||
ASSERT(StringShape(*subject).IsSequential() ||
|
||||
StringShape(*subject).IsExternal());
|
||||
ASSERT(StringShape(*subject_tmp).IsSequential() ||
|
||||
StringShape(*subject_tmp).IsExternal());
|
||||
|
||||
// The original start address of the characters to match.
|
||||
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
|
||||
@ -1109,13 +1122,14 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
|
||||
// Find the current start address of the same character at the current string
|
||||
// position.
|
||||
int start_index = frame_entry<int>(re_frame, kStartIndex);
|
||||
const byte* new_address = StringCharacterPosition(*subject, start_index);
|
||||
const byte* new_address = StringCharacterPosition(*subject_tmp,
|
||||
start_index + slice_offset);
|
||||
|
||||
if (start_address != new_address) {
|
||||
// If there is a difference, update the object pointer and start and end
|
||||
// addresses in the RegExp stack frame to match the new value.
|
||||
const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
|
||||
int byte_length = end_address - start_address;
|
||||
int byte_length = static_cast<int>(end_address - start_address);
|
||||
frame_entry<const String*>(re_frame, kInputString) = *subject;
|
||||
frame_entry<const byte*>(re_frame, kInputStart) = new_address;
|
||||
frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
|
||||
|
@ -224,9 +224,9 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
|
||||
|
||||
if (!subject->IsFlat()) FlattenString(subject);
|
||||
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
|
||||
// Extract flattened substrings of cons strings before determining asciiness.
|
||||
|
||||
String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
|
||||
ASSERT(StringShape(needle).IsSequential());
|
||||
int needle_len = needle->length();
|
||||
ASSERT(needle->IsFlat());
|
||||
|
||||
@ -347,10 +347,7 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
|
||||
JSRegExp::Flags flags = re->GetFlags();
|
||||
|
||||
Handle<String> pattern(re->Pattern());
|
||||
if (!pattern->IsFlat()) {
|
||||
FlattenString(pattern);
|
||||
}
|
||||
|
||||
if (!pattern->IsFlat()) FlattenString(pattern);
|
||||
RegExpCompileData compile_data;
|
||||
FlatStringReader reader(isolate, pattern);
|
||||
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
|
||||
@ -434,22 +431,12 @@ void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
|
||||
|
||||
int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
|
||||
Handle<String> subject) {
|
||||
if (!subject->IsFlat()) {
|
||||
FlattenString(subject);
|
||||
}
|
||||
if (!subject->IsFlat()) FlattenString(subject);
|
||||
|
||||
// Check the asciiness of the underlying storage.
|
||||
bool is_ascii;
|
||||
{
|
||||
AssertNoAllocation no_gc;
|
||||
String* sequential_string = *subject;
|
||||
if (subject->IsConsString()) {
|
||||
sequential_string = ConsString::cast(*subject)->first();
|
||||
}
|
||||
is_ascii = sequential_string->IsAsciiRepresentation();
|
||||
}
|
||||
if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
|
||||
return -1;
|
||||
}
|
||||
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
|
||||
if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1;
|
||||
|
||||
#ifdef V8_INTERPRETED_REGEXP
|
||||
// Byte-code regexp needs space allocated for all its registers.
|
||||
return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
|
||||
@ -474,15 +461,11 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
|
||||
ASSERT(index <= subject->length());
|
||||
ASSERT(subject->IsFlat());
|
||||
|
||||
// A flat ASCII string might have a two-byte first part.
|
||||
if (subject->IsConsString()) {
|
||||
subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
|
||||
}
|
||||
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
|
||||
|
||||
#ifndef V8_INTERPRETED_REGEXP
|
||||
ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
|
||||
do {
|
||||
bool is_ascii = subject->IsAsciiRepresentation();
|
||||
EnsureCompiledIrregexp(regexp, is_ascii);
|
||||
Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
|
||||
NativeRegExpMacroAssembler::Result res =
|
||||
@ -510,13 +493,13 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
|
||||
// being internal and external, and even between being ASCII and UC16,
|
||||
// but the characters are always the same).
|
||||
IrregexpPrepare(regexp, subject);
|
||||
is_ascii = subject->IsAsciiRepresentationUnderneath();
|
||||
} while (true);
|
||||
UNREACHABLE();
|
||||
return RE_EXCEPTION;
|
||||
#else // V8_INTERPRETED_REGEXP
|
||||
|
||||
ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
|
||||
bool is_ascii = subject->IsAsciiRepresentation();
|
||||
// We must have done EnsureCompiledIrregexp, so we can get the number of
|
||||
// registers.
|
||||
int* register_vector = output.start();
|
||||
|
@ -394,6 +394,10 @@ class StaticMarkingVisitor : public StaticVisitorBase {
|
||||
ConsString::BodyDescriptor,
|
||||
void>::Visit);
|
||||
|
||||
table_.Register(kVisitSlicedString,
|
||||
&FixedBodyVisitor<StaticMarkingVisitor,
|
||||
SlicedString::BodyDescriptor,
|
||||
void>::Visit);
|
||||
|
||||
table_.Register(kVisitFixedArray,
|
||||
&FlexibleBodyVisitor<StaticMarkingVisitor,
|
||||
|
@ -352,6 +352,31 @@ void String::StringVerify() {
|
||||
if (IsSymbol()) {
|
||||
CHECK(!HEAP->InNewSpace(this));
|
||||
}
|
||||
if (IsConsString()) {
|
||||
ConsString::cast(this)->ConsStringVerify();
|
||||
} else if (IsSlicedString()) {
|
||||
SlicedString::cast(this)->SlicedStringVerify();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Consistency check for a cons string. Verifies that:
//  - the first part is a string,
//  - the second part is either the heap's empty string or a string,
//  - the total length is at least String::kMinNonFlatLength,
//  - if the cons is flat, its first part is a sequential or external string.
void ConsString::ConsStringVerify() {
|
||||
CHECK(this->first()->IsString());
|
||||
CHECK(this->second() == GetHeap()->empty_string() ||
|
||||
this->second()->IsString());
|
||||
CHECK(this->length() >= String::kMinNonFlatLength);
|
||||
if (this->IsFlat()) {
|
||||
// A flat cons can only be created by String::SlowTryFlatten.
|
||||
// Afterwards, the first part may be externalized.
|
||||
CHECK(this->first()->IsSeqString() || this->first()->IsExternalString());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Consistency check for a sliced string: the parent must be neither a cons
// string nor another sliced string (i.e. slices do not nest), and the slice
// must be at least SlicedString::kMinLength characters long.
void SlicedString::SlicedStringVerify() {
|
||||
CHECK(!this->parent()->IsConsString());
|
||||
CHECK(!this->parent()->IsSlicedString());
|
||||
CHECK(this->length() >= SlicedString::kMinLength);
|
||||
}
|
||||
|
||||
|
||||
|
@ -178,10 +178,14 @@ bool Object::IsSymbol() {
|
||||
|
||||
|
||||
bool Object::IsConsString() {
|
||||
if (!this->IsHeapObject()) return false;
|
||||
uint32_t type = HeapObject::cast(this)->map()->instance_type();
|
||||
return (type & (kIsNotStringMask | kStringRepresentationMask)) ==
|
||||
(kStringTag | kConsStringTag);
|
||||
if (!IsString()) return false;
|
||||
return StringShape(String::cast(this)).IsCons();
|
||||
}
|
||||
|
||||
|
||||
// Returns true iff this object is a string whose StringShape reports the
// sliced representation. Non-string objects yield false.
bool Object::IsSlicedString() {
|
||||
// Guard first: String::cast below is only meaningful for strings.
if (!IsString()) return false;
|
||||
return StringShape(String::cast(this)).IsSliced();
|
||||
}
|
||||
|
||||
|
||||
@ -269,6 +273,38 @@ bool String::IsTwoByteRepresentation() {
|
||||
}
|
||||
|
||||
|
||||
// Returns whether the character data backing this string is ASCII, looking
// through one level of indirection. The string is asserted to be flat.
// The instance type is masked down to the indirect bit plus the encoding
// bits: a value equal to a bare encoding tag is answered directly, and every
// other value is delegated to the underlying string (see GetUnderlying()).
bool String::IsAsciiRepresentationUnderneath() {
|
||||
uint32_t type = map()->instance_type();
|
||||
// The indirect tag is non-zero and does not overlap the encoding bits, so
// the bare encoding tags in the switch below cannot match an indirect string.
STATIC_ASSERT(kIsIndirectStringTag != 0);
|
||||
STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
|
||||
ASSERT(IsFlat());
|
||||
switch (type & (kIsIndirectStringMask | kStringEncodingMask)) {
|
||||
case kAsciiStringTag:
|
||||
return true;
|
||||
case kTwoByteStringTag:
|
||||
return false;
|
||||
default: // Cons or sliced string. Need to go deeper.
|
||||
return GetUnderlying()->IsAsciiRepresentation();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns whether the character data backing this string is two-byte,
// looking through one level of indirection. Mirrors
// IsAsciiRepresentationUnderneath() with the answers inverted: a bare
// encoding tag is answered directly, and every other masked value is
// delegated to the underlying string. The string is asserted to be flat.
bool String::IsTwoByteRepresentationUnderneath() {
|
||||
uint32_t type = map()->instance_type();
|
||||
// The indirect tag is non-zero and does not overlap the encoding bits, so
// the bare encoding tags in the switch below cannot match an indirect string.
STATIC_ASSERT(kIsIndirectStringTag != 0);
|
||||
STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
|
||||
ASSERT(IsFlat());
|
||||
switch (type & (kIsIndirectStringMask | kStringEncodingMask)) {
|
||||
case kAsciiStringTag:
|
||||
return false;
|
||||
case kTwoByteStringTag:
|
||||
return true;
|
||||
default: // Cons or sliced string. Need to go deeper.
|
||||
return GetUnderlying()->IsTwoByteRepresentation();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool String::HasOnlyAsciiChars() {
|
||||
uint32_t type = map()->instance_type();
|
||||
return (type & kStringEncodingMask) == kAsciiStringTag ||
|
||||
@ -281,6 +317,16 @@ bool StringShape::IsCons() {
|
||||
}
|
||||
|
||||
|
||||
// True iff the representation bits of the cached instance type equal
// kSlicedStringTag.
bool StringShape::IsSliced() {
|
||||
return (type_ & kStringRepresentationMask) == kSlicedStringTag;
|
||||
}
|
||||
|
||||
|
||||
// True iff the bits selected by kIsIndirectStringMask equal
// kIsIndirectStringTag. Other code in this file treats cons and sliced
// strings as the indirect shapes.
bool StringShape::IsIndirect() {
|
||||
return (type_ & kIsIndirectStringMask) == kIsIndirectStringTag;
|
||||
}
|
||||
|
||||
|
||||
// True iff the representation bits of the cached instance type equal
// kExternalStringTag.
bool StringShape::IsExternal() {
|
||||
return (type_ & kStringRepresentationMask) == kExternalStringTag;
|
||||
}
|
||||
@ -2075,6 +2121,7 @@ CAST_ACCESSOR(String)
|
||||
CAST_ACCESSOR(SeqString)
|
||||
CAST_ACCESSOR(SeqAsciiString)
|
||||
CAST_ACCESSOR(SeqTwoByteString)
|
||||
CAST_ACCESSOR(SlicedString)
|
||||
CAST_ACCESSOR(ConsString)
|
||||
CAST_ACCESSOR(ExternalString)
|
||||
CAST_ACCESSOR(ExternalAsciiString)
|
||||
@ -2156,7 +2203,7 @@ bool String::Equals(String* other) {
|
||||
MaybeObject* String::TryFlatten(PretenureFlag pretenure) {
|
||||
if (!StringShape(this).IsCons()) return this;
|
||||
ConsString* cons = ConsString::cast(this);
|
||||
if (cons->second()->length() == 0) return cons->first();
|
||||
if (cons->IsFlat()) return cons->first();
|
||||
return SlowTryFlatten(pretenure);
|
||||
}
|
||||
|
||||
@ -2164,10 +2211,8 @@ MaybeObject* String::TryFlatten(PretenureFlag pretenure) {
|
||||
String* String::TryFlattenGetString(PretenureFlag pretenure) {
|
||||
MaybeObject* flat = TryFlatten(pretenure);
|
||||
Object* successfully_flattened;
|
||||
if (flat->ToObject(&successfully_flattened)) {
|
||||
return String::cast(successfully_flattened);
|
||||
}
|
||||
return this;
|
||||
if (!flat->ToObject(&successfully_flattened)) return this;
|
||||
return String::cast(successfully_flattened);
|
||||
}
|
||||
|
||||
|
||||
@ -2185,6 +2230,9 @@ uint16_t String::Get(int index) {
|
||||
return ExternalAsciiString::cast(this)->ExternalAsciiStringGet(index);
|
||||
case kExternalStringTag | kTwoByteStringTag:
|
||||
return ExternalTwoByteString::cast(this)->ExternalTwoByteStringGet(index);
|
||||
case kSlicedStringTag | kAsciiStringTag:
|
||||
case kSlicedStringTag | kTwoByteStringTag:
|
||||
return SlicedString::cast(this)->SlicedStringGet(index);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -2205,15 +2253,19 @@ void String::Set(int index, uint16_t value) {
|
||||
|
||||
|
||||
bool String::IsFlat() {
|
||||
switch (StringShape(this).representation_tag()) {
|
||||
case kConsStringTag: {
|
||||
String* second = ConsString::cast(this)->second();
|
||||
// Only flattened strings have second part empty.
|
||||
return second->length() == 0;
|
||||
}
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
if (!StringShape(this).IsCons()) return true;
|
||||
return ConsString::cast(this)->second()->length() == 0;
|
||||
}
|
||||
|
||||
|
||||
// Returns the string wrapped by an indirect (cons or sliced) string.
// Asserts that the receiver is flat and indirect. A single field read serves
// both shapes because ConsString::kFirstOffset and
// SlicedString::kParentOffset are statically asserted to be the same offset.
String* String::GetUnderlying() {
|
||||
// Giving direct access to underlying string only makes sense if the
|
||||
// wrapping string is already flattened.
|
||||
ASSERT(this->IsFlat());
|
||||
ASSERT(StringShape(this).IsIndirect());
|
||||
STATIC_ASSERT(ConsString::kFirstOffset == SlicedString::kParentOffset);
|
||||
const int kUnderlyingOffset = SlicedString::kParentOffset;
|
||||
return String::cast(READ_FIELD(this, kUnderlyingOffset));
|
||||
}
|
||||
|
||||
|
||||
@ -2272,6 +2324,20 @@ int SeqAsciiString::SeqAsciiStringSize(InstanceType instance_type) {
|
||||
}
|
||||
|
||||
|
||||
// Returns the parent string, read from the field at kParentOffset.
String* SlicedString::parent() {
|
||||
return String::cast(READ_FIELD(this, kParentOffset));
|
||||
}
|
||||
|
||||
|
||||
// Stores |parent| into the field at kParentOffset. The parent is asserted to
// be a sequential string.
// NOTE(review): SlicedStringVerify only rules out cons and sliced parents,
// which is a weaker condition than this assertion -- confirm which is
// intended.
void SlicedString::set_parent(String* parent) {
|
||||
ASSERT(parent->IsSeqString());
|
||||
WRITE_FIELD(this, kParentOffset, parent);
|
||||
}
|
||||
|
||||
|
||||
SMI_ACCESSORS(SlicedString, offset, kOffsetOffset)
|
||||
|
||||
|
||||
// Returns the first component of the cons string, read from the field at
// kFirstOffset.
String* ConsString::first() {
|
||||
return String::cast(READ_FIELD(this, kFirstOffset));
|
||||
}
|
||||
|
@ -58,6 +58,9 @@ StaticVisitorBase::VisitorId StaticVisitorBase::GetVisitorId(
|
||||
return kVisitConsString;
|
||||
}
|
||||
|
||||
case kSlicedStringTag:
|
||||
return kVisitSlicedString;
|
||||
|
||||
case kExternalStringTag:
|
||||
return GetVisitorIdForSize(kVisitDataObject,
|
||||
kVisitDataObjectGeneric,
|
||||
|
@ -115,6 +115,7 @@ class StaticVisitorBase : public AllStatic {
|
||||
kVisitStructGeneric,
|
||||
|
||||
kVisitConsString,
|
||||
kVisitSlicedString,
|
||||
kVisitOddball,
|
||||
kVisitCode,
|
||||
kVisitMap,
|
||||
@ -299,6 +300,11 @@ class StaticNewSpaceVisitor : public StaticVisitorBase {
|
||||
ConsString::BodyDescriptor,
|
||||
int>::Visit);
|
||||
|
||||
table_.Register(kVisitSlicedString,
|
||||
&FixedBodyVisitor<StaticVisitor,
|
||||
SlicedString::BodyDescriptor,
|
||||
int>::Visit);
|
||||
|
||||
table_.Register(kVisitFixedArray,
|
||||
&FlexibleBodyVisitor<StaticVisitor,
|
||||
FixedArray::BodyDescriptor,
|
||||
|
@ -1208,6 +1208,9 @@ void HeapObject::IterateBody(InstanceType type, int object_size,
|
||||
case kConsStringTag:
|
||||
ConsString::BodyDescriptor::IterateBody(this, v);
|
||||
break;
|
||||
case kSlicedStringTag:
|
||||
SlicedString::BodyDescriptor::IterateBody(this, v);
|
||||
break;
|
||||
case kExternalStringTag:
|
||||
if ((type & kStringEncodingMask) == kAsciiStringTag) {
|
||||
reinterpret_cast<ExternalAsciiString*>(this)->
|
||||
@ -5042,6 +5045,7 @@ String::FlatContent String::GetFlatContent() {
|
||||
int length = this->length();
|
||||
StringShape shape(this);
|
||||
String* string = this;
|
||||
int offset = 0;
|
||||
if (shape.representation_tag() == kConsStringTag) {
|
||||
ConsString* cons = ConsString::cast(string);
|
||||
if (cons->second()->length() != 0) {
|
||||
@ -5050,6 +5054,14 @@ String::FlatContent String::GetFlatContent() {
|
||||
string = cons->first();
|
||||
shape = StringShape(string);
|
||||
}
|
||||
if (shape.representation_tag() == kSlicedStringTag) {
|
||||
SlicedString* slice = SlicedString::cast(string);
|
||||
offset = slice->offset();
|
||||
string = slice->parent();
|
||||
shape = StringShape(string);
|
||||
ASSERT(shape.representation_tag() != kConsStringTag &&
|
||||
shape.representation_tag() != kSlicedStringTag);
|
||||
}
|
||||
if (shape.encoding_tag() == kAsciiStringTag) {
|
||||
const char* start;
|
||||
if (shape.representation_tag() == kSeqStringTag) {
|
||||
@ -5057,7 +5069,7 @@ String::FlatContent String::GetFlatContent() {
|
||||
} else {
|
||||
start = ExternalAsciiString::cast(string)->resource()->data();
|
||||
}
|
||||
return FlatContent(Vector<const char>(start, length));
|
||||
return FlatContent(Vector<const char>(start + offset, length));
|
||||
} else {
|
||||
ASSERT(shape.encoding_tag() == kTwoByteStringTag);
|
||||
const uc16* start;
|
||||
@ -5066,7 +5078,7 @@ String::FlatContent String::GetFlatContent() {
|
||||
} else {
|
||||
start = ExternalTwoByteString::cast(string)->resource()->data();
|
||||
}
|
||||
return FlatContent(Vector<const uc16>(start, length));
|
||||
return FlatContent(Vector<const uc16>(start + offset, length));
|
||||
}
|
||||
}
|
||||
|
||||
@ -5138,13 +5150,17 @@ const uc16* String::GetTwoByteData() {
|
||||
|
||||
|
||||
const uc16* String::GetTwoByteData(unsigned start) {
|
||||
ASSERT(!IsAsciiRepresentation());
|
||||
ASSERT(!IsAsciiRepresentationUnderneath());
|
||||
switch (StringShape(this).representation_tag()) {
|
||||
case kSeqStringTag:
|
||||
return SeqTwoByteString::cast(this)->SeqTwoByteStringGetData(start);
|
||||
case kExternalStringTag:
|
||||
return ExternalTwoByteString::cast(this)->
|
||||
ExternalTwoByteStringGetData(start);
|
||||
case kSlicedStringTag: {
|
||||
SlicedString* slice = SlicedString::cast(this);
|
||||
return slice->parent()->GetTwoByteData(start + slice->offset());
|
||||
}
|
||||
case kConsStringTag:
|
||||
UNREACHABLE();
|
||||
return NULL;
|
||||
@ -5435,6 +5451,10 @@ const unibrow::byte* String::ReadBlock(String* input,
|
||||
max_chars);
|
||||
return rbb->util_buffer;
|
||||
}
|
||||
case kSlicedStringTag:
|
||||
return SlicedString::cast(input)->SlicedStringReadBlock(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -5578,6 +5598,11 @@ void String::ReadBlockIntoBuffer(String* input,
|
||||
max_chars);
|
||||
}
|
||||
return;
|
||||
case kSlicedStringTag:
|
||||
SlicedString::cast(input)->SlicedStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -5712,6 +5737,31 @@ uint16_t ConsString::ConsStringGet(int index) {
|
||||
}
|
||||
|
||||
|
||||
// Returns the character at position |index| of this slice.  The slice holds
// no characters itself, so the lookup is forwarded to the parent string at
// the position shifted by the slice's offset.
uint16_t SlicedString::SlicedStringGet(int index) {
  const int parent_index = offset() + index;
  return parent()->Get(parent_index);
}
|
||||
|
||||
|
||||
// StringInputBuffer support for slices.  Delegates to String::ReadBlock on
// the parent string.  |*offset_ptr| is shifted by the slice offset for the
// duration of the call and shifted back afterwards, so the caller keeps
// working with slice-relative positions.
const unibrow::byte* SlicedString::SlicedStringReadBlock(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();

  // Slice-relative position -> parent-relative position.
  *offset_ptr += slice_start;
  String* const underlying = String::cast(parent());
  const unibrow::byte* const block =
      String::ReadBlock(underlying, buffer, offset_ptr, chars);

  // Parent-relative position -> slice-relative position.
  *offset_ptr -= slice_start;
  return block;
}
|
||||
|
||||
|
||||
// StringInputBuffer support for slices.  Delegates to
// String::ReadBlockIntoBuffer on the parent string.  |*offset_ptr| is shifted
// by the slice offset around the call and restored to a slice-relative value
// before returning.
void SlicedString::SlicedStringReadBlockIntoBuffer(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();

  // Slice-relative position -> parent-relative position.
  *offset_ptr += slice_start;
  String* const underlying = String::cast(parent());
  String::ReadBlockIntoBuffer(underlying, buffer, offset_ptr, chars);

  // Parent-relative position -> slice-relative position.
  *offset_ptr -= slice_start;
}
|
||||
|
||||
template <typename sinkchar>
|
||||
void String::WriteToFlat(String* src,
|
||||
sinkchar* sink,
|
||||
@ -5779,6 +5829,13 @@ void String::WriteToFlat(String* src,
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kAsciiStringTag | kSlicedStringTag:
|
||||
case kTwoByteStringTag | kSlicedStringTag: {
|
||||
SlicedString* slice = SlicedString::cast(source);
|
||||
unsigned offset = slice->offset();
|
||||
WriteToFlat(slice->parent(), sink, from + offset, to + offset);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
102
src/objects.h
102
src/objects.h
@ -89,6 +89,7 @@
|
||||
// - SeqString
|
||||
// - SeqAsciiString
|
||||
// - SeqTwoByteString
|
||||
// - SlicedString
|
||||
// - ConsString
|
||||
// - ExternalString
|
||||
// - ExternalAsciiString
|
||||
@ -283,6 +284,7 @@ static const int kVariableSizeSentinel = 0;
|
||||
V(ASCII_STRING_TYPE) \
|
||||
V(CONS_STRING_TYPE) \
|
||||
V(CONS_ASCII_STRING_TYPE) \
|
||||
V(SLICED_STRING_TYPE) \
|
||||
V(EXTERNAL_STRING_TYPE) \
|
||||
V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE) \
|
||||
V(EXTERNAL_ASCII_STRING_TYPE) \
|
||||
@ -401,6 +403,14 @@ static const int kVariableSizeSentinel = 0;
|
||||
ConsString::kSize, \
|
||||
cons_ascii_string, \
|
||||
ConsAsciiString) \
|
||||
V(SLICED_STRING_TYPE, \
|
||||
SlicedString::kSize, \
|
||||
sliced_string, \
|
||||
SlicedString) \
|
||||
V(SLICED_ASCII_STRING_TYPE, \
|
||||
SlicedString::kSize, \
|
||||
sliced_ascii_string, \
|
||||
SlicedAsciiString) \
|
||||
V(EXTERNAL_STRING_TYPE, \
|
||||
ExternalTwoByteString::kSize, \
|
||||
external_string, \
|
||||
@ -474,9 +484,17 @@ const uint32_t kStringRepresentationMask = 0x03;
|
||||
enum StringRepresentationTag {
|
||||
kSeqStringTag = 0x0,
|
||||
kConsStringTag = 0x1,
|
||||
kExternalStringTag = 0x2
|
||||
kExternalStringTag = 0x2,
|
||||
kSlicedStringTag = 0x3
|
||||
};
|
||||
const uint32_t kIsConsStringMask = 0x1;
|
||||
const uint32_t kIsIndirectStringMask = 0x1;
|
||||
const uint32_t kIsIndirectStringTag = 0x1;
|
||||
STATIC_ASSERT((kSeqStringTag & kIsIndirectStringMask) == 0);
|
||||
STATIC_ASSERT((kExternalStringTag & kIsIndirectStringMask) == 0);
|
||||
STATIC_ASSERT(
|
||||
(kConsStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
|
||||
STATIC_ASSERT(
|
||||
(kSlicedStringTag & kIsIndirectStringMask) == kIsIndirectStringTag);
|
||||
|
||||
// If bit 7 is clear, then bit 3 indicates whether this two-byte
|
||||
// string actually contains ascii data.
|
||||
@ -511,6 +529,8 @@ enum InstanceType {
|
||||
ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
|
||||
CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
|
||||
CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
|
||||
SLICED_STRING_TYPE = kTwoByteStringTag | kSlicedStringTag,
|
||||
SLICED_ASCII_STRING_TYPE = kAsciiStringTag | kSlicedStringTag,
|
||||
EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
|
||||
EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
|
||||
kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
|
||||
@ -718,6 +738,7 @@ class MaybeObject BASE_EMBEDDED {
|
||||
V(SeqString) \
|
||||
V(ExternalString) \
|
||||
V(ConsString) \
|
||||
V(SlicedString) \
|
||||
V(ExternalTwoByteString) \
|
||||
V(ExternalAsciiString) \
|
||||
V(SeqTwoByteString) \
|
||||
@ -5783,6 +5804,8 @@ class StringShape BASE_EMBEDDED {
|
||||
inline bool IsSequential();
|
||||
inline bool IsExternal();
|
||||
inline bool IsCons();
|
||||
inline bool IsSliced();
|
||||
inline bool IsIndirect();
|
||||
inline bool IsExternalAscii();
|
||||
inline bool IsExternalTwoByte();
|
||||
inline bool IsSequentialAscii();
|
||||
@ -5874,14 +5897,19 @@ class String: public HeapObject {
|
||||
inline uint32_t hash_field();
|
||||
inline void set_hash_field(uint32_t value);
|
||||
|
||||
inline bool IsAsciiRepresentation();
|
||||
inline bool IsTwoByteRepresentation();
|
||||
|
||||
// Returns whether this string has only ASCII chars, i.e. all of them can
|
||||
// be ASCII encoded. This might be the case even if the string is
|
||||
// two-byte. Such strings may appear when the embedder prefers
|
||||
// two-byte external representations even for ASCII data.
|
||||
//
|
||||
inline bool IsAsciiRepresentation();
|
||||
inline bool IsTwoByteRepresentation();
|
||||
|
||||
// Cons and slices have an encoding flag that may not represent the actual
|
||||
// encoding of the underlying string. This is taken into account here.
|
||||
// Requires: this->IsFlat()
|
||||
inline bool IsAsciiRepresentationUnderneath();
|
||||
inline bool IsTwoByteRepresentationUnderneath();
|
||||
|
||||
// NOTE: this should be considered only a hint. False negatives are
|
||||
// possible.
|
||||
inline bool HasOnlyAsciiChars();
|
||||
@ -5921,6 +5949,10 @@ class String: public HeapObject {
|
||||
// kind.
|
||||
FlatContent GetFlatContent();
|
||||
|
||||
// Returns the parent of a sliced string or first part of a flat cons string.
|
||||
// Requires: StringShape(this).IsIndirect() && this->IsFlat()
|
||||
inline String* GetUnderlying();
|
||||
|
||||
// Mark the string as an undetectable object. It only applies to
|
||||
// ascii and two byte string types.
|
||||
bool MarkAsUndetectable();
|
||||
@ -6349,11 +6381,69 @@ class ConsString: public String {
|
||||
typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize>
|
||||
BodyDescriptor;
|
||||
|
||||
#ifdef DEBUG
|
||||
void ConsStringVerify();
|
||||
#endif
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString);
|
||||
};
|
||||
|
||||
|
||||
// The Sliced String class describes strings that are substrings of another
|
||||
// sequential string. The motivation is to save time and memory when creating
|
||||
// a substring. A Sliced String is described as a pointer to the parent,
|
||||
// the offset from the start of the parent string and the length. Using
|
||||
// a Sliced String therefore requires unpacking of the parent string and
|
||||
// adding the offset to the start address.  Substrings of a Sliced String
|
||||
// are not nested since the double indirection is simplified when creating
|
||||
// such a substring.
|
||||
// Currently missing features are:
|
||||
// - handling externalized parent strings
|
||||
// - external strings as parent
|
||||
// - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
|
||||
class SlicedString: public String {
|
||||
public:
|
||||
|
||||
inline String* parent();
|
||||
inline void set_parent(String* parent);
|
||||
inline int offset();
|
||||
inline void set_offset(int offset);
|
||||
|
||||
// Dispatched behavior.
|
||||
uint16_t SlicedStringGet(int index);
|
||||
|
||||
// Casting.
|
||||
static inline SlicedString* cast(Object* obj);
|
||||
|
||||
// Layout description.
|
||||
static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize);
|
||||
static const int kOffsetOffset = kParentOffset + kPointerSize;
|
||||
static const int kSize = kOffsetOffset + kPointerSize;
|
||||
|
||||
// Support for StringInputBuffer
|
||||
inline const unibrow::byte* SlicedStringReadBlock(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
inline void SlicedStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
// Minimum length for a sliced string.
|
||||
static const int kMinLength = 13;
|
||||
|
||||
typedef FixedBodyDescriptor<kParentOffset,
|
||||
kOffsetOffset + kPointerSize, kSize>
|
||||
BodyDescriptor;
|
||||
|
||||
#ifdef DEBUG
|
||||
void SlicedStringVerify();
|
||||
#endif
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString);
|
||||
};
|
||||
|
||||
|
||||
// The ExternalString class describes string values that are backed by
|
||||
// a string resource that lies outside the V8 heap. ExternalStrings
|
||||
// consist of the length field common to all strings, a pointer to the
|
||||
|
@ -120,27 +120,31 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
|
||||
String* subject_ptr = *subject;
|
||||
// Character offsets into string.
|
||||
int start_offset = previous_index;
|
||||
int end_offset = subject_ptr->length();
|
||||
int char_length = subject_ptr->length() - start_offset;
|
||||
int slice_offset = 0;
|
||||
|
||||
// The string has been flattened, so it it is a cons string it contains the
|
||||
// The string has been flattened, so if it is a cons string it contains the
|
||||
// full string in the first part.
|
||||
if (StringShape(subject_ptr).IsCons()) {
|
||||
ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
|
||||
subject_ptr = ConsString::cast(subject_ptr)->first();
|
||||
} else if (StringShape(subject_ptr).IsSliced()) {
|
||||
SlicedString* slice = SlicedString::cast(subject_ptr);
|
||||
subject_ptr = slice->parent();
|
||||
slice_offset = slice->offset();
|
||||
}
|
||||
// Ensure that an underlying string has the same ascii-ness.
|
||||
bool is_ascii = subject_ptr->IsAsciiRepresentation();
|
||||
ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
|
||||
// String is now either Sequential or External
|
||||
int char_size_shift = is_ascii ? 0 : 1;
|
||||
int char_length = end_offset - start_offset;
|
||||
|
||||
const byte* input_start =
|
||||
StringCharacterPosition(subject_ptr, start_offset);
|
||||
StringCharacterPosition(subject_ptr, start_offset + slice_offset);
|
||||
int byte_length = char_length << char_size_shift;
|
||||
const byte* input_end = input_start + byte_length;
|
||||
Result res = Execute(*regexp_code,
|
||||
subject_ptr,
|
||||
*subject,
|
||||
start_offset,
|
||||
input_start,
|
||||
input_end,
|
||||
@ -152,7 +156,7 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
|
||||
|
||||
NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
|
||||
Code* code,
|
||||
String* input,
|
||||
String* input, // This needs to be the unpacked (sliced, cons) string.
|
||||
int start_offset,
|
||||
const byte* input_start,
|
||||
const byte* input_end,
|
||||
|
@ -3674,7 +3674,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
|
||||
HandleScope handles(isolate);
|
||||
|
||||
CONVERT_ARG_CHECKED(String, subject, 1);
|
||||
if (!subject->IsFlat()) { FlattenString(subject); }
|
||||
if (!subject->IsFlat()) FlattenString(subject);
|
||||
CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
|
||||
CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
|
||||
CONVERT_ARG_CHECKED(JSArray, result_array, 3);
|
||||
|
@ -2374,7 +2374,6 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ testq(kScratchRegister, kScratchRegister);
|
||||
__ j(zero, &runtime);
|
||||
|
||||
|
||||
// Check that the first argument is a JSRegExp object.
|
||||
__ movq(rax, Operand(rsp, kJSRegExpOffset));
|
||||
__ JumpIfSmi(rax, &runtime);
|
||||
@ -2445,10 +2444,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
__ cmpl(rdx, rdi);
|
||||
__ j(greater, &runtime);
|
||||
|
||||
// Reset offset for possibly sliced string.
|
||||
__ Set(r14, 0);
|
||||
// rax: RegExp data (FixedArray)
|
||||
// Check the representation and encoding of the subject string.
|
||||
Label seq_ascii_string, seq_two_byte_string, check_code;
|
||||
__ movq(rdi, Operand(rsp, kSubjectOffset));
|
||||
// Make a copy of the original subject string.
|
||||
__ movq(r15, rdi);
|
||||
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
|
||||
__ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset));
|
||||
// First check for flat two byte string.
|
||||
@ -2457,28 +2460,40 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0);
|
||||
__ j(zero, &seq_two_byte_string, Label::kNear);
|
||||
// Any other flat string must be a flat ascii string.
|
||||
__ testb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
|
||||
__ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask));
|
||||
__ j(zero, &seq_ascii_string, Label::kNear);
|
||||
|
||||
// Check for flat cons string.
|
||||
// Check for flat cons string or sliced string.
|
||||
// A flat cons string is a cons string where the second part is the empty
|
||||
// string. In that case the subject string is just the first part of the cons
|
||||
// string. Also in this case the first part of the cons string is known to be
|
||||
// a sequential string or an external string.
|
||||
STATIC_ASSERT(kExternalStringTag !=0);
|
||||
STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
|
||||
__ testb(rbx, Immediate(kIsNotStringMask | kExternalStringTag));
|
||||
__ j(not_zero, &runtime);
|
||||
// String is a cons string.
|
||||
// In the case of a sliced string its offset has to be taken into account.
|
||||
Label cons_string, check_encoding;
|
||||
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
|
||||
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
|
||||
__ cmpq(rbx, Immediate(kExternalStringTag));
|
||||
__ j(less, &cons_string, Label::kNear);
|
||||
__ j(equal, &runtime);
|
||||
|
||||
// String is sliced.
|
||||
__ SmiToInteger32(r14, FieldOperand(rdi, SlicedString::kOffsetOffset));
|
||||
__ movq(rdi, FieldOperand(rdi, SlicedString::kParentOffset));
|
||||
// r14: slice offset
|
||||
// r15: original subject string
|
||||
// rdi: parent string
|
||||
__ jmp(&check_encoding, Label::kNear);
|
||||
// String is a cons string, check whether it is flat.
|
||||
__ bind(&cons_string);
|
||||
__ CompareRoot(FieldOperand(rdi, ConsString::kSecondOffset),
|
||||
Heap::kEmptyStringRootIndex);
|
||||
__ j(not_equal, &runtime);
|
||||
__ movq(rdi, FieldOperand(rdi, ConsString::kFirstOffset));
|
||||
// rdi: first part of cons string or parent of sliced string.
|
||||
// rbx: map of first part of cons string or map of parent of sliced string.
|
||||
// Is first part of cons or parent of slice a flat two byte string?
|
||||
__ bind(&check_encoding);
|
||||
__ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset));
|
||||
// String is a cons string with empty second part.
|
||||
// rdi: first part of cons string.
|
||||
// rbx: map of first part of cons string.
|
||||
// Is first part a flat two byte string?
|
||||
__ testb(FieldOperand(rbx, Map::kInstanceTypeOffset),
|
||||
Immediate(kStringRepresentationMask | kStringEncodingMask));
|
||||
STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0);
|
||||
@ -2575,33 +2590,40 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
|
||||
// rbx: previous index
|
||||
// rcx: encoding of subject string (1 if ascii 0 if two_byte);
|
||||
// r11: code
|
||||
// r14: slice offset
|
||||
// r15: original subject string
|
||||
|
||||
// Argument 4: End of string data
|
||||
// Argument 3: Start of string data
|
||||
Label setup_two_byte, setup_rest;
|
||||
__ testb(rcx, rcx); // Last use of rcx as encoding of subject string.
|
||||
__ j(zero, &setup_two_byte, Label::kNear);
|
||||
__ SmiToInteger32(rcx, FieldOperand(rdi, String::kLengthOffset));
|
||||
__ lea(arg4, FieldOperand(rdi, rcx, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ lea(arg3, FieldOperand(rdi, rbx, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ jmp(&setup_rest, Label::kNear);
|
||||
__ bind(&setup_two_byte);
|
||||
__ SmiToInteger32(rcx, FieldOperand(rdi, String::kLengthOffset));
|
||||
__ lea(arg4, FieldOperand(rdi, rcx, times_2, SeqTwoByteString::kHeaderSize));
|
||||
__ lea(arg3, FieldOperand(rdi, rbx, times_2, SeqTwoByteString::kHeaderSize));
|
||||
|
||||
__ bind(&setup_rest);
|
||||
// Argument 2: Previous index.
|
||||
__ movq(arg2, rbx);
|
||||
|
||||
// Argument 1: Subject string.
|
||||
#ifdef _WIN64
|
||||
__ movq(arg1, rdi);
|
||||
#else
|
||||
// Already there in AMD64 calling convention.
|
||||
ASSERT(arg1.is(rdi));
|
||||
USE(arg1);
|
||||
#endif
|
||||
// Argument 4: End of string data
|
||||
// Argument 3: Start of string data
|
||||
Label setup_two_byte, setup_rest, got_length, length_not_from_slice;
|
||||
// Prepare start and end index of the input.
|
||||
// Load the length from the original sliced string if that is the case.
|
||||
__ addq(rbx, r14);
|
||||
__ SmiToInteger32(arg3, FieldOperand(r15, String::kLengthOffset));
|
||||
__ addq(r14, arg3); // Using arg3 as scratch.
|
||||
|
||||
// rbx: start index of the input
|
||||
// r14: end index of the input
|
||||
// r15: original subject string
|
||||
__ testb(rcx, rcx); // Last use of rcx as encoding of subject string.
|
||||
__ j(zero, &setup_two_byte, Label::kNear);
|
||||
__ lea(arg4, FieldOperand(rdi, r14, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ lea(arg3, FieldOperand(rdi, rbx, times_1, SeqAsciiString::kHeaderSize));
|
||||
__ jmp(&setup_rest, Label::kNear);
|
||||
__ bind(&setup_two_byte);
|
||||
__ lea(arg4, FieldOperand(rdi, r14, times_2, SeqTwoByteString::kHeaderSize));
|
||||
__ lea(arg3, FieldOperand(rdi, rbx, times_2, SeqTwoByteString::kHeaderSize));
|
||||
__ bind(&setup_rest);
|
||||
|
||||
// Argument 1: Original subject string.
|
||||
// The original subject is in the previous stack frame. Therefore we have to
|
||||
// use rbp, which points exactly to one pointer size below the previous rsp.
|
||||
// (Because creating a new stack frame pushes the previous rbp onto the stack
|
||||
// and thereby moves up rsp by one kPointerSize.)
|
||||
__ movq(arg1, r15);
|
||||
|
||||
// Locate the code entry and call it.
|
||||
__ addq(r11, Immediate(Code::kHeaderSize - kHeapObjectTag));
|
||||
@ -3851,6 +3873,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
Label flat_string;
|
||||
Label ascii_string;
|
||||
Label got_char_code;
|
||||
Label sliced_string;
|
||||
|
||||
// If the receiver is a smi trigger the non-string case.
|
||||
__ JumpIfSmi(object_, receiver_not_string_);
|
||||
@ -3879,25 +3902,39 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
|
||||
__ j(zero, &flat_string);
|
||||
|
||||
// Handle non-flat strings.
|
||||
__ testb(result_, Immediate(kIsConsStringMask));
|
||||
__ j(zero, &call_runtime_);
|
||||
__ and_(result_, Immediate(kStringRepresentationMask));
|
||||
STATIC_ASSERT((kConsStringTag < kExternalStringTag));
|
||||
STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
|
||||
__ cmpb(result_, Immediate(kExternalStringTag));
|
||||
__ j(greater, &sliced_string);
|
||||
__ j(equal, &call_runtime_);
|
||||
|
||||
// ConsString.
|
||||
// Check whether the right hand side is the empty string (i.e. if
|
||||
// this is really a flat string in a cons string). If that is not
|
||||
// the case we would rather go to the runtime system now to flatten
|
||||
// the string.
|
||||
Label assure_seq_string;
|
||||
__ CompareRoot(FieldOperand(object_, ConsString::kSecondOffset),
|
||||
Heap::kEmptyStringRootIndex);
|
||||
__ j(not_equal, &call_runtime_);
|
||||
// Get the first of the two strings and load its instance type.
|
||||
__ movq(object_, FieldOperand(object_, ConsString::kFirstOffset));
|
||||
__ jmp(&assure_seq_string, Label::kNear);
|
||||
|
||||
// SlicedString, unpack and add offset.
|
||||
__ bind(&sliced_string);
|
||||
__ addq(scratch_, FieldOperand(object_, SlicedString::kOffsetOffset));
|
||||
__ movq(object_, FieldOperand(object_, SlicedString::kParentOffset));
|
||||
|
||||
__ bind(&assure_seq_string);
|
||||
__ movq(result_, FieldOperand(object_, HeapObject::kMapOffset));
|
||||
__ movzxbl(result_, FieldOperand(result_, Map::kInstanceTypeOffset));
|
||||
// If the first cons component is also non-flat, then go to runtime.
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ testb(result_, Immediate(kStringRepresentationMask));
|
||||
__ j(not_zero, &call_runtime_);
|
||||
__ jmp(&flat_string);
|
||||
|
||||
// Check for 1-byte or 2-byte string.
|
||||
__ bind(&flat_string);
|
||||
@ -4208,6 +4245,8 @@ void StringAddStub::Generate(MacroAssembler* masm) {
|
||||
__ and_(rcx, Immediate(kStringRepresentationMask));
|
||||
__ cmpl(rcx, Immediate(kExternalStringTag));
|
||||
__ j(equal, &string_add_runtime);
|
||||
// We cannot encounter sliced strings here since:
|
||||
STATIC_ASSERT(SlicedString::kMinLength >= String::kMinNonFlatLength);
|
||||
// Now check if both strings are ascii strings.
|
||||
// rax: first string
|
||||
// rbx: length of resulting flat string
|
||||
@ -4600,6 +4639,9 @@ void StringHelper::GenerateHashGetHash(MacroAssembler* masm,
|
||||
void SubStringStub::Generate(MacroAssembler* masm) {
|
||||
Label runtime;
|
||||
|
||||
if (FLAG_string_slices) {
|
||||
__ jmp(&runtime);
|
||||
}
|
||||
// Stack frame on entry.
|
||||
// rsp[0]: return address
|
||||
// rsp[8]: to
|
||||
|
@ -3200,95 +3200,80 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) {
|
||||
};
|
||||
|
||||
Register string = ToRegister(instr->string());
|
||||
Register index = no_reg;
|
||||
int const_index = -1;
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
const_index = ToInteger32(LConstantOperand::cast(instr->index()));
|
||||
STATIC_ASSERT(String::kMaxLength <= Smi::kMaxValue);
|
||||
if (!Smi::IsValid(const_index)) {
|
||||
// Guaranteed to be out of bounds because of the assert above.
|
||||
// So the bounds check that must dominate this instruction must
|
||||
// have deoptimized already.
|
||||
if (FLAG_debug_code) {
|
||||
__ Abort("StringCharCodeAt: out of bounds index.");
|
||||
}
|
||||
// No code needs to be generated.
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
index = ToRegister(instr->index());
|
||||
}
|
||||
Register index = ToRegister(instr->index());
|
||||
Register result = ToRegister(instr->result());
|
||||
|
||||
DeferredStringCharCodeAt* deferred =
|
||||
new DeferredStringCharCodeAt(this, instr);
|
||||
|
||||
Label flat_string, ascii_string, done;
|
||||
|
||||
// Fetch the instance type of the receiver into result register.
|
||||
__ movq(result, FieldOperand(string, HeapObject::kMapOffset));
|
||||
__ movzxbl(result, FieldOperand(result, Map::kInstanceTypeOffset));
|
||||
|
||||
// We need special handling for non-sequential strings.
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ testb(result, Immediate(kStringRepresentationMask));
|
||||
__ j(zero, &flat_string, Label::kNear);
|
||||
// We need special handling for indirect strings.
|
||||
Label check_sequential;
|
||||
__ testb(result, Immediate(kIsIndirectStringMask));
|
||||
__ j(zero, &check_sequential, Label::kNear);
|
||||
|
||||
// Handle cons strings and go to deferred code for the rest.
|
||||
__ testb(result, Immediate(kIsConsStringMask));
|
||||
__ j(zero, deferred->entry());
|
||||
// Dispatch on the indirect string shape: slice or cons.
|
||||
Label cons_string;
|
||||
const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag;
|
||||
ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0);
|
||||
__ testb(result, Immediate(kSlicedNotConsMask));
|
||||
__ j(zero, &cons_string, Label::kNear);
|
||||
|
||||
// ConsString.
|
||||
// Handle slices.
|
||||
Label indirect_string_loaded;
|
||||
__ SmiToInteger32(result, FieldOperand(string, SlicedString::kOffsetOffset));
|
||||
__ addq(index, result);
|
||||
__ movq(string, FieldOperand(string, SlicedString::kParentOffset));
|
||||
__ jmp(&indirect_string_loaded, Label::kNear);
|
||||
|
||||
// Handle conses.
|
||||
// Check whether the right hand side is the empty string (i.e. if
|
||||
// this is really a flat string in a cons string). If that is not
|
||||
// the case we would rather go to the runtime system now to flatten
|
||||
// the string.
|
||||
__ bind(&cons_string);
|
||||
__ CompareRoot(FieldOperand(string, ConsString::kSecondOffset),
|
||||
Heap::kEmptyStringRootIndex);
|
||||
__ j(not_equal, deferred->entry());
|
||||
// Get the first of the two strings and load its instance type.
|
||||
__ movq(string, FieldOperand(string, ConsString::kFirstOffset));
|
||||
|
||||
__ bind(&indirect_string_loaded);
|
||||
__ movq(result, FieldOperand(string, HeapObject::kMapOffset));
|
||||
__ movzxbl(result, FieldOperand(result, Map::kInstanceTypeOffset));
|
||||
// If the first cons component is also non-flat, then go to runtime.
|
||||
|
||||
// Check whether the string is sequential. The only non-sequential
|
||||
// shapes we support have just been unwrapped above.
|
||||
__ bind(&check_sequential);
|
||||
STATIC_ASSERT(kSeqStringTag == 0);
|
||||
__ testb(result, Immediate(kStringRepresentationMask));
|
||||
__ j(not_zero, deferred->entry());
|
||||
|
||||
// Check for ASCII or two-byte string.
|
||||
__ bind(&flat_string);
|
||||
// Dispatch on the encoding: ASCII or two-byte.
|
||||
Label ascii_string;
|
||||
STATIC_ASSERT(kAsciiStringTag != 0);
|
||||
__ testb(result, Immediate(kStringEncodingMask));
|
||||
__ j(not_zero, &ascii_string, Label::kNear);
|
||||
|
||||
// Two-byte string.
|
||||
// Load the two-byte character code into the result register.
|
||||
Label done;
|
||||
STATIC_ASSERT(kSmiTag == 0 && kSmiTagSize == 1);
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
__ movzxwl(result,
|
||||
FieldOperand(string,
|
||||
SeqTwoByteString::kHeaderSize +
|
||||
(kUC16Size * const_index)));
|
||||
} else {
|
||||
__ movzxwl(result, FieldOperand(string,
|
||||
index,
|
||||
times_2,
|
||||
SeqTwoByteString::kHeaderSize));
|
||||
}
|
||||
__ movzxwl(result, FieldOperand(string,
|
||||
index,
|
||||
times_2,
|
||||
SeqTwoByteString::kHeaderSize));
|
||||
__ jmp(&done, Label::kNear);
|
||||
|
||||
// ASCII string.
|
||||
// Load the byte into the result register.
|
||||
__ bind(&ascii_string);
|
||||
if (instr->index()->IsConstantOperand()) {
|
||||
__ movzxbl(result, FieldOperand(string,
|
||||
SeqAsciiString::kHeaderSize + const_index));
|
||||
} else {
|
||||
__ movzxbl(result, FieldOperand(string,
|
||||
index,
|
||||
times_1,
|
||||
SeqAsciiString::kHeaderSize));
|
||||
}
|
||||
__ movzxbl(result, FieldOperand(string,
|
||||
index,
|
||||
times_1,
|
||||
SeqAsciiString::kHeaderSize));
|
||||
__ bind(&done);
|
||||
__ bind(deferred->exit());
|
||||
}
|
||||
|
@ -1984,8 +1984,8 @@ LInstruction* LChunkBuilder::DoStringAdd(HStringAdd* instr) {
|
||||
|
||||
|
||||
LInstruction* LChunkBuilder::DoStringCharCodeAt(HStringCharCodeAt* instr) {
|
||||
LOperand* string = UseRegister(instr->string());
|
||||
LOperand* index = UseRegisterOrConstant(instr->index());
|
||||
LOperand* string = UseTempRegister(instr->string());
|
||||
LOperand* index = UseTempRegister(instr->index());
|
||||
LStringCharCodeAt* result = new LStringCharCodeAt(string, index);
|
||||
return AssignEnvironment(AssignPointerMap(DefineAsRegister(result)));
|
||||
}
|
||||
|
@ -1170,12 +1170,13 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
|
||||
}
|
||||
|
||||
// Prepare for possible GC.
|
||||
HandleScope handles;
|
||||
HandleScope handles(isolate);
|
||||
Handle<Code> code_handle(re_code);
|
||||
|
||||
Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
|
||||
|
||||
// Current string.
|
||||
bool is_ascii = subject->IsAsciiRepresentation();
|
||||
bool is_ascii = subject->IsAsciiRepresentationUnderneath();
|
||||
|
||||
ASSERT(re_code->instruction_start() <= *return_address);
|
||||
ASSERT(*return_address <=
|
||||
@ -1184,7 +1185,7 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
|
||||
MaybeObject* result = Execution::HandleStackGuardInterrupt();
|
||||
|
||||
if (*code_handle != re_code) { // Return address no longer valid
|
||||
intptr_t delta = *code_handle - re_code;
|
||||
int delta = *code_handle - re_code;
|
||||
// Overwrite the return address on the stack.
|
||||
*return_address += delta;
|
||||
}
|
||||
@ -1193,8 +1194,20 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
|
||||
return EXCEPTION;
|
||||
}
|
||||
|
||||
Handle<String> subject_tmp = subject;
|
||||
int slice_offset = 0;
|
||||
|
||||
// Extract the underlying string and the slice offset.
|
||||
if (StringShape(*subject_tmp).IsCons()) {
|
||||
subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
|
||||
} else if (StringShape(*subject_tmp).IsSliced()) {
|
||||
SlicedString* slice = SlicedString::cast(*subject_tmp);
|
||||
subject_tmp = Handle<String>(slice->parent());
|
||||
slice_offset = slice->offset();
|
||||
}
|
||||
|
||||
// String might have changed.
|
||||
if (subject->IsAsciiRepresentation() != is_ascii) {
|
||||
if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
|
||||
// If we changed between an ASCII and an UC16 string, the specialized
|
||||
// code cannot be used, and we need to restart regexp matching from
|
||||
// scratch (including, potentially, compiling a new version of the code).
|
||||
@ -1205,8 +1218,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
|
||||
// be a sequential or external string with the same content.
|
||||
// Update the start and end pointers in the stack frame to the current
|
||||
// location (whether it has actually moved or not).
|
||||
ASSERT(StringShape(*subject).IsSequential() ||
|
||||
StringShape(*subject).IsExternal());
|
||||
ASSERT(StringShape(*subject_tmp).IsSequential() ||
|
||||
StringShape(*subject_tmp).IsExternal());
|
||||
|
||||
// The original start address of the characters to match.
|
||||
const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
|
||||
@ -1214,7 +1227,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
|
||||
// Find the current start address of the same character at the current string
|
||||
// position.
|
||||
int start_index = frame_entry<int>(re_frame, kStartIndex);
|
||||
const byte* new_address = StringCharacterPosition(*subject, start_index);
|
||||
const byte* new_address = StringCharacterPosition(*subject_tmp,
|
||||
start_index + slice_offset);
|
||||
|
||||
if (start_address != new_address) {
|
||||
// If there is a difference, update the object pointer and start and end
|
||||
|
@ -2174,7 +2174,7 @@ TEST(ScriptBreakPointLine) {
|
||||
f = v8::Local<v8::Function>::Cast(env->Global()->Get(v8::String::New("f")));
|
||||
g = v8::Local<v8::Function>::Cast(env->Global()->Get(v8::String::New("g")));
|
||||
|
||||
// Chesk that a break point was hit when the script was run.
|
||||
// Check that a break point was hit when the script was run.
|
||||
CHECK_EQ(1, break_point_hit_count);
|
||||
CHECK_EQ(0, StrLength(last_function_hit));
|
||||
|
||||
|
@ -430,8 +430,7 @@ TEST(ExternalShortStringAdd) {
|
||||
" return 0;"
|
||||
"};"
|
||||
"test()";
|
||||
CHECK_EQ(0,
|
||||
v8::Script::Compile(v8::String::New(source))->Run()->Int32Value());
|
||||
CHECK_EQ(0, CompileRun(source)->Int32Value());
|
||||
}
|
||||
|
||||
|
||||
@ -481,3 +480,52 @@ TEST(CachedHashOverflow) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Checks that taking a substring of a non-flat cons string yields a sliced
// string whose parent is the sequential first part of the (now flattened)
// cons string, rather than the cons string itself.
TEST(SliceFromCons) {
|
||||
// String slices are hidden behind a flag, so turn it on for this test.
FLAG_string_slices = true;
|
||||
InitializeVM();
|
||||
v8::HandleScope scope;
|
||||
Handle<String> string =
|
||||
FACTORY->NewStringFromAscii(CStrVector("parentparentparent"));
|
||||
// Concatenate the 18-character string with itself to get a cons string.
Handle<String> parent = FACTORY->NewConsString(string, string);
|
||||
CHECK(parent->IsConsString());
|
||||
CHECK(!parent->IsFlat());
|
||||
// NOTE(review): the arguments are presumably (start, end) indices, giving a
// 24-character substring -- confirm against Factory::NewSubString.
Handle<String> slice = FACTORY->NewSubString(parent, 1, 25);
|
||||
// After slicing, the original string becomes a flat cons.
|
||||
CHECK(parent->IsFlat());
|
||||
CHECK(slice->IsSlicedString());
|
||||
// The slice must point at the first part of the flattened cons string.
CHECK_EQ(SlicedString::cast(*slice)->parent(),
|
||||
ConsString::cast(*parent)->first());
|
||||
CHECK(SlicedString::cast(*slice)->parent()->IsSeqString());
|
||||
CHECK(slice->IsFlat());
|
||||
}
|
||||
|
||||
|
||||
// Covers both the JavaScript path (str.slice) and the direct
// FACTORY->NewSubString path, and cross-checks that a proper sub-range does
// produce a sliced string with the expected contents.
TEST(TrivialSlice) {
|
||||
// This tests whether a slice that contains the entire parent string
|
||||
// actually creates a new string (it should not).
|
||||
// String slices are hidden behind a flag, so turn it on for this test.
FLAG_string_slices = true;
|
||||
InitializeVM();
|
||||
HandleScope scope;
|
||||
v8::Local<v8::Value> result;
|
||||
Handle<String> string;
|
||||
// The subject string is the 26-letter lowercase alphabet.
const char* init = "var str = 'abcdefghijklmnopqrstuvwxyz';";
|
||||
// Slice spanning all 26 characters of str.
const char* check = "str.slice(0,26)";
|
||||
// Slice that drops the first and the last character of str.
const char* crosscheck = "str.slice(1,25)";
|
||||
|
||||
CompileRun(init);
|
||||
|
||||
// A full-range slice taken from JavaScript must not be a sliced string.
result = CompileRun(check);
|
||||
CHECK(result->IsString());
|
||||
string = v8::Utils::OpenHandle(v8::String::Cast(*result));
|
||||
CHECK(!string->IsSlicedString());
|
||||
|
||||
// The same holds when the full-range substring is requested via the factory.
string = FACTORY->NewSubString(string, 0, 26);
|
||||
CHECK(!string->IsSlicedString());
|
||||
// A proper sub-range, in contrast, is expected to be a sliced string.
result = CompileRun(crosscheck);
|
||||
CHECK(result->IsString());
|
||||
string = v8::Utils::OpenHandle(v8::String::Cast(*result));
|
||||
CHECK(string->IsSlicedString());
|
||||
CHECK_EQ("bcdefghijklmnopqrstuvwxy", *(string->ToCString()));
|
||||
}
|
||||
|
81
test/mjsunit/string-slices-regexp.js
Normal file
81
test/mjsunit/string-slices-regexp.js
Normal file
@ -0,0 +1,81 @@
|
||||
// Copyright 2009 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Flags: --string-slices
|
||||
|
||||
//assertEquals('345"12345 6"1234567"123',
|
||||
// '12345""12345 6""1234567""1234'.slice(2,-1).replace(/""/g, '"'));
|
||||
|
||||
var foo = "lsdfj sldkfj sdklfj læsdfjl sdkfjlsdk fjsdl fjsdljskdj flsj flsdkj flskd regexp: /foobar/\nldkfj sdlkfj sdkl";
|
||||
for(var i = 0; i < 1000; i++) {
|
||||
assertTrue(/^([a-z]+): (.*)/.test(foo.substring(foo.indexOf("regexp:"))));
|
||||
assertEquals("regexp", RegExp.$1, "RegExp.$1");
|
||||
}
|
||||
|
||||
var re = /^(((N({)?)|(R)|(U)|(V)|(B)|(H)|(n((n)|(r)|(v)|(h))?)|(r(r)?)|(v)|(b((n)|(b))?)|(h))|((Y)|(A)|(E)|(o(u)?)|(p(u)?)|(q(u)?)|(s)|(t)|(u)|(w)|(x(u)?)|(y)|(z)|(a((T)|(A)|(L))?)|(c)|(e)|(f(u)?)|(g(u)?)|(i)|(j)|(l)|(m(u)?)))+/;
|
||||
var r = new RegExp(re)
|
||||
var str = "_Avtnennan gunzvmu pubExnY nEvln vaTxh rmuhguhaTxnY_".slice(1,-1);
|
||||
str = str + str;
|
||||
assertTrue(r.test(str));
|
||||
assertTrue(r.test(str));
|
||||
var re = /x/;
|
||||
assertEquals("a.yb", "_axyb_".slice(1,-1).replace(re, "."));
|
||||
re.compile("y");
|
||||
assertEquals("ax.b", "_axyb_".slice(1,-1).replace(re, "."));
|
||||
re.compile("(x)");
|
||||
assertEquals(["x", "x"], re.exec("_axyb_".slice(1,-1)));
|
||||
re.compile("(y)");
|
||||
assertEquals(["y", "y"], re.exec("_axyb_".slice(1,-1)));
|
||||
|
||||
for(var i = 0; i < 100; i++) {
|
||||
var a = "aaaaaaaaaaaaaaaaaaaaaaaabbaacabbabaaaaabbaaaabbac".slice(24,-1);
|
||||
var b = "bbaacabbabaaaaabbaaaabba" + a;
|
||||
// The first time, the cons string will be flattened and handled by the
|
||||
// runtime system.
|
||||
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(b));
|
||||
// The second time, the cons string is already flattened and will be
|
||||
// handled by generated code.
|
||||
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(b));
|
||||
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(a));
|
||||
assertEquals(["bbaa", "a", "", "a"], /((\3|b)\2(a)){2,}/.exec(a));
|
||||
}
|
||||
|
||||
var c = "ABCDEFGHIJKLMN".slice(2,-2);
|
||||
var d = "ABCDEF\u1234GHIJKLMN".slice(2,-2);
|
||||
var e = "ABCDEFGHIJKLMN".slice(0,-2);
|
||||
assertTrue(/^C.*L$/.test(c));
|
||||
assertTrue(/^C.*L$/.test(c));
|
||||
assertTrue(/^C.*L$/.test(d));
|
||||
assertTrue(/^C.*L$/.test(d));
|
||||
assertTrue(/^A\w{10}L$/.test(e));
|
||||
assertTrue(/^A\w{10}L$/.test(e));
|
||||
|
||||
var e = "qui-opIasd-fghjklzx-cvbn-mqwer-tyuio-pasdf-ghIjkl-zx".slice(6,-6);
|
||||
var e_split = e.split("-");
|
||||
assertEquals(e_split[0], "Iasd");
|
||||
assertEquals(e_split[1], "fghjklzx");
|
||||
assertEquals(e_split[6], "ghI");
|
198
test/mjsunit/string-slices.js
Executable file
198
test/mjsunit/string-slices.js
Executable file
@ -0,0 +1,198 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Flags: --string-slices --expose-externalize-string
|
||||
|
||||
var s = 'abcdefghijklmn';
|
||||
assertEquals(s, s.substr());
|
||||
assertEquals(s, s.substr(0));
|
||||
assertEquals(s, s.substr('0'));
|
||||
assertEquals(s, s.substr(void 0));
|
||||
assertEquals(s, s.substr(null));
|
||||
assertEquals(s, s.substr(false));
|
||||
assertEquals(s, s.substr(0.9));
|
||||
assertEquals(s, s.substr({ valueOf: function() { return 0; } }));
|
||||
assertEquals(s, s.substr({ toString: function() { return '0'; } }));
|
||||
|
||||
var s1 = s.substring(1);
|
||||
assertEquals(s1, s.substr(1));
|
||||
assertEquals(s1, s.substr('1'));
|
||||
assertEquals(s1, s.substr(true));
|
||||
assertEquals(s1, s.substr(1.1));
|
||||
assertEquals(s1, s.substr({ valueOf: function() { return 1; } }));
|
||||
assertEquals(s1, s.substr({ toString: function() { return '1'; } }));
|
||||
|
||||
|
||||
assertEquals(s.substring(s.length - 1), s.substr(-1));
|
||||
assertEquals(s.substring(s.length - 1), s.substr(-1.2));
|
||||
assertEquals(s.substring(s.length - 1), s.substr(-1.7));
|
||||
assertEquals(s.substring(s.length - 2), s.substr(-2));
|
||||
assertEquals(s.substring(s.length - 2), s.substr(-2.3));
|
||||
assertEquals(s.substring(s.length - 2, s.length - 1), s.substr(-2, 1));
|
||||
assertEquals(s, s.substr(-100));
|
||||
assertEquals('abc', s.substr(-100, 3));
|
||||
assertEquals(s1, s.substr(-s.length + 1));
|
||||
|
||||
// assertEquals('', s.substr(0, void 0)); // smjs and rhino
|
||||
assertEquals('abcdefghijklmn', s.substr(0, void 0)); // kjs and v8
|
||||
assertEquals('', s.substr(0, null));
|
||||
assertEquals(s, s.substr(0, String(s.length)));
|
||||
assertEquals('a', s.substr(0, true));
|
||||
|
||||
|
||||
// Test substrings of different lengths and alignments.
|
||||
// First ASCII.
|
||||
var x = "ASCII";
|
||||
for (var i = 0; i < 25; i++) {
|
||||
x += (i >> 4).toString(16) + (i & 0x0f).toString(16);
|
||||
}
|
||||
/x/.exec(x); // Try to force a flatten.
|
||||
for (var i = 5; i < 25; i++) {
|
||||
for (var j = 12; j < 25; j++) {
|
||||
var z = x.substring(i, i+j);
|
||||
var w = Math.random() * 42; // Allocate something new in new-space.
|
||||
assertEquals(j, z.length);
|
||||
for (var k = 0; k < j; k++) {
|
||||
assertEquals(x.charAt(i+k), z.charAt(k));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Then two-byte strings.
|
||||
x = "UC16\u2028"; // Non-ascii char forces two-byte string.
|
||||
for (var i = 0; i < 25; i++) {
|
||||
x += (i >> 4).toString(16) + (i & 0x0f).toString(16);
|
||||
}
|
||||
/x/.exec(x); // Try to force a flatten.
|
||||
for (var i = 5; i < 25; i++) {
|
||||
for (var j = 0; j < 25; j++) {
|
||||
var z = x.substring(i, i + j);
|
||||
var w = Math.random() * 42; // Allocate something new in new-space.
|
||||
assertEquals(j, z.length);
|
||||
for (var k = 0; k < j; k++) {
|
||||
assertEquals(x.charAt(i+k), z.charAt(k));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keep creating strings to force allocation failure on substring creation.
|
||||
var x = "0123456789ABCDEF";
|
||||
x += x; // 2^5
|
||||
x += x;
|
||||
x += x;
|
||||
x += x;
|
||||
x += x;
|
||||
x += x; // 2^10
|
||||
x += x;
|
||||
x += x;
|
||||
var xl = x.length;
|
||||
var cache = [];
|
||||
for (var i = 0; i < 10000; i++) {
|
||||
var z = x.substring(i % xl);
|
||||
assertEquals(xl - (i % xl), z.length);
|
||||
cache.push(z);
|
||||
}
|
||||
|
||||
|
||||
// Same with two-byte strings
|
||||
var x = "\u2028123456789ABCDEF";
|
||||
x += x; // 2^5
|
||||
x += x;
|
||||
x += x;
|
||||
x += x;
|
||||
x += x;
|
||||
x += x; // 2^10
|
||||
x += x;
|
||||
x += x;
|
||||
var xl = x.length;
|
||||
var cache = [];
|
||||
for (var i = 0; i < 10000; i++) {
|
||||
var z = x.substring(i % xl);
|
||||
assertEquals(xl - (i % xl), z.length);
|
||||
cache.push(z);
|
||||
}
|
||||
|
||||
// Substring of substring.
|
||||
var cache = [];
|
||||
var last = x;
|
||||
var offset = 0;
|
||||
for (var i = 0; i < 64; i++) {
|
||||
var z = last.substring(i);
|
||||
last = z;
|
||||
cache.push(z);
|
||||
offset += i;
|
||||
}
|
||||
for (var i = 63; i >= 0; i--) {
|
||||
var z = cache.pop();
|
||||
assertTrue(/\u2028123456789ABCDEF/.test(z));
|
||||
assertEquals(xl - offset, z.length);
|
||||
offset -= i;
|
||||
}
|
||||
|
||||
// Test charAt for different strings.
|
||||
// Asserts charAt results on three strings at index i % 11.
//   s1, s2: expected to hold consecutive letters starting at 'a' (code 97)
//           in their first 11 positions.
//   s3:     expected to hold consecutive letters starting at 'b' (code 98)
//           in its first 11 positions.
//   i:      iteration counter; only i % 11 is used as the character index.
function f(s1, s2, s3, i) {
|
||||
assertEquals(String.fromCharCode(97+i%11), s1.charAt(i%11));
|
||||
assertEquals(String.fromCharCode(97+i%11), s2.charAt(i%11));
|
||||
assertEquals(String.fromCharCode(98+i%11), s3.charAt(i%11));
|
||||
// Fixed-index probe: position 3 of s3 must be 'e' (code 101).
assertEquals(String.fromCharCode(101), s3.charAt(3));
|
||||
}
|
||||
|
||||
flat = "abcdefghijkl12345";
|
||||
cons = flat + flat.toUpperCase();
|
||||
slice = "abcdefghijklmn12345".slice(1, -1);
|
||||
for ( var i = 0; i < 1000; i++) {
|
||||
f(flat, cons, slice, i);
|
||||
}
|
||||
flat = "abcdefghijkl1\u20232345";
|
||||
cons = flat + flat.toUpperCase();
|
||||
slice = "abcdefghijklmn1\u20232345".slice(1, -1);
|
||||
for ( var i = 0; i < 1000; i++) {
|
||||
f(flat, cons, slice, i);
|
||||
}
|
||||
|
||||
// Concatenate substrings.
|
||||
var ascii = 'abcdefghijklmnop';
|
||||
var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB';
|
||||
assertEquals("klmno", ascii.substring(10,15) + ascii.substring(16));
|
||||
assertEquals("\u03B4\u03B7", utf.substring(3,4) + utf.substring(6,7));
|
||||
assertEquals("klp", ascii.substring(10,12) + ascii.substring(15,16));
|
||||
assertEquals("\u03B1\u03B4\u03B5", utf.substring(0,1) + utf.substring(5,3));
|
||||
assertEquals("", ascii.substring(16) + utf.substring(16));
|
||||
assertEquals("bcdef\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9",
|
||||
ascii.substring(1,6) + utf.substring(3,9));
|
||||
assertEquals("\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9abcdefghijklmnop",
|
||||
utf.substring(3,9) + ascii);
|
||||
assertEquals("\u03B2\u03B3\u03B4\u03B5\u03B4\u03B5\u03B6\u03B7",
|
||||
utf.substring(5,1) + utf.substring(3,7));
|
||||
|
||||
/*
|
||||
// Externalizing strings.
|
||||
var a = "123456789qwertyuiopasdfghjklzxcvbnm";
|
||||
var b = a.slice(1,-1);
|
||||
assertEquals(a.slice(1,-1), b);
|
||||
externalizeString(a);
|
||||
assertEquals(a.slice(1,-1), b);
|
||||
*/
|
@ -135,3 +135,20 @@ for (var i = 0; i < 10000; i++) {
|
||||
assertEquals(xl - (i % xl), z.length);
|
||||
cache.push(z);
|
||||
}
|
||||
|
||||
// Substring of substring.
|
||||
var cache = [];
|
||||
var last = x;
|
||||
var offset = 0;
|
||||
for (var i = 0; i < 64; i++) {
|
||||
var z = last.substring(i);
|
||||
last = z;
|
||||
cache.push(z);
|
||||
offset += i;
|
||||
}
|
||||
for (var i = 63; i >= 0; i--) {
|
||||
var z = cache.pop();
|
||||
assertTrue(/\u2028123456789ABCDEF/.test(z));
|
||||
assertEquals(xl - offset, z.length);
|
||||
offset -= i;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user