From e41c17084f5efb3d2b4d0cd40000a824513bfa0b Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Wed, 9 Jan 2013 15:47:53 +0000 Subject: [PATCH] Continues Latin-1 support. All tests pass with ENABLE_LATIN_1 flag. R=yangguo@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/11818025 Patch from Dan Carney . git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13344 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/api.cc | 5 +- src/arm/regexp-macro-assembler-arm.cc | 11 ++- .../externalize-string-extension.cc | 4 +- src/handles.cc | 4 +- src/heap.cc | 24 ++--- src/ia32/code-stubs-ia32.cc | 18 +++- src/ia32/regexp-macro-assembler-ia32.cc | 14 ++- src/interpreter-irregexp.cc | 4 +- src/isolate.cc | 5 +- src/json-parser.h | 34 +++---- src/json-stringifier.h | 54 ++++++----- src/jsregexp.cc | 8 +- src/log.cc | 5 +- src/objects-inl.h | 32 +++---- src/objects.cc | 20 ++--- src/objects.h | 24 +++-- src/regexp-macro-assembler.cc | 4 +- src/runtime.cc | 90 ++++++++++--------- src/string-search.h | 23 ++++- src/v8conversions.cc | 8 +- src/x64/code-stubs-x64.cc | 16 ++++ src/x64/regexp-macro-assembler-x64.cc | 11 ++- test/cctest/test-hashing.cc | 40 +++++---- test/mjsunit/regexp-capture-3.js | 33 ++++--- test/mjsunit/regress/regress-latin-1.js | 59 ++++++++++++ 25 files changed, 343 insertions(+), 207 deletions(-) create mode 100644 test/mjsunit/regress/regress-latin-1.js diff --git a/src/api.cc b/src/api.cc index 1097200d25..352b63dfca 100644 --- a/src/api.cc +++ b/src/api.cc @@ -4149,7 +4149,10 @@ int String::WriteAscii(char* buffer, // WriteToFlat is faster than using the StringCharacterStream. if (length == -1) length = str->length() + 1; int len = i::Min(length, str->length() - start); - i::String::WriteToFlat(*str, buffer, start, start + len); + i::String::WriteToFlat(*str, + reinterpret_cast(buffer), + start, + start + len); if (!(options & PRESERVE_ASCII_NULL)) { for (int i = 0; i < len; i++) { if (buffer[i] == '\0') buffer[i] = ' '; diff --git a/src/arm/regexp-macro-assembler-arm.cc b/src/arm/regexp-macro-assembler-arm.cc index 608c20990a..acb24ef14e 100644 --- a/src/arm/regexp-macro-assembler-arm.cc +++ b/src/arm/regexp-macro-assembler-arm.cc @@ -337,8 +337,17 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( __ b(ne, &fail); __ sub(r3, r3, Operand('a')); __ cmp(r3, Operand('z' - 'a')); // Is r3 a lowercase letter? +#ifndef ENABLE_LATIN_1 __ b(hi, &fail); - +#else + __ b(ls, &loop_check); // In range 'a'-'z'. + // Latin-1: Check for values in range [224,254] but not 247. + __ sub(r3, r3, Operand(224 - 'a')); + __ cmp(r3, Operand(254 - 224)); + __ b(hi, &fail); // Weren't Latin-1 letters. + __ cmp(r3, Operand(247 - 224)); // Check for 247. + __ b(eq, &fail); +#endif __ bind(&loop_check); __ cmp(r0, r1); diff --git a/src/extensions/externalize-string-extension.cc b/src/extensions/externalize-string-extension.cc index a126a5a569..614c243b3b 100644 --- a/src/extensions/externalize-string-extension.cc +++ b/src/extensions/externalize-string-extension.cc @@ -94,10 +94,10 @@ v8::Handle ExternalizeStringExtension::Externalize( "externalizeString() can't externalize twice.")); } if (string->IsOneByteRepresentation() && !force_two_byte) { - char* data = new char[string->length()]; + uint8_t* data = new uint8_t[string->length()]; String::WriteToFlat(*string, data, 0, string->length()); SimpleAsciiStringResource* resource = new SimpleAsciiStringResource( - data, string->length()); + reinterpret_cast(data), string->length()); result = string->MakeExternal(resource); if (result && !string->IsSymbol()) { HEAP->external_string_table()->AddString(*string); diff --git a/src/handles.cc b/src/handles.cc index 16fe0c795c..ed92c414e3 100644 --- a/src/handles.cc +++ b/src/handles.cc @@ -423,7 +423,7 @@ static void CalculateLineEnds(Isolate* isolate, Vector src, bool with_last_line) { const int src_len = src.length(); - StringSearch search(isolate, CStrVector("\n")); + StringSearch search(isolate, STATIC_ASCII_VECTOR("\n")); // Find and record line ends. int position = 0; @@ -457,7 +457,7 @@ Handle CalculateLineEnds(Handle src, if (content.IsAscii()) { CalculateLineEnds(isolate, &line_ends, - content.ToAsciiVector(), + content.ToOneByteVector(), with_last_line); } else { CalculateLineEnds(isolate, diff --git a/src/heap.cc b/src/heap.cc index dc425dd1bf..013a18c320 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -3339,9 +3339,9 @@ MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString( { MaybeObject* maybe_result = heap->AllocateRawOneByteString(2); if (!maybe_result->ToObject(&result)) return maybe_result; } - char* dest = SeqOneByteString::cast(result)->GetChars(); - dest[0] = static_cast(c1); - dest[1] = static_cast(c2); + uint8_t* dest = SeqOneByteString::cast(result)->GetChars(); + dest[0] = static_cast(c1); + dest[1] = static_cast(c2); return result; } else { Object* result; @@ -3412,9 +3412,9 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) { if (!maybe_result->ToObject(&result)) return maybe_result; } // Copy the characters into the new object. - char* dest = SeqOneByteString::cast(result)->GetChars(); + uint8_t* dest = SeqOneByteString::cast(result)->GetChars(); // Copy first part. - const char* src; + const uint8_t* src; if (first->IsExternalString()) { src = ExternalAsciiString::cast(first)->GetChars(); } else { @@ -3436,7 +3436,7 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) { if (!maybe_result->ToObject(&result)) return maybe_result; } // Copy the characters into the new object. - char* dest = SeqOneByteString::cast(result)->GetChars(); + uint8_t* dest = SeqOneByteString::cast(result)->GetChars(); String::WriteToFlat(first, dest, 0, first_length); String::WriteToFlat(second, dest + first_length, 0, second_length); isolate_->counters()->string_add_runtime_ext_to_ascii()->Increment(); @@ -3513,7 +3513,7 @@ MaybeObject* Heap::AllocateSubString(String* buffer, // Copy the characters into the new object. if (is_one_byte) { ASSERT(string_result->IsOneByteRepresentation()); - char* dest = SeqOneByteString::cast(string_result)->GetChars(); + uint8_t* dest = SeqOneByteString::cast(string_result)->GetChars(); String::WriteToFlat(buffer, dest, start, end); } else { ASSERT(string_result->IsTwoByteRepresentation()); @@ -4555,7 +4555,7 @@ MaybeObject* Heap::AllocateStringFromOneByte(Vector string, } // Copy the characters into the new object. - CopyChars(SeqOneByteString::cast(result)->GetCharsU(), + CopyChars(SeqOneByteString::cast(result)->GetChars(), string.start(), length); return result; @@ -4654,7 +4654,7 @@ template<> class AllocateInternalSymbolHelper< Vector > { public: static inline void WriteOneByteData(Vector vector, - char* chars, + uint8_t* chars, int len) { // Only works for ascii. ASSERT(vector.length() == len); @@ -4696,7 +4696,7 @@ class AllocateInternalSymbolHelper< Vector > { template<> class AllocateInternalSymbolHelper { public: - static inline void WriteOneByteData(String* s, char* chars, int len) { + static inline void WriteOneByteData(String* s, uint8_t* chars, int len) { ASSERT(s->length() == len); String::WriteToFlat(s, chars, 0, len); } @@ -4806,13 +4806,15 @@ MaybeObject* Heap::AllocateRawOneByteString(int length, String::cast(result)->set_hash_field(String::kEmptyHashField); ASSERT_EQ(size, HeapObject::cast(result)->Size()); +#ifndef ENABLE_LATIN_1 #ifdef VERIFY_HEAP if (FLAG_verify_heap) { // Initialize string's content to ensure ASCII-ness (character range 0-127) // as required when verifying the heap. - char* dest = SeqOneByteString::cast(result)->GetChars(); + uint8_t* dest = SeqOneByteString::cast(result)->GetChars(); memset(dest, 0x0F, length * kCharSize); } +#endif #endif return result; diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index cd92e712d7..28663ab556 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -6478,7 +6478,12 @@ void StringCompareStub::GenerateCompareFlatAsciiStrings(MacroAssembler* masm, // Compare lengths - strings up to min-length are equal. __ bind(&compare_lengths); __ test(length_delta, length_delta); +#ifndef ENABLE_LATIN_1 __ j(not_zero, &result_not_equal, Label::kNear); +#else + Label length_not_equal; + __ j(not_zero, &length_not_equal, Label::kNear); +#endif // Result is EQUAL. STATIC_ASSERT(EQUAL == 0); @@ -6487,8 +6492,19 @@ void StringCompareStub::GenerateCompareFlatAsciiStrings(MacroAssembler* masm, __ ret(0); Label result_greater; - __ bind(&result_not_equal); +#ifdef ENABLE_LATIN_1 + Label result_less; + __ bind(&length_not_equal); __ j(greater, &result_greater, Label::kNear); + __ jmp(&result_less, Label::kNear); +#endif + __ bind(&result_not_equal); +#ifndef ENABLE_LATIN_1 + __ j(greater, &result_greater, Label::kNear); +#else + __ j(above, &result_greater, Label::kNear); + __ bind(&result_less); +#endif // Result is LESS. __ Set(eax, Immediate(Smi::FromInt(LESS))); diff --git a/src/ia32/regexp-macro-assembler-ia32.cc b/src/ia32/regexp-macro-assembler-ia32.cc index 8ebc3a9a5f..49c75e1330 100644 --- a/src/ia32/regexp-macro-assembler-ia32.cc +++ b/src/ia32/regexp-macro-assembler-ia32.cc @@ -344,7 +344,19 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase( __ or_(eax, 0x20); // Convert match character to lower-case. __ lea(ecx, Operand(eax, -'a')); __ cmp(ecx, static_cast('z' - 'a')); // Is eax a lowercase letter? - __ j(above, &fail); +#ifndef ENABLE_LATIN_1 + __ j(above, &fail); // Weren't letters anyway. +#else + Label convert_capture; + __ j(below_equal, &convert_capture); // In range 'a'-'z'. + // Latin-1: Check for values in range [224,254] but not 247. + __ sub(ecx, Immediate(224 - 'a')); + __ cmp(ecx, Immediate(254 - 224)); + __ j(above, &fail); // Weren't Latin-1 letters. + __ cmp(ecx, Immediate(247 - 224)); // Check for 247. + __ j(equal, &fail); + __ bind(&convert_capture); +#endif // Also convert capture character. __ movzx_b(ecx, Operand(edx, 0)); __ or_(ecx, 0x20); diff --git a/src/interpreter-irregexp.cc b/src/interpreter-irregexp.cc index 3a92b84554..5abeb5a106 100644 --- a/src/interpreter-irregexp.cc +++ b/src/interpreter-irregexp.cc @@ -68,7 +68,7 @@ static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, int from, int current, int len, - Vector subject) { + Vector subject) { for (int i = 0; i < len; i++) { unsigned int old_char = subject[from++]; unsigned int new_char = subject[current++]; @@ -617,7 +617,7 @@ RegExpImpl::IrregexpResult IrregexpInterpreter::Match( uc16 previous_char = '\n'; String::FlatContent subject_content = subject->GetFlatContent(); if (subject_content.IsAscii()) { - Vector subject_vector = subject_content.ToAsciiVector(); + Vector subject_vector = subject_content.ToOneByteVector(); if (start_position != 0) previous_char = subject_vector[start_position - 1]; return RawMatch(isolate, code_base, diff --git a/src/isolate.cc b/src/isolate.cc index 7c51b53465..c1360691a6 100644 --- a/src/isolate.cc +++ b/src/isolate.cc @@ -536,14 +536,15 @@ void Isolate::PushStackTraceAndDie(unsigned int magic, unsigned int magic2) { const int kMaxStackTraceSize = 8192; Handle trace = StackTraceString(); - char buffer[kMaxStackTraceSize]; + uint8_t buffer[kMaxStackTraceSize]; int length = Min(kMaxStackTraceSize - 1, trace->length()); String::WriteToFlat(*trace, buffer, 0, length); buffer[length] = '\0'; + // TODO(dcarney): convert buffer to utf8? OS::PrintError("Stacktrace (%x-%x) %p %p: %s\n", magic, magic2, static_cast(object), static_cast(map), - buffer); + reinterpret_cast(buffer)); OS::Abort(); } diff --git a/src/json-parser.h b/src/json-parser.h index 31328d79ae..ccb32866f9 100644 --- a/src/json-parser.h +++ b/src/json-parser.h @@ -441,21 +441,21 @@ Handle JsonParser::ParseJsonNumber() { int length = position_ - beg_pos; double number; if (seq_ascii) { - Vector chars(seq_source_->GetChars() + beg_pos, length); + Vector chars(seq_source_->GetChars() + beg_pos, length); number = StringToDouble(isolate()->unicode_cache(), - chars, + Vector::cast(chars), NO_FLAGS, // Hex, octal or trailing junk. OS::nan_value()); } else { - Vector buffer = Vector::New(length); + Vector buffer = Vector::New(length); String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); - Vector result = - Vector(reinterpret_cast(buffer.start()), - length); + Vector result = + Vector(buffer.start(), length); number = StringToDouble(isolate()->unicode_cache(), - result, - NO_FLAGS, // Hex, octal or trailing junk. - 0.0); + // TODO(dcarney): Convert StringToDouble to uint_t. + Vector::cast(result), + NO_FLAGS, // Hex, octal or trailing junk. + 0.0); buffer.Dispose(); } SkipWhitespace(); @@ -627,9 +627,9 @@ Handle JsonParser::ScanJsonString() { c0_ = c0; int beg_pos = position_; position_ = position; - return SlowScanJsonString(source_, - beg_pos, - position_); + return SlowScanJsonString(source_, + beg_pos, + position_); } if (c0 < 0x20) return Handle::null(); if (static_cast(c0) > @@ -651,7 +651,7 @@ Handle JsonParser::ScanJsonString() { uint32_t hash = (length <= String::kMaxHashCalcLength) ? StringHasher::GetHashCore(running_hash) : length; Vector string_vector( - seq_source_->GetCharsU() + position_, length); + seq_source_->GetChars() + position_, length); SymbolTable* symbol_table = isolate()->heap()->symbol_table(); uint32_t capacity = symbol_table->Capacity(); uint32_t entry = SymbolTable::FirstProbe(hash, capacity); @@ -688,9 +688,9 @@ Handle JsonParser::ScanJsonString() { position_); } } else { - return SlowScanJsonString(source_, - beg_pos, - position_); + return SlowScanJsonString(source_, + beg_pos, + position_); } } while (c0_ != '"'); int length = position_ - beg_pos; @@ -699,7 +699,7 @@ Handle JsonParser::ScanJsonString() { result = factory()->LookupOneByteSymbol(seq_source_, beg_pos, length); } else { result = factory()->NewRawOneByteString(length, pretenure_); - char* dest = SeqOneByteString::cast(*result)->GetChars(); + uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(); String::WriteToFlat(*source_, dest, beg_pos, position_); } ASSERT_EQ('"', c0_); diff --git a/src/json-stringifier.h b/src/json-stringifier.h index 426a3707cd..cec98443a5 100644 --- a/src/json-stringifier.h +++ b/src/json-stringifier.h @@ -60,7 +60,7 @@ class BasicJsonStringifier BASE_EMBEDDED { template INLINE(void Append_(const Char* chars)); - INLINE(void Append(char c)) { + INLINE(void Append(uint8_t c)) { if (is_ascii_) { Append_(c); } else { @@ -68,11 +68,11 @@ class BasicJsonStringifier BASE_EMBEDDED { } } - INLINE(void Append(const char* chars)) { + INLINE(void AppendAscii(const char* chars)) { if (is_ascii_) { - Append_(chars); + Append_(reinterpret_cast(chars)); } else { - Append_(chars); + Append_(reinterpret_cast(chars)); } } @@ -327,15 +327,15 @@ BasicJsonStringifier::Result BasicJsonStringifier::Serialize_( switch (Oddball::cast(*object)->kind()) { case Oddball::kFalse: if (deferred_string_key) SerializeDeferredKey(comma, key); - Append("false"); + AppendAscii("false"); return SUCCESS; case Oddball::kTrue: if (deferred_string_key) SerializeDeferredKey(comma, key); - Append("true"); + AppendAscii("true"); return SUCCESS; case Oddball::kNull: if (deferred_string_key) SerializeDeferredKey(comma, key); - Append("null"); + AppendAscii("null"); return SUCCESS; default: return UNCHANGED; @@ -412,7 +412,7 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSValue( ASSERT(class_name == isolate_->heap()->Boolean_symbol()); Object* value = JSValue::cast(*object)->value(); ASSERT(value->IsBoolean()); - Append(value->IsTrue() ? "true" : "false"); + AppendAscii(value->IsTrue() ? "true" : "false"); } return SUCCESS; } @@ -422,7 +422,7 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeSmi(Smi* object) { static const int kBufferSize = 100; char chars[kBufferSize]; Vector buffer(chars, kBufferSize); - Append(IntToCString(object->value(), buffer)); + AppendAscii(IntToCString(object->value(), buffer)); return SUCCESS; } @@ -430,13 +430,13 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeSmi(Smi* object) { BasicJsonStringifier::Result BasicJsonStringifier::SerializeDouble( double number) { if (isinf(number) || isnan(number)) { - Append("null"); + AppendAscii("null"); return SUCCESS; } static const int kBufferSize = 100; char chars[kBufferSize]; Vector buffer(chars, kBufferSize); - Append(DoubleToCString(number, buffer)); + AppendAscii(DoubleToCString(number, buffer)); return SUCCESS; } @@ -476,7 +476,7 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSArray( SerializeElement(Handle(elements->get(i), isolate_), i); if (result == SUCCESS) continue; if (result == UNCHANGED) { - Append("null"); + AppendAscii("null"); } else { return result; } @@ -505,12 +505,12 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSArraySlow( if (i > 0) Append(','); Handle element = Object::GetElement(object, i); if (element->IsUndefined()) { - Append("null"); + AppendAscii("null"); } else { Result result = SerializeElement(element, i); if (result == SUCCESS) continue; if (result == UNCHANGED) { - Append("null"); + AppendAscii("null"); } else { return result; } @@ -682,8 +682,9 @@ void BasicJsonStringifier::SerializeString_(Handle string) { if (DoNotEscape(c)) { Append_(c); } else { - Append_( - &JsonEscapeTable[c * kJsonEscapeTableEntrySize]); + Append_( + reinterpret_cast( + &JsonEscapeTable[c * kJsonEscapeTableEntrySize])); } // If GC moved the string, we need to refresh the vector. if (*string != string_location) { @@ -693,27 +694,22 @@ void BasicJsonStringifier::SerializeString_(Handle string) { } } - Append_('"'); + Append_('"'); } -template <> -bool BasicJsonStringifier::DoNotEscape(char c) { - return c >= '#' && c <= '~' && c != '\\'; -} - - -template <> -bool BasicJsonStringifier::DoNotEscape(uc16 c) { +template +bool BasicJsonStringifier::DoNotEscape(Char c) { return (c >= 0x80) || (c >= '#' && c <= '~' && c != '\\'); } template <> -Vector BasicJsonStringifier::GetCharVector(Handle string) { +Vector BasicJsonStringifier::GetCharVector( + Handle string) { String::FlatContent flat = string->GetFlatContent(); ASSERT(flat.IsAscii()); - return flat.ToAsciiVector(); + return flat.ToOneByteVector(); } @@ -730,14 +726,14 @@ void BasicJsonStringifier::SerializeString(Handle object) { String::FlatContent flat = object->GetFlatContent(); if (is_ascii_) { if (flat.IsAscii()) { - SerializeString_(object); + SerializeString_(object); } else { ChangeEncoding(); SerializeString(object); } } else { if (flat.IsAscii()) { - SerializeString_(object); + SerializeString_(object); } else { SerializeString_(object); } diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 347fc03e7b..a3efb859ac 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -309,16 +309,16 @@ int RegExpImpl::AtomExecRaw(Handle regexp, index = (needle_content.IsAscii() ? (subject_content.IsAscii() ? SearchString(isolate, - subject_content.ToAsciiVector(), - needle_content.ToAsciiVector(), + subject_content.ToOneByteVector(), + needle_content.ToOneByteVector(), index) : SearchString(isolate, subject_content.ToUC16Vector(), - needle_content.ToAsciiVector(), + needle_content.ToOneByteVector(), index)) : (subject_content.IsAscii() ? SearchString(isolate, - subject_content.ToAsciiVector(), + subject_content.ToOneByteVector(), needle_content.ToUC16Vector(), index) : SearchString(isolate, diff --git a/src/log.cc b/src/log.cc index d30ef5a133..efbb8f7f57 100644 --- a/src/log.cc +++ b/src/log.cc @@ -384,7 +384,10 @@ class Logger::NameBuffer { if (str == NULL) return; if (str->HasOnlyAsciiChars()) { int utf8_length = Min(str->length(), kUtf8BufferSize - utf8_pos_); - String::WriteToFlat(str, utf8_buffer_ + utf8_pos_, 0, utf8_length); + String::WriteToFlat(str, + reinterpret_cast(utf8_buffer_ + utf8_pos_), + 0, + utf8_length); utf8_pos_ += utf8_length; return; } diff --git a/src/objects-inl.h b/src/objects-inl.h index 7760842e31..658214a229 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -2551,31 +2551,26 @@ void String::Visit( switch (type & (kStringRepresentationMask | kStringEncodingMask)) { case kSeqStringTag | kOneByteStringTag: visitor.VisitOneByteString( - reinterpret_cast( - SeqOneByteString::cast(string)->GetChars()) + slice_offset, - length - offset); + SeqOneByteString::cast(string)->GetChars() + slice_offset, + length - offset); return; case kSeqStringTag | kTwoByteStringTag: visitor.VisitTwoByteString( - reinterpret_cast( - SeqTwoByteString::cast(string)->GetChars()) + slice_offset, - length - offset); + SeqTwoByteString::cast(string)->GetChars() + slice_offset, + length - offset); return; case kExternalStringTag | kOneByteStringTag: visitor.VisitOneByteString( - reinterpret_cast( - ExternalAsciiString::cast(string)->GetChars()) + slice_offset, - length - offset); + ExternalAsciiString::cast(string)->GetChars() + slice_offset, + length - offset); return; case kExternalStringTag | kTwoByteStringTag: visitor.VisitTwoByteString( - reinterpret_cast( - ExternalTwoByteString::cast(string)->GetChars()) - + slice_offset, - length - offset); + ExternalTwoByteString::cast(string)->GetChars() + slice_offset, + length - offset); return; case kSlicedStringTag | kOneByteStringTag: @@ -2621,12 +2616,7 @@ Address SeqOneByteString::GetCharsAddress() { } -char* SeqOneByteString::GetChars() { - return reinterpret_cast(GetCharsAddress()); -} - - -uint8_t* SeqOneByteString::GetCharsU() { +uint8_t* SeqOneByteString::GetChars() { return reinterpret_cast(GetCharsAddress()); } @@ -2737,8 +2727,8 @@ void ExternalAsciiString::set_resource( } -const char* ExternalAsciiString::GetChars() { - return resource()->data(); +const uint8_t* ExternalAsciiString::GetChars() { + return reinterpret_cast(resource()->data()); } diff --git a/src/objects.cc b/src/objects.cc index fe2d1c95d6..2d8626be72 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -903,7 +903,7 @@ MaybeObject* String::SlowTryFlatten(PretenureFlag pretenure) { result = String::cast(object); String* first = cs->first(); int first_length = first->length(); - char* dest = SeqOneByteString::cast(result)->GetChars(); + uint8_t* dest = SeqOneByteString::cast(result)->GetChars(); WriteToFlat(first, dest, 0, first_length); String* second = cs->second(); WriteToFlat(second, @@ -6561,13 +6561,13 @@ String::FlatContent String::GetFlatContent() { shape.representation_tag() != kSlicedStringTag); } if (shape.encoding_tag() == kOneByteStringTag) { - const char* start; + const uint8_t* start; if (shape.representation_tag() == kSeqStringTag) { start = SeqOneByteString::cast(string)->GetChars(); } else { start = ExternalAsciiString::cast(string)->GetChars(); } - return FlatContent(Vector(start + offset, length)); + return FlatContent(Vector(start + offset, length)); } else { ASSERT(shape.encoding_tag() == kTwoByteStringTag); const uc16* start; @@ -6770,7 +6770,7 @@ void FlatStringReader::PostGarbageCollection() { ASSERT(content.IsFlat()); is_ascii_ = content.IsAscii(); if (is_ascii_) { - start_ = content.ToAsciiVector().start(); + start_ = content.ToOneByteVector().start(); } else { start_ = content.ToUC16Vector().start(); } @@ -7254,8 +7254,8 @@ bool String::SlowEquals(String* other) { // TODO(dcarney): Compare all types of flat strings with a Visitor. if (StringShape(lhs).IsSequentialAscii() && StringShape(rhs).IsSequentialAscii()) { - const char* str1 = SeqOneByteString::cast(lhs)->GetChars(); - const char* str2 = SeqOneByteString::cast(rhs)->GetChars(); + const uint8_t* str1 = SeqOneByteString::cast(lhs)->GetChars(); + const uint8_t* str2 = SeqOneByteString::cast(rhs)->GetChars(); return CompareRawStringContents(str1, str2, len); } @@ -11525,7 +11525,7 @@ class SubStringOneByteSymbolKey : public HashTableKey { uint32_t Hash() { ASSERT(length_ >= 0); ASSERT(from_ + length_ <= string_->length()); - char* chars = string_->GetChars() + from_; + uint8_t* chars = string_->GetChars() + from_; hash_field_ = StringHasher::HashSequentialString( chars, length_, string_->GetHeap()->HashSeed()); uint32_t result = hash_field_ >> String::kHashShift; @@ -11539,15 +11539,13 @@ class SubStringOneByteSymbolKey : public HashTableKey { } bool IsMatch(Object* string) { - Vector chars(string_->GetCharsU() + from_, length_); + Vector chars(string_->GetChars() + from_, length_); return String::cast(string)->IsOneByteEqualTo(chars); } MaybeObject* AsObject() { if (hash_field_ == 0) Hash(); - Vector chars( - reinterpret_cast(string_->GetChars()) + from_, - length_); + Vector chars(string_->GetChars() + from_, length_); return HEAP->AllocateOneByteSymbol(chars, hash_field_); } diff --git a/src/objects.h b/src/objects.h index abfa05ad54..79f1f1d52b 100644 --- a/src/objects.h +++ b/src/objects.h @@ -7090,13 +7090,6 @@ class String: public HeapObject { // Returns true if the structure contains two-byte content. bool IsTwoByte() { return state_ == TWO_BYTE; } - // TODO(dcarney): Remove this function. - // Return the ASCII content of the string. Only use if IsAscii() returns - // true. - Vector ToAsciiVector() { - ASSERT_EQ(ASCII, state_); - return Vector::cast(buffer_); - } // Return the one byte content of the string. Only use if IsAscii() returns // true. Vector ToOneByteVector() { @@ -7114,15 +7107,15 @@ class String: public HeapObject { enum State { NON_FLAT, ASCII, TWO_BYTE }; // Constructors only used by String::GetFlatContent(). - explicit FlatContent(Vector chars) - : buffer_(Vector::cast(chars)), + explicit FlatContent(Vector chars) + : buffer_(chars), state_(ASCII) { } explicit FlatContent(Vector chars) : buffer_(Vector::cast(chars)), state_(TWO_BYTE) { } FlatContent() : buffer_(), state_(NON_FLAT) { } - Vector buffer_; + Vector buffer_; State state_; friend class String; @@ -7391,6 +7384,11 @@ class String: public HeapObject { return NonAsciiStart(chars, length) >= length; } + static inline bool IsAscii(const uint8_t* chars, int length) { + return + NonAsciiStart(reinterpret_cast(chars), length) >= length; + } + static inline int NonOneByteStart(const uc16* chars, int length) { const uc16* limit = chars + length; const uc16* start = chars; @@ -7467,9 +7465,7 @@ class SeqOneByteString: public SeqString { // Get the address of the characters in this string. inline Address GetCharsAddress(); - // TODO(dcarney): remove GetChars and rename GetCharsU to GetChars. - inline char* GetChars(); - inline uint8_t* GetCharsU(); + inline uint8_t* GetChars(); // Casting static inline SeqOneByteString* cast(Object* obj); @@ -7682,7 +7678,7 @@ class ExternalAsciiString: public ExternalString { // which the pointer cache has to be refreshed. inline void update_data_cache(); - inline const char* GetChars(); + inline const uint8_t* GetChars(); // Dispatched behavior. inline uint16_t ExternalAsciiStringGet(int index); diff --git a/src/regexp-macro-assembler.cc b/src/regexp-macro-assembler.cc index ee9347acbb..f73726a329 100644 --- a/src/regexp-macro-assembler.cc +++ b/src/regexp-macro-assembler.cc @@ -80,11 +80,11 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition( if (subject->IsOneByteRepresentation()) { const byte* address; if (StringShape(subject).IsExternal()) { - const char* data = ExternalAsciiString::cast(subject)->GetChars(); + const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars(); address = reinterpret_cast(data); } else { ASSERT(subject->IsSeqOneByteString()); - char* data = SeqOneByteString::cast(subject)->GetChars(); + const uint8_t* data = SeqOneByteString::cast(subject)->GetChars(); address = reinterpret_cast(data); } return address + start_index; diff --git a/src/runtime.cc b/src/runtime.cc index 22a72042dc..0b630563b1 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2407,7 +2407,7 @@ class ReplacementStringBuilder { if (is_ascii_) { Handle seq = NewRawOneByteString(character_count_); AssertNoAllocation no_alloc; - char* char_buffer = seq->GetChars(); + uint8_t* char_buffer = seq->GetChars(); StringBuilderConcatHelper(*subject_, char_buffer, *array_builder_.array(), @@ -2664,7 +2664,7 @@ bool CompiledReplacement::Compile(Handle replacement, bool simple = false; if (content.IsAscii()) { simple = ParseReplacementPattern(&parts_, - content.ToAsciiVector(), + content.ToOneByteVector(), capture_count, subject_length, zone()); @@ -2740,7 +2740,7 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder, } -void FindAsciiStringIndices(Vector subject, +void FindAsciiStringIndices(Vector subject, char pattern, ZoneList* indices, unsigned int limit, @@ -2748,11 +2748,11 @@ void FindAsciiStringIndices(Vector subject, ASSERT(limit > 0); // Collect indices of pattern in subject using memchr. // Stop after finding at most limit values. - const char* subject_start = reinterpret_cast(subject.start()); - const char* subject_end = subject_start + subject.length(); - const char* pos = subject_start; + const uint8_t* subject_start = subject.start(); + const uint8_t* subject_end = subject_start + subject.length(); + const uint8_t* pos = subject_start; while (limit > 0) { - pos = reinterpret_cast( + pos = reinterpret_cast( memchr(pos, pattern, subject_end - pos)); if (pos == NULL) return; indices->Add(static_cast(pos - subject_start), zone); @@ -2815,9 +2815,10 @@ void FindStringIndicesDispatch(Isolate* isolate, ASSERT(subject_content.IsFlat()); ASSERT(pattern_content.IsFlat()); if (subject_content.IsAscii()) { - Vector subject_vector = subject_content.ToAsciiVector(); + Vector subject_vector = subject_content.ToOneByteVector(); if (pattern_content.IsAscii()) { - Vector pattern_vector = pattern_content.ToAsciiVector(); + Vector pattern_vector = + pattern_content.ToOneByteVector(); if (pattern_vector.length() == 1) { FindAsciiStringIndices(subject_vector, pattern_vector[0], @@ -2843,7 +2844,8 @@ void FindStringIndicesDispatch(Isolate* isolate, } else { Vector subject_vector = subject_content.ToUC16Vector(); if (pattern_content.IsAscii()) { - Vector pattern_vector = pattern_content.ToAsciiVector(); + Vector pattern_vector = + pattern_content.ToOneByteVector(); if (pattern_vector.length() == 1) { FindTwoByteStringIndices(subject_vector, pattern_vector[0], @@ -3325,10 +3327,10 @@ int Runtime::StringMatch(Isolate* isolate, // dispatch on type of strings if (seq_pat.IsAscii()) { - Vector pat_vector = seq_pat.ToAsciiVector(); + Vector pat_vector = seq_pat.ToOneByteVector(); if (seq_sub.IsAscii()) { return SearchString(isolate, - seq_sub.ToAsciiVector(), + seq_sub.ToOneByteVector(), pat_vector, start_index); } @@ -3340,7 +3342,7 @@ int Runtime::StringMatch(Isolate* isolate, Vector pat_vector = seq_pat.ToUC16Vector(); if (seq_sub.IsAscii()) { return SearchString(isolate, - seq_sub.ToAsciiVector(), + seq_sub.ToOneByteVector(), pat_vector, start_index); } @@ -3435,9 +3437,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) { String::FlatContent pat_content = pat->GetFlatContent(); if (pat_content.IsAscii()) { - Vector pat_vector = pat_content.ToAsciiVector(); + Vector pat_vector = pat_content.ToOneByteVector(); if (sub_content.IsAscii()) { - position = StringMatchBackwards(sub_content.ToAsciiVector(), + position = StringMatchBackwards(sub_content.ToOneByteVector(), pat_vector, start_index); } else { @@ -3448,7 +3450,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) { } else { Vector pat_vector = pat_content.ToUC16Vector(); if (sub_content.IsAscii()) { - position = StringMatchBackwards(sub_content.ToAsciiVector(), + position = StringMatchBackwards(sub_content.ToOneByteVector(), pat_vector, start_index); } else { @@ -5002,7 +5004,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_Typeof) { } -static bool AreDigits(const char*s, int from, int to) { +static bool AreDigits(const uint8_t*s, int from, int to) { for (int i = from; i < to; i++) { if (s[i] < '0' || s[i] > '9') return false; } @@ -5011,7 +5013,7 @@ static bool AreDigits(const char*s, int from, int to) { } -static int ParseDecimalInteger(const char*s, int from, int to) { +static int ParseDecimalInteger(const uint8_t*s, int from, int to) { ASSERT(to - from < 10); // Overflow is not possible. ASSERT(from < to); int d = s[from] - '0'; @@ -5035,7 +5037,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToNumber) { if (subject->IsSeqOneByteString()) { if (len == 0) return Smi::FromInt(0); - char const* data = SeqOneByteString::cast(subject)->GetChars(); + uint8_t const* data = SeqOneByteString::cast(subject)->GetChars(); bool minus = (data[0] == '-'); int start_pos = (minus ? 1 : 0); @@ -5530,8 +5532,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONString) { return QuoteJsonString(isolate, flat.ToUC16Vector()); } else { - return QuoteJsonString(isolate, - flat.ToAsciiVector()); + return QuoteJsonString( + isolate, + flat.ToOneByteVector()); } } @@ -5553,8 +5556,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONStringComma) { return QuoteJsonString(isolate, flat.ToUC16Vector()); } else { - return QuoteJsonString(isolate, - flat.ToAsciiVector()); + return QuoteJsonString( + isolate, + flat.ToOneByteVector()); } } @@ -5595,9 +5599,10 @@ static MaybeObject* QuoteJsonStringArray(Isolate* isolate, write_cursor, content.ToUC16Vector()); } else { - write_cursor = WriteQuoteJsonString(isolate, - write_cursor, - content.ToAsciiVector()); + write_cursor = + WriteQuoteJsonString(isolate, + write_cursor, + content.ToOneByteVector()); } } *(write_cursor++) = ']'; @@ -5950,7 +5955,9 @@ MUST_USE_RESULT static MaybeObject* ConvertCase( } SeqOneByteString* result = SeqOneByteString::cast(o); bool has_changed_character = ConvertTraits::AsciiConverter::Convert( - result->GetChars(), SeqOneByteString::cast(s)->GetChars(), length); + reinterpret_cast(result->GetChars()), + reinterpret_cast(SeqOneByteString::cast(s)->GetChars()), + length); return has_changed_character ? result : s; } #endif @@ -6112,7 +6119,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { // not in the cache and fills the remainder with smi zeros. Returns // the length of the successfully copied prefix. static int CopyCachedAsciiCharsToArray(Heap* heap, - const char* chars, + const uint8_t* chars, FixedArray* elements, int length) { AssertNoAllocation no_gc; @@ -6163,7 +6170,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToArray) { elements = Handle(FixedArray::cast(obj), isolate); String::FlatContent content = s->GetFlatContent(); if (content.IsAscii()) { - Vector chars = content.ToAsciiVector(); + Vector chars = content.ToOneByteVector(); // Note, this will initialize all elements (not only the prefix) // to prevent GC from seeing partially initialized array. position = CopyCachedAsciiCharsToArray(isolate->heap(), @@ -6746,12 +6753,13 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_SparseJoinWithSeparator) { if (result_allocation->IsFailure()) return result_allocation; SeqOneByteString* result_string = SeqOneByteString::cast(result_allocation->ToObjectUnchecked()); - JoinSparseArrayWithSeparator(elements, - elements_length, - array_length, - separator, - Vector(result_string->GetChars(), - string_length)); + JoinSparseArrayWithSeparator(elements, + elements_length, + array_length, + separator, + Vector( + result_string->GetChars(), + string_length)); return result_string; } else { MaybeObject* result_allocation = @@ -6999,9 +7007,9 @@ static Object* FlatStringCompare(String* x, String* y) { String::FlatContent x_content = x->GetFlatContent(); String::FlatContent y_content = y->GetFlatContent(); if (x_content.IsAscii()) { - Vector x_chars = x_content.ToAsciiVector(); + Vector x_chars = x_content.ToOneByteVector(); if (y_content.IsAscii()) { - Vector y_chars = y_content.ToAsciiVector(); + Vector y_chars = y_content.ToOneByteVector(); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); } else { Vector y_chars = y_content.ToUC16Vector(); @@ -7010,7 +7018,7 @@ static Object* FlatStringCompare(String* x, String* y) { } else { Vector x_chars = x_content.ToUC16Vector(); if (y_content.IsAscii()) { - Vector y_chars = y_content.ToAsciiVector(); + Vector y_chars = y_content.ToOneByteVector(); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); } else { Vector y_chars = y_content.ToUC16Vector(); @@ -8960,7 +8968,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_DateParseString) { bool result; String::FlatContent str_content = str->GetFlatContent(); if (str_content.IsAscii()) { - result = DateParser::Parse(str_content.ToAsciiVector(), + result = DateParser::Parse(str_content.ToOneByteVector(), output_array, isolate->unicode_cache()); } else { @@ -13418,8 +13426,8 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) { CONVERT_ARG_CHECKED(JSArray, elms, 1); String::FlatContent format_content = format->GetFlatContent(); RUNTIME_ASSERT(format_content.IsAscii()); - Vector chars = format_content.ToAsciiVector(); - LOGGER->LogRuntime(chars, elms); + Vector chars = format_content.ToOneByteVector(); + LOGGER->LogRuntime(Vector::cast(chars), elms); return isolate->heap()->undefined_value(); } diff --git a/src/string-search.h b/src/string-search.h index ae5f60809d..86237f3aeb 100644 --- a/src/string-search.h +++ b/src/string-search.h @@ -53,7 +53,11 @@ class StringSearchBase { // a potentially less efficient searching, but is a safe approximation. // For needles using only characters in the same Unicode 256-code point page, // there is no search speed degradation. +#ifndef ENABLE_LATIN_1 static const int kAsciiAlphabetSize = 128; +#else + static const int kAsciiAlphabetSize = 256; +#endif static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; // Bad-char shift table stored in the state. It's length is the alphabet size. @@ -61,7 +65,7 @@ class StringSearchBase { // to compensate for the algorithmic overhead compared to simple brute force. static const int kBMMinPatternLength = 7; - static inline bool IsOneByteString(Vector string) { + static inline bool IsOneByteString(Vector string) { return true; } @@ -150,13 +154,25 @@ class StringSearch : private StringSearchBase { void PopulateBoyerMooreTable(); + static inline bool exceedsOneByte(uint8_t c) { +#ifdef ENABLE_LATIN_1 + return false; +#else + return c > String::kMaxOneByteCharCodeU; +#endif + } + + static inline bool exceedsOneByte(uint16_t c) { + return c > String::kMaxOneByteCharCodeU; + } + static inline int CharOccurrence(int* bad_char_occurrence, SubjectChar char_code) { if (sizeof(SubjectChar) == 1) { return bad_char_occurrence[static_cast(char_code)]; } if (sizeof(PatternChar) == 1) { - if (static_cast(char_code) > String::kMaxOneByteCharCodeU) { + if (exceedsOneByte(char_code)) { return -1; } return bad_char_occurrence[static_cast(char_code)]; @@ -223,8 +239,7 @@ int StringSearch::SingleCharSearch( return static_cast(pos - subject.start()); } else { if (sizeof(PatternChar) > sizeof(SubjectChar)) { - if (static_cast(pattern_first_char) > - String::kMaxOneByteCharCodeU) { + if (exceedsOneByte(pattern_first_char)) { return -1; } } diff --git a/src/v8conversions.cc b/src/v8conversions.cc index 26a4868a8d..900b62d10b 100644 --- a/src/v8conversions.cc +++ b/src/v8conversions.cc @@ -85,8 +85,8 @@ double StringToDouble(UnicodeCache* unicode_cache, StringShape shape(str); // TODO(dcarney): Use a Visitor here. if (shape.IsSequentialAscii()) { - const char* begin = SeqOneByteString::cast(str)->GetChars(); - const char* end = begin + str->length(); + const uint8_t* begin = SeqOneByteString::cast(str)->GetChars(); + const uint8_t* end = begin + str->length(); return InternalStringToDouble(unicode_cache, begin, end, flags, empty_string_val); } else if (shape.IsSequentialTwoByte()) { @@ -112,8 +112,8 @@ double StringToInt(UnicodeCache* unicode_cache, StringShape shape(str); // TODO(dcarney): Use a Visitor here. if (shape.IsSequentialAscii()) { - const char* begin = SeqOneByteString::cast(str)->GetChars(); - const char* end = begin + str->length(); + const uint8_t* begin = SeqOneByteString::cast(str)->GetChars(); + const uint8_t* end = begin + str->length(); return InternalStringToInt(unicode_cache, begin, end, radix); } else if (shape.IsSequentialTwoByte()) { const uc16* begin = SeqTwoByteString::cast(str)->GetChars(); diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index c93340f14d..3f17d03b92 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -5487,16 +5487,32 @@ void StringCompareStub::GenerateCompareFlatAsciiStrings(MacroAssembler* masm, // Compare lengths (precomputed). __ bind(&compare_lengths); __ SmiTest(length_difference); +#ifndef ENABLE_LATIN_1 __ j(not_zero, &result_not_equal, Label::kNear); +#else + Label length_not_equal; + __ j(not_zero, &length_not_equal, Label::kNear); +#endif // Result is EQUAL. __ Move(rax, Smi::FromInt(EQUAL)); __ ret(0); Label result_greater; +#ifdef ENABLE_LATIN_1 + Label result_less; + __ bind(&length_not_equal); + __ j(greater, &result_greater, Label::kNear); + __ jmp(&result_less, Label::kNear); +#endif __ bind(&result_not_equal); // Unequal comparison of left to right, either character or length. +#ifndef ENABLE_LATIN_1 __ j(greater, &result_greater, Label::kNear); +#else + __ j(above, &result_greater, Label::kNear); + __ bind(&result_less); +#endif // Result is LESS. __ Move(rax, Smi::FromInt(LESS)); diff --git a/src/x64/regexp-macro-assembler-x64.cc b/src/x64/regexp-macro-assembler-x64.cc index 152aebb910..f5b5e954ad 100644 --- a/src/x64/regexp-macro-assembler-x64.cc +++ b/src/x64/regexp-macro-assembler-x64.cc @@ -393,8 +393,17 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase( __ j(not_equal, on_no_match); // Definitely not equal. __ subb(rax, Immediate('a')); __ cmpb(rax, Immediate('z' - 'a')); +#ifndef ENABLE_LATIN_1 __ j(above, on_no_match); // Weren't letters anyway. - +#else + __ j(below_equal, &loop_increment); // In range 'a'-'z'. + // Latin-1: Check for values in range [224,254] but not 247. + __ subb(rax, Immediate(224 - 'a')); + __ cmpb(rax, Immediate(254 - 224)); + __ j(above, on_no_match); // Weren't Latin-1 letters. + __ cmpb(rax, Immediate(247 - 224)); // Check for 247. + __ j(equal, on_no_match); +#endif __ bind(&loop_increment); // Increment pointers into match and capture strings. __ addq(r11, Immediate(1)); diff --git a/test/cctest/test-hashing.cc b/test/cctest/test-hashing.cc index e607e8496e..605b59b5f4 100644 --- a/test/cctest/test-hashing.cc +++ b/test/cctest/test-hashing.cc @@ -153,22 +153,25 @@ void generate(MacroAssembler* masm, uint32_t key) { void check(i::Vector string) { - v8::HandleScope scope; + Isolate* isolate = Isolate::Current(); + Factory* factory = isolate->factory(); + HandleScope scope(isolate); + v8::internal::byte buffer[2048]; - MacroAssembler masm(Isolate::Current(), buffer, sizeof buffer); + MacroAssembler masm(isolate, buffer, sizeof buffer); generate(&masm, string); CodeDesc desc; masm.GetCode(&desc); - Code* code = Code::cast(HEAP->CreateCode( - desc, - Code::ComputeFlags(Code::STUB), - Handle(HEAP->undefined_value()))->ToObjectChecked()); + Handle undefined(isolate->heap()->undefined_value(), isolate); + Handle code = factory->NewCode(desc, + Code::ComputeFlags(Code::STUB), + undefined); CHECK(code->IsCode()); HASH_FUNCTION hash = FUNCTION_CAST(code->entry()); - Handle v8_string = FACTORY->NewStringFromOneByte(string); + Handle v8_string = factory->NewStringFromOneByte(string); v8_string->set_hash_field(String::kEmptyHashField); #ifdef USE_SIMULATOR uint32_t codegen_hash = @@ -187,7 +190,10 @@ void check(i::Vector s) { void check(uint32_t key) { - v8::HandleScope scope; + Isolate* isolate = Isolate::Current(); + Factory* factory = isolate->factory(); + HandleScope scope(isolate); + v8::internal::byte buffer[2048]; MacroAssembler masm(Isolate::Current(), buffer, sizeof buffer); @@ -195,10 +201,10 @@ void check(uint32_t key) { CodeDesc desc; masm.GetCode(&desc); - Code* code = Code::cast(HEAP->CreateCode( - desc, - Code::ComputeFlags(Code::STUB), - Handle(HEAP->undefined_value()))->ToObjectChecked()); + Handle undefined(isolate->heap()->undefined_value(), isolate); + Handle code = factory->NewCode(desc, + Code::ComputeFlags(Code::STUB), + undefined); CHECK(code->IsCode()); HASH_FUNCTION hash = FUNCTION_CAST(code->entry()); @@ -209,9 +215,7 @@ void check(uint32_t key) { uint32_t codegen_hash = hash(); #endif - uint32_t runtime_hash = ComputeIntegerHash( - key, - Isolate::Current()->heap()->HashSeed()); + uint32_t runtime_hash = ComputeIntegerHash(key, isolate->heap()->HashSeed()); CHECK(runtime_hash == codegen_hash); } @@ -229,12 +233,12 @@ static uint32_t PseudoRandom(uint32_t i, uint32_t j) { TEST(StringHash) { if (env.IsEmpty()) env = v8::Context::New(); - for (int a = 0; a < String::kMaxOneByteCharCode; a++) { + for (uint8_t a = 0; a < String::kMaxOneByteCharCode; a++) { // Numbers are hashed differently. if (a >= '0' && a <= '9') continue; - for (int b = 0; b < String::kMaxOneByteCharCode; b++) { + for (uint8_t b = 0; b < String::kMaxOneByteCharCode; b++) { if (b >= '0' && b <= '9') continue; - check_twochars(static_cast(a), static_cast(b)); + check_twochars(a, b); } } check(i::Vector("*", 1)); diff --git a/test/mjsunit/regexp-capture-3.js b/test/mjsunit/regexp-capture-3.js index b676f01c2c..4c27ea454b 100755 --- a/test/mjsunit/regexp-capture-3.js +++ b/test/mjsunit/regexp-capture-3.js @@ -165,23 +165,22 @@ function NoHang(re) { "This is an ASCII string that could take forever".match(re); } - -NoHang(/(((.*)*)*x)å/); // Continuation after loop is filtered, so is loop. -NoHang(/(((.*)*)*å)foo/); // Body of loop filtered. -NoHang(/å(((.*)*)*x)/); // Everything after a filtered character is filtered. -NoHang(/(((.*)*)*x)å/); // Everything before a filtered character is filtered. -NoHang(/[æøå](((.*)*)*x)/); // Everything after a filtered class is filtered. -NoHang(/(((.*)*)*x)[æøå]/); // Everything before a filtered class is filtered. -NoHang(/[^\x00-\x7f](((.*)*)*x)/); // After negated class. -NoHang(/(((.*)*)*x)[^\x00-\x7f]/); // Before negated class. -NoHang(/(?!(((.*)*)*x)å)foo/); // Negative lookahead is filtered. -NoHang(/(?!(((.*)*)*x))å/); // Continuation branch of negative lookahead. -NoHang(/(?=(((.*)*)*x)å)foo/); // Positive lookahead is filtered. -NoHang(/(?=(((.*)*)*x))å/); // Continuation branch of positive lookahead. -NoHang(/(?=å)(((.*)*)*x)/); // Positive lookahead also prunes continuation. -NoHang(/(æ|ø|å)(((.*)*)*x)/); // All branches of alternation are filtered. -NoHang(/(a|b|(((.*)*)*x))å/); // 1 out of 3 branches pruned. -NoHang(/(a|(((.*)*)*x)ø|(((.*)*)*x)å)/); // 2 out of 3 branches pruned. +NoHang(/(((.*)*)*x)Ā/); // Continuation after loop is filtered, so is loop. +NoHang(/(((.*)*)*Ā)foo/); // Body of loop filtered. +NoHang(/Ā(((.*)*)*x)/); // Everything after a filtered character is filtered. +NoHang(/(((.*)*)*x)Ā/); // Everything before a filtered character is filtered. +NoHang(/[ćăĀ](((.*)*)*x)/); // Everything after a filtered class is filtered. +NoHang(/(((.*)*)*x)[ćăĀ]/); // Everything before a filtered class is filtered. +NoHang(/[^\x00-\xff](((.*)*)*x)/); // After negated class. +NoHang(/(((.*)*)*x)[^\x00-\xff]/); // Before negated class. +NoHang(/(?!(((.*)*)*x)Ā)foo/); // Negative lookahead is filtered. +NoHang(/(?!(((.*)*)*x))Ā/); // Continuation branch of negative lookahead. +NoHang(/(?=(((.*)*)*x)Ā)foo/); // Positive lookahead is filtered. +NoHang(/(?=(((.*)*)*x))Ā/); // Continuation branch of positive lookahead. +NoHang(/(?=Ā)(((.*)*)*x)/); // Positive lookahead also prunes continuation. +NoHang(/(æ|ø|Ā)(((.*)*)*x)/); // All branches of alternation are filtered. +NoHang(/(a|b|(((.*)*)*x))Ā/); // 1 out of 3 branches pruned. +NoHang(/(a|(((.*)*)*x)ă|(((.*)*)*x)Ā)/); // 2 out of 3 branches pruned. var s = "Don't prune based on a repetition of length 0"; assertEquals(null, s.match(/å{1,1}prune/)); diff --git a/test/mjsunit/regress/regress-latin-1.js b/test/mjsunit/regress/regress-latin-1.js new file mode 100644 index 0000000000..b1f006d95f --- /dev/null +++ b/test/mjsunit/regress/regress-latin-1.js @@ -0,0 +1,59 @@ +// Copyright 2013 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +assertEquals(String.fromCharCode(97, 220, 256), 'a' + '\u00DC' + '\u0100'); +assertEquals(String.fromCharCode(97, 220, 256), 'a\u00DC\u0100'); + +assertEquals(0x80, JSON.stringify("\x80").charCodeAt(1)); + +assertEquals(['a', 'b', '\xdc'], ['b', '\xdc', 'a'].sort()); + +assertEquals(['\xfc\xdc', '\xfc'], new RegExp('(\xdc)\\1', 'i').exec('\xfc\xdc')); +// Same test but for all values in Latin-1 range. +var total_lo = 0; +for (var i = 0; i < 0xff; i++) { + var base = String.fromCharCode(i); + var escaped = base; + if (base == '(' || base == ')' || base == '*' || base == '+' || + base == '?' || base == '[' || base == ']' || base == '\\' || + base == '$' || base == '^' || base == '|') { + escaped = '\\' + base; + } + var lo = String.fromCharCode(i + 0x20); + base_result = new RegExp('(' + escaped + ')\\1', 'i').exec(base + base); + assertEquals( base_result, [base + base, base]); + lo_result = new RegExp('(' + escaped + ')\\1', 'i').exec(base + lo); + if (base.toLowerCase() == lo) { + assertEquals([base + lo, base], lo_result); + total_lo++; + } else { + assertEquals(null, lo_result); + } +} +// Should have hit the branch for the following char codes: +// [A-Z], [192-222] but not 215 +assertEquals((90-65+1)+(222-192-1+1), total_lo);