Continues Latin-1 support. All tests pass with ENABLE_LATIN_1 flag.

R=yangguo@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/11818025
Patch from Dan Carney <dcarney@google.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13344 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2013-01-09 15:47:53 +00:00
parent 7541dff7bb
commit e41c17084f
25 changed files with 343 additions and 207 deletions

View File

@ -4149,7 +4149,10 @@ int String::WriteAscii(char* buffer,
// WriteToFlat is faster than using the StringCharacterStream.
if (length == -1) length = str->length() + 1;
int len = i::Min(length, str->length() - start);
i::String::WriteToFlat(*str, buffer, start, start + len);
i::String::WriteToFlat(*str,
reinterpret_cast<uint8_t*>(buffer),
start,
start + len);
if (!(options & PRESERVE_ASCII_NULL)) {
for (int i = 0; i < len; i++) {
if (buffer[i] == '\0') buffer[i] = ' ';

View File

@ -337,8 +337,17 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
__ b(ne, &fail);
__ sub(r3, r3, Operand('a'));
__ cmp(r3, Operand('z' - 'a')); // Is r3 a lowercase letter?
#ifndef ENABLE_LATIN_1
__ b(hi, &fail);
#else
__ b(ls, &loop_check); // In range 'a'-'z'.
// Latin-1: Check for values in range [224,254] but not 247.
__ sub(r3, r3, Operand(224 - 'a'));
__ cmp(r3, Operand(254 - 224));
__ b(hi, &fail); // Weren't Latin-1 letters.
__ cmp(r3, Operand(247 - 224)); // Check for 247.
__ b(eq, &fail);
#endif
__ bind(&loop_check);
__ cmp(r0, r1);

View File

@ -94,10 +94,10 @@ v8::Handle<v8::Value> ExternalizeStringExtension::Externalize(
"externalizeString() can't externalize twice."));
}
if (string->IsOneByteRepresentation() && !force_two_byte) {
char* data = new char[string->length()];
uint8_t* data = new uint8_t[string->length()];
String::WriteToFlat(*string, data, 0, string->length());
SimpleAsciiStringResource* resource = new SimpleAsciiStringResource(
data, string->length());
reinterpret_cast<char*>(data), string->length());
result = string->MakeExternal(resource);
if (result && !string->IsSymbol()) {
HEAP->external_string_table()->AddString(*string);

View File

@ -423,7 +423,7 @@ static void CalculateLineEnds(Isolate* isolate,
Vector<const SourceChar> src,
bool with_last_line) {
const int src_len = src.length();
StringSearch<char, SourceChar> search(isolate, CStrVector("\n"));
StringSearch<uint8_t, SourceChar> search(isolate, STATIC_ASCII_VECTOR("\n"));
// Find and record line ends.
int position = 0;
@ -457,7 +457,7 @@ Handle<FixedArray> CalculateLineEnds(Handle<String> src,
if (content.IsAscii()) {
CalculateLineEnds(isolate,
&line_ends,
content.ToAsciiVector(),
content.ToOneByteVector(),
with_last_line);
} else {
CalculateLineEnds(isolate,

View File

@ -3339,9 +3339,9 @@ MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
{ MaybeObject* maybe_result = heap->AllocateRawOneByteString(2);
if (!maybe_result->ToObject(&result)) return maybe_result;
}
char* dest = SeqOneByteString::cast(result)->GetChars();
dest[0] = static_cast<char>(c1);
dest[1] = static_cast<char>(c2);
uint8_t* dest = SeqOneByteString::cast(result)->GetChars();
dest[0] = static_cast<uint8_t>(c1);
dest[1] = static_cast<uint8_t>(c2);
return result;
} else {
Object* result;
@ -3412,9 +3412,9 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
if (!maybe_result->ToObject(&result)) return maybe_result;
}
// Copy the characters into the new object.
char* dest = SeqOneByteString::cast(result)->GetChars();
uint8_t* dest = SeqOneByteString::cast(result)->GetChars();
// Copy first part.
const char* src;
const uint8_t* src;
if (first->IsExternalString()) {
src = ExternalAsciiString::cast(first)->GetChars();
} else {
@ -3436,7 +3436,7 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
if (!maybe_result->ToObject(&result)) return maybe_result;
}
// Copy the characters into the new object.
char* dest = SeqOneByteString::cast(result)->GetChars();
uint8_t* dest = SeqOneByteString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
isolate_->counters()->string_add_runtime_ext_to_ascii()->Increment();
@ -3513,7 +3513,7 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
// Copy the characters into the new object.
if (is_one_byte) {
ASSERT(string_result->IsOneByteRepresentation());
char* dest = SeqOneByteString::cast(string_result)->GetChars();
uint8_t* dest = SeqOneByteString::cast(string_result)->GetChars();
String::WriteToFlat(buffer, dest, start, end);
} else {
ASSERT(string_result->IsTwoByteRepresentation());
@ -4555,7 +4555,7 @@ MaybeObject* Heap::AllocateStringFromOneByte(Vector<const uint8_t> string,
}
// Copy the characters into the new object.
CopyChars(SeqOneByteString::cast(result)->GetCharsU(),
CopyChars(SeqOneByteString::cast(result)->GetChars(),
string.start(),
length);
return result;
@ -4654,7 +4654,7 @@ template<>
class AllocateInternalSymbolHelper< Vector<const char> > {
public:
static inline void WriteOneByteData(Vector<const char> vector,
char* chars,
uint8_t* chars,
int len) {
// Only works for ascii.
ASSERT(vector.length() == len);
@ -4696,7 +4696,7 @@ class AllocateInternalSymbolHelper< Vector<const char> > {
template<>
class AllocateInternalSymbolHelper<String*> {
public:
static inline void WriteOneByteData(String* s, char* chars, int len) {
static inline void WriteOneByteData(String* s, uint8_t* chars, int len) {
ASSERT(s->length() == len);
String::WriteToFlat(s, chars, 0, len);
}
@ -4806,13 +4806,15 @@ MaybeObject* Heap::AllocateRawOneByteString(int length,
String::cast(result)->set_hash_field(String::kEmptyHashField);
ASSERT_EQ(size, HeapObject::cast(result)->Size());
#ifndef ENABLE_LATIN_1
#ifdef VERIFY_HEAP
if (FLAG_verify_heap) {
// Initialize string's content to ensure ASCII-ness (character range 0-127)
// as required when verifying the heap.
char* dest = SeqOneByteString::cast(result)->GetChars();
uint8_t* dest = SeqOneByteString::cast(result)->GetChars();
memset(dest, 0x0F, length * kCharSize);
}
#endif
#endif
return result;

View File

@ -6478,7 +6478,12 @@ void StringCompareStub::GenerateCompareFlatAsciiStrings(MacroAssembler* masm,
// Compare lengths - strings up to min-length are equal.
__ bind(&compare_lengths);
__ test(length_delta, length_delta);
#ifndef ENABLE_LATIN_1
__ j(not_zero, &result_not_equal, Label::kNear);
#else
Label length_not_equal;
__ j(not_zero, &length_not_equal, Label::kNear);
#endif
// Result is EQUAL.
STATIC_ASSERT(EQUAL == 0);
@ -6487,8 +6492,19 @@ void StringCompareStub::GenerateCompareFlatAsciiStrings(MacroAssembler* masm,
__ ret(0);
Label result_greater;
__ bind(&result_not_equal);
#ifdef ENABLE_LATIN_1
Label result_less;
__ bind(&length_not_equal);
__ j(greater, &result_greater, Label::kNear);
__ jmp(&result_less, Label::kNear);
#endif
__ bind(&result_not_equal);
#ifndef ENABLE_LATIN_1
__ j(greater, &result_greater, Label::kNear);
#else
__ j(above, &result_greater, Label::kNear);
__ bind(&result_less);
#endif
// Result is LESS.
__ Set(eax, Immediate(Smi::FromInt(LESS)));

View File

@ -344,7 +344,19 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
__ or_(eax, 0x20); // Convert match character to lower-case.
__ lea(ecx, Operand(eax, -'a'));
__ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter?
__ j(above, &fail);
#ifndef ENABLE_LATIN_1
__ j(above, &fail); // Weren't letters anyway.
#else
Label convert_capture;
__ j(below_equal, &convert_capture); // In range 'a'-'z'.
// Latin-1: Check for values in range [224,254] but not 247.
__ sub(ecx, Immediate(224 - 'a'));
__ cmp(ecx, Immediate(254 - 224));
__ j(above, &fail); // Weren't Latin-1 letters.
__ cmp(ecx, Immediate(247 - 224)); // Check for 247.
__ j(equal, &fail);
__ bind(&convert_capture);
#endif
// Also convert capture character.
__ movzx_b(ecx, Operand(edx, 0));
__ or_(ecx, 0x20);

View File

@ -68,7 +68,7 @@ static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
int from,
int current,
int len,
Vector<const char> subject) {
Vector<const uint8_t> subject) {
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
unsigned int new_char = subject[current++];
@ -617,7 +617,7 @@ RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
uc16 previous_char = '\n';
String::FlatContent subject_content = subject->GetFlatContent();
if (subject_content.IsAscii()) {
Vector<const char> subject_vector = subject_content.ToAsciiVector();
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate,
code_base,

View File

@ -536,14 +536,15 @@ void Isolate::PushStackTraceAndDie(unsigned int magic,
unsigned int magic2) {
const int kMaxStackTraceSize = 8192;
Handle<String> trace = StackTraceString();
char buffer[kMaxStackTraceSize];
uint8_t buffer[kMaxStackTraceSize];
int length = Min(kMaxStackTraceSize - 1, trace->length());
String::WriteToFlat(*trace, buffer, 0, length);
buffer[length] = '\0';
// TODO(dcarney): convert buffer to utf8?
OS::PrintError("Stacktrace (%x-%x) %p %p: %s\n",
magic, magic2,
static_cast<void*>(object), static_cast<void*>(map),
buffer);
reinterpret_cast<char*>(buffer));
OS::Abort();
}

View File

@ -441,19 +441,19 @@ Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() {
int length = position_ - beg_pos;
double number;
if (seq_ascii) {
Vector<const char> chars(seq_source_->GetChars() + beg_pos, length);
Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length);
number = StringToDouble(isolate()->unicode_cache(),
chars,
Vector<const char>::cast(chars),
NO_FLAGS, // Hex, octal or trailing junk.
OS::nan_value());
} else {
Vector<char> buffer = Vector<char>::New(length);
Vector<uint8_t> buffer = Vector<uint8_t>::New(length);
String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
Vector<const char> result =
Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
length);
Vector<const uint8_t> result =
Vector<const uint8_t>(buffer.start(), length);
number = StringToDouble(isolate()->unicode_cache(),
result,
// TODO(dcarney): Convert StringToDouble to take uint8_t.
Vector<const char>::cast(result),
NO_FLAGS, // Hex, octal or trailing junk.
0.0);
buffer.Dispose();
@ -627,7 +627,7 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
c0_ = c0;
int beg_pos = position_;
position_ = position;
return SlowScanJsonString<SeqOneByteString, char>(source_,
return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
beg_pos,
position_);
}
@ -651,7 +651,7 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
uint32_t hash = (length <= String::kMaxHashCalcLength)
? StringHasher::GetHashCore(running_hash) : length;
Vector<const uint8_t> string_vector(
seq_source_->GetCharsU() + position_, length);
seq_source_->GetChars() + position_, length);
SymbolTable* symbol_table = isolate()->heap()->symbol_table();
uint32_t capacity = symbol_table->Capacity();
uint32_t entry = SymbolTable::FirstProbe(hash, capacity);
@ -688,7 +688,7 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
position_);
}
} else {
return SlowScanJsonString<SeqOneByteString, char>(source_,
return SlowScanJsonString<SeqOneByteString, uint8_t>(source_,
beg_pos,
position_);
}
@ -699,7 +699,7 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
result = factory()->LookupOneByteSymbol(seq_source_, beg_pos, length);
} else {
result = factory()->NewRawOneByteString(length, pretenure_);
char* dest = SeqOneByteString::cast(*result)->GetChars();
uint8_t* dest = SeqOneByteString::cast(*result)->GetChars();
String::WriteToFlat(*source_, dest, beg_pos, position_);
}
ASSERT_EQ('"', c0_);

View File

@ -60,7 +60,7 @@ class BasicJsonStringifier BASE_EMBEDDED {
template <bool is_ascii, typename Char>
INLINE(void Append_(const Char* chars));
INLINE(void Append(char c)) {
INLINE(void Append(uint8_t c)) {
if (is_ascii_) {
Append_<true>(c);
} else {
@ -68,11 +68,11 @@ class BasicJsonStringifier BASE_EMBEDDED {
}
}
INLINE(void Append(const char* chars)) {
INLINE(void AppendAscii(const char* chars)) {
if (is_ascii_) {
Append_<true>(chars);
Append_<true>(reinterpret_cast<const uint8_t*>(chars));
} else {
Append_<false>(chars);
Append_<false>(reinterpret_cast<const uint8_t*>(chars));
}
}
@ -327,15 +327,15 @@ BasicJsonStringifier::Result BasicJsonStringifier::Serialize_(
switch (Oddball::cast(*object)->kind()) {
case Oddball::kFalse:
if (deferred_string_key) SerializeDeferredKey(comma, key);
Append("false");
AppendAscii("false");
return SUCCESS;
case Oddball::kTrue:
if (deferred_string_key) SerializeDeferredKey(comma, key);
Append("true");
AppendAscii("true");
return SUCCESS;
case Oddball::kNull:
if (deferred_string_key) SerializeDeferredKey(comma, key);
Append("null");
AppendAscii("null");
return SUCCESS;
default:
return UNCHANGED;
@ -412,7 +412,7 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSValue(
ASSERT(class_name == isolate_->heap()->Boolean_symbol());
Object* value = JSValue::cast(*object)->value();
ASSERT(value->IsBoolean());
Append(value->IsTrue() ? "true" : "false");
AppendAscii(value->IsTrue() ? "true" : "false");
}
return SUCCESS;
}
@ -422,7 +422,7 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeSmi(Smi* object) {
static const int kBufferSize = 100;
char chars[kBufferSize];
Vector<char> buffer(chars, kBufferSize);
Append(IntToCString(object->value(), buffer));
AppendAscii(IntToCString(object->value(), buffer));
return SUCCESS;
}
@ -430,13 +430,13 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeSmi(Smi* object) {
BasicJsonStringifier::Result BasicJsonStringifier::SerializeDouble(
double number) {
if (isinf(number) || isnan(number)) {
Append("null");
AppendAscii("null");
return SUCCESS;
}
static const int kBufferSize = 100;
char chars[kBufferSize];
Vector<char> buffer(chars, kBufferSize);
Append(DoubleToCString(number, buffer));
AppendAscii(DoubleToCString(number, buffer));
return SUCCESS;
}
@ -476,7 +476,7 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSArray(
SerializeElement(Handle<Object>(elements->get(i), isolate_), i);
if (result == SUCCESS) continue;
if (result == UNCHANGED) {
Append("null");
AppendAscii("null");
} else {
return result;
}
@ -505,12 +505,12 @@ BasicJsonStringifier::Result BasicJsonStringifier::SerializeJSArraySlow(
if (i > 0) Append(',');
Handle<Object> element = Object::GetElement(object, i);
if (element->IsUndefined()) {
Append("null");
AppendAscii("null");
} else {
Result result = SerializeElement(element, i);
if (result == SUCCESS) continue;
if (result == UNCHANGED) {
Append("null");
AppendAscii("null");
} else {
return result;
}
@ -682,8 +682,9 @@ void BasicJsonStringifier::SerializeString_(Handle<String> string) {
if (DoNotEscape(c)) {
Append_<is_ascii, Char>(c);
} else {
Append_<is_ascii, char>(
&JsonEscapeTable[c * kJsonEscapeTableEntrySize]);
Append_<is_ascii, uint8_t>(
reinterpret_cast<const uint8_t*>(
&JsonEscapeTable[c * kJsonEscapeTableEntrySize]));
}
// If GC moved the string, we need to refresh the vector.
if (*string != string_location) {
@ -693,27 +694,22 @@ void BasicJsonStringifier::SerializeString_(Handle<String> string) {
}
}
Append_<is_ascii, char>('"');
Append_<is_ascii, uint8_t>('"');
}
template <>
bool BasicJsonStringifier::DoNotEscape(char c) {
return c >= '#' && c <= '~' && c != '\\';
}
template <>
bool BasicJsonStringifier::DoNotEscape(uc16 c) {
template <typename Char>
bool BasicJsonStringifier::DoNotEscape(Char c) {
return (c >= 0x80) || (c >= '#' && c <= '~' && c != '\\');
}
template <>
Vector<const char> BasicJsonStringifier::GetCharVector(Handle<String> string) {
Vector<const uint8_t> BasicJsonStringifier::GetCharVector(
Handle<String> string) {
String::FlatContent flat = string->GetFlatContent();
ASSERT(flat.IsAscii());
return flat.ToAsciiVector();
return flat.ToOneByteVector();
}
@ -730,14 +726,14 @@ void BasicJsonStringifier::SerializeString(Handle<String> object) {
String::FlatContent flat = object->GetFlatContent();
if (is_ascii_) {
if (flat.IsAscii()) {
SerializeString_<true, char>(object);
SerializeString_<true, uint8_t>(object);
} else {
ChangeEncoding();
SerializeString(object);
}
} else {
if (flat.IsAscii()) {
SerializeString_<false, char>(object);
SerializeString_<false, uint8_t>(object);
} else {
SerializeString_<false, uc16>(object);
}

View File

@ -309,16 +309,16 @@ int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
index = (needle_content.IsAscii()
? (subject_content.IsAscii()
? SearchString(isolate,
subject_content.ToAsciiVector(),
needle_content.ToAsciiVector(),
subject_content.ToOneByteVector(),
needle_content.ToOneByteVector(),
index)
: SearchString(isolate,
subject_content.ToUC16Vector(),
needle_content.ToAsciiVector(),
needle_content.ToOneByteVector(),
index))
: (subject_content.IsAscii()
? SearchString(isolate,
subject_content.ToAsciiVector(),
subject_content.ToOneByteVector(),
needle_content.ToUC16Vector(),
index)
: SearchString(isolate,

View File

@ -384,7 +384,10 @@ class Logger::NameBuffer {
if (str == NULL) return;
if (str->HasOnlyAsciiChars()) {
int utf8_length = Min(str->length(), kUtf8BufferSize - utf8_pos_);
String::WriteToFlat(str, utf8_buffer_ + utf8_pos_, 0, utf8_length);
String::WriteToFlat(str,
reinterpret_cast<uint8_t*>(utf8_buffer_ + utf8_pos_),
0,
utf8_length);
utf8_pos_ += utf8_length;
return;
}

View File

@ -2551,30 +2551,25 @@ void String::Visit(
switch (type & (kStringRepresentationMask | kStringEncodingMask)) {
case kSeqStringTag | kOneByteStringTag:
visitor.VisitOneByteString(
reinterpret_cast<const uint8_t*>(
SeqOneByteString::cast(string)->GetChars()) + slice_offset,
SeqOneByteString::cast(string)->GetChars() + slice_offset,
length - offset);
return;
case kSeqStringTag | kTwoByteStringTag:
visitor.VisitTwoByteString(
reinterpret_cast<const uint16_t*>(
SeqTwoByteString::cast(string)->GetChars()) + slice_offset,
SeqTwoByteString::cast(string)->GetChars() + slice_offset,
length - offset);
return;
case kExternalStringTag | kOneByteStringTag:
visitor.VisitOneByteString(
reinterpret_cast<const uint8_t*>(
ExternalAsciiString::cast(string)->GetChars()) + slice_offset,
ExternalAsciiString::cast(string)->GetChars() + slice_offset,
length - offset);
return;
case kExternalStringTag | kTwoByteStringTag:
visitor.VisitTwoByteString(
reinterpret_cast<const uint16_t*>(
ExternalTwoByteString::cast(string)->GetChars())
+ slice_offset,
ExternalTwoByteString::cast(string)->GetChars() + slice_offset,
length - offset);
return;
@ -2621,12 +2616,7 @@ Address SeqOneByteString::GetCharsAddress() {
}
char* SeqOneByteString::GetChars() {
return reinterpret_cast<char*>(GetCharsAddress());
}
uint8_t* SeqOneByteString::GetCharsU() {
uint8_t* SeqOneByteString::GetChars() {
return reinterpret_cast<uint8_t*>(GetCharsAddress());
}
@ -2737,8 +2727,8 @@ void ExternalAsciiString::set_resource(
}
const char* ExternalAsciiString::GetChars() {
return resource()->data();
const uint8_t* ExternalAsciiString::GetChars() {
return reinterpret_cast<const uint8_t*>(resource()->data());
}

View File

@ -903,7 +903,7 @@ MaybeObject* String::SlowTryFlatten(PretenureFlag pretenure) {
result = String::cast(object);
String* first = cs->first();
int first_length = first->length();
char* dest = SeqOneByteString::cast(result)->GetChars();
uint8_t* dest = SeqOneByteString::cast(result)->GetChars();
WriteToFlat(first, dest, 0, first_length);
String* second = cs->second();
WriteToFlat(second,
@ -6561,13 +6561,13 @@ String::FlatContent String::GetFlatContent() {
shape.representation_tag() != kSlicedStringTag);
}
if (shape.encoding_tag() == kOneByteStringTag) {
const char* start;
const uint8_t* start;
if (shape.representation_tag() == kSeqStringTag) {
start = SeqOneByteString::cast(string)->GetChars();
} else {
start = ExternalAsciiString::cast(string)->GetChars();
}
return FlatContent(Vector<const char>(start + offset, length));
return FlatContent(Vector<const uint8_t>(start + offset, length));
} else {
ASSERT(shape.encoding_tag() == kTwoByteStringTag);
const uc16* start;
@ -6770,7 +6770,7 @@ void FlatStringReader::PostGarbageCollection() {
ASSERT(content.IsFlat());
is_ascii_ = content.IsAscii();
if (is_ascii_) {
start_ = content.ToAsciiVector().start();
start_ = content.ToOneByteVector().start();
} else {
start_ = content.ToUC16Vector().start();
}
@ -7254,8 +7254,8 @@ bool String::SlowEquals(String* other) {
// TODO(dcarney): Compare all types of flat strings with a Visitor.
if (StringShape(lhs).IsSequentialAscii() &&
StringShape(rhs).IsSequentialAscii()) {
const char* str1 = SeqOneByteString::cast(lhs)->GetChars();
const char* str2 = SeqOneByteString::cast(rhs)->GetChars();
const uint8_t* str1 = SeqOneByteString::cast(lhs)->GetChars();
const uint8_t* str2 = SeqOneByteString::cast(rhs)->GetChars();
return CompareRawStringContents(str1, str2, len);
}
@ -11525,7 +11525,7 @@ class SubStringOneByteSymbolKey : public HashTableKey {
uint32_t Hash() {
ASSERT(length_ >= 0);
ASSERT(from_ + length_ <= string_->length());
char* chars = string_->GetChars() + from_;
uint8_t* chars = string_->GetChars() + from_;
hash_field_ = StringHasher::HashSequentialString(
chars, length_, string_->GetHeap()->HashSeed());
uint32_t result = hash_field_ >> String::kHashShift;
@ -11539,15 +11539,13 @@ class SubStringOneByteSymbolKey : public HashTableKey {
}
bool IsMatch(Object* string) {
Vector<const uint8_t> chars(string_->GetCharsU() + from_, length_);
Vector<const uint8_t> chars(string_->GetChars() + from_, length_);
return String::cast(string)->IsOneByteEqualTo(chars);
}
MaybeObject* AsObject() {
if (hash_field_ == 0) Hash();
Vector<const uint8_t> chars(
reinterpret_cast<uint8_t*>(string_->GetChars()) + from_,
length_);
Vector<const uint8_t> chars(string_->GetChars() + from_, length_);
return HEAP->AllocateOneByteSymbol(chars, hash_field_);
}

View File

@ -7090,13 +7090,6 @@ class String: public HeapObject {
// Returns true if the structure contains two-byte content.
bool IsTwoByte() { return state_ == TWO_BYTE; }
// TODO(dcarney): Remove this function.
// Return the ASCII content of the string. Only use if IsAscii() returns
// true.
Vector<const char> ToAsciiVector() {
ASSERT_EQ(ASCII, state_);
return Vector<const char>::cast(buffer_);
}
// Return the one byte content of the string. Only use if IsAscii() returns
// true.
Vector<const uint8_t> ToOneByteVector() {
@ -7114,15 +7107,15 @@ class String: public HeapObject {
enum State { NON_FLAT, ASCII, TWO_BYTE };
// Constructors only used by String::GetFlatContent().
explicit FlatContent(Vector<const char> chars)
: buffer_(Vector<const byte>::cast(chars)),
explicit FlatContent(Vector<const uint8_t> chars)
: buffer_(chars),
state_(ASCII) { }
explicit FlatContent(Vector<const uc16> chars)
: buffer_(Vector<const byte>::cast(chars)),
state_(TWO_BYTE) { }
FlatContent() : buffer_(), state_(NON_FLAT) { }
Vector<const byte> buffer_;
Vector<const uint8_t> buffer_;
State state_;
friend class String;
@ -7391,6 +7384,11 @@ class String: public HeapObject {
return NonAsciiStart(chars, length) >= length;
}
// One-byte (uint8_t) overload of IsAscii. Views the buffer as plain char and
// defers to NonAsciiStart; the result is true when the index NonAsciiStart
// reports is not below length.
static inline bool IsAscii(const uint8_t* chars, int length) {
  const char* as_chars = reinterpret_cast<const char*>(chars);
  return NonAsciiStart(as_chars, length) >= length;
}
static inline int NonOneByteStart(const uc16* chars, int length) {
const uc16* limit = chars + length;
const uc16* start = chars;
@ -7467,9 +7465,7 @@ class SeqOneByteString: public SeqString {
// Get the address of the characters in this string.
inline Address GetCharsAddress();
// TODO(dcarney): remove GetChars and rename GetCharsU to GetChars.
inline char* GetChars();
inline uint8_t* GetCharsU();
inline uint8_t* GetChars();
// Casting
static inline SeqOneByteString* cast(Object* obj);
@ -7682,7 +7678,7 @@ class ExternalAsciiString: public ExternalString {
// which the pointer cache has to be refreshed.
inline void update_data_cache();
inline const char* GetChars();
inline const uint8_t* GetChars();
// Dispatched behavior.
inline uint16_t ExternalAsciiStringGet(int index);

View File

@ -80,11 +80,11 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
if (subject->IsOneByteRepresentation()) {
const byte* address;
if (StringShape(subject).IsExternal()) {
const char* data = ExternalAsciiString::cast(subject)->GetChars();
const uint8_t* data = ExternalAsciiString::cast(subject)->GetChars();
address = reinterpret_cast<const byte*>(data);
} else {
ASSERT(subject->IsSeqOneByteString());
char* data = SeqOneByteString::cast(subject)->GetChars();
const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
address = reinterpret_cast<const byte*>(data);
}
return address + start_index;

View File

@ -2407,7 +2407,7 @@ class ReplacementStringBuilder {
if (is_ascii_) {
Handle<SeqOneByteString> seq = NewRawOneByteString(character_count_);
AssertNoAllocation no_alloc;
char* char_buffer = seq->GetChars();
uint8_t* char_buffer = seq->GetChars();
StringBuilderConcatHelper(*subject_,
char_buffer,
*array_builder_.array(),
@ -2664,7 +2664,7 @@ bool CompiledReplacement::Compile(Handle<String> replacement,
bool simple = false;
if (content.IsAscii()) {
simple = ParseReplacementPattern(&parts_,
content.ToAsciiVector(),
content.ToOneByteVector(),
capture_count,
subject_length,
zone());
@ -2740,7 +2740,7 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
}
void FindAsciiStringIndices(Vector<const char> subject,
void FindAsciiStringIndices(Vector<const uint8_t> subject,
char pattern,
ZoneList<int>* indices,
unsigned int limit,
@ -2748,11 +2748,11 @@ void FindAsciiStringIndices(Vector<const char> subject,
ASSERT(limit > 0);
// Collect indices of pattern in subject using memchr.
// Stop after finding at most limit values.
const char* subject_start = reinterpret_cast<const char*>(subject.start());
const char* subject_end = subject_start + subject.length();
const char* pos = subject_start;
const uint8_t* subject_start = subject.start();
const uint8_t* subject_end = subject_start + subject.length();
const uint8_t* pos = subject_start;
while (limit > 0) {
pos = reinterpret_cast<const char*>(
pos = reinterpret_cast<const uint8_t*>(
memchr(pos, pattern, subject_end - pos));
if (pos == NULL) return;
indices->Add(static_cast<int>(pos - subject_start), zone);
@ -2815,9 +2815,10 @@ void FindStringIndicesDispatch(Isolate* isolate,
ASSERT(subject_content.IsFlat());
ASSERT(pattern_content.IsFlat());
if (subject_content.IsAscii()) {
Vector<const char> subject_vector = subject_content.ToAsciiVector();
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (pattern_content.IsAscii()) {
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
Vector<const uint8_t> pattern_vector =
pattern_content.ToOneByteVector();
if (pattern_vector.length() == 1) {
FindAsciiStringIndices(subject_vector,
pattern_vector[0],
@ -2843,7 +2844,8 @@ void FindStringIndicesDispatch(Isolate* isolate,
} else {
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern_content.IsAscii()) {
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
Vector<const uint8_t> pattern_vector =
pattern_content.ToOneByteVector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
@ -3325,10 +3327,10 @@ int Runtime::StringMatch(Isolate* isolate,
// dispatch on type of strings
if (seq_pat.IsAscii()) {
Vector<const char> pat_vector = seq_pat.ToAsciiVector();
Vector<const uint8_t> pat_vector = seq_pat.ToOneByteVector();
if (seq_sub.IsAscii()) {
return SearchString(isolate,
seq_sub.ToAsciiVector(),
seq_sub.ToOneByteVector(),
pat_vector,
start_index);
}
@ -3340,7 +3342,7 @@ int Runtime::StringMatch(Isolate* isolate,
Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
if (seq_sub.IsAscii()) {
return SearchString(isolate,
seq_sub.ToAsciiVector(),
seq_sub.ToOneByteVector(),
pat_vector,
start_index);
}
@ -3435,9 +3437,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) {
String::FlatContent pat_content = pat->GetFlatContent();
if (pat_content.IsAscii()) {
Vector<const char> pat_vector = pat_content.ToAsciiVector();
Vector<const uint8_t> pat_vector = pat_content.ToOneByteVector();
if (sub_content.IsAscii()) {
position = StringMatchBackwards(sub_content.ToAsciiVector(),
position = StringMatchBackwards(sub_content.ToOneByteVector(),
pat_vector,
start_index);
} else {
@ -3448,7 +3450,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) {
} else {
Vector<const uc16> pat_vector = pat_content.ToUC16Vector();
if (sub_content.IsAscii()) {
position = StringMatchBackwards(sub_content.ToAsciiVector(),
position = StringMatchBackwards(sub_content.ToOneByteVector(),
pat_vector,
start_index);
} else {
@ -5002,7 +5004,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_Typeof) {
}
static bool AreDigits(const char*s, int from, int to) {
static bool AreDigits(const uint8_t*s, int from, int to) {
for (int i = from; i < to; i++) {
if (s[i] < '0' || s[i] > '9') return false;
}
@ -5011,7 +5013,7 @@ static bool AreDigits(const char*s, int from, int to) {
}
static int ParseDecimalInteger(const char*s, int from, int to) {
static int ParseDecimalInteger(const uint8_t*s, int from, int to) {
ASSERT(to - from < 10); // Overflow is not possible.
ASSERT(from < to);
int d = s[from] - '0';
@ -5035,7 +5037,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToNumber) {
if (subject->IsSeqOneByteString()) {
if (len == 0) return Smi::FromInt(0);
char const* data = SeqOneByteString::cast(subject)->GetChars();
uint8_t const* data = SeqOneByteString::cast(subject)->GetChars();
bool minus = (data[0] == '-');
int start_pos = (minus ? 1 : 0);
@ -5530,8 +5532,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONString) {
return QuoteJsonString<uc16, SeqTwoByteString, false>(isolate,
flat.ToUC16Vector());
} else {
return QuoteJsonString<char, SeqOneByteString, false>(isolate,
flat.ToAsciiVector());
return QuoteJsonString<uint8_t, SeqOneByteString, false>(
isolate,
flat.ToOneByteVector());
}
}
@ -5553,8 +5556,9 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONStringComma) {
return QuoteJsonString<uc16, SeqTwoByteString, true>(isolate,
flat.ToUC16Vector());
} else {
return QuoteJsonString<char, SeqOneByteString, true>(isolate,
flat.ToAsciiVector());
return QuoteJsonString<uint8_t, SeqOneByteString, true>(
isolate,
flat.ToOneByteVector());
}
}
@ -5595,9 +5599,10 @@ static MaybeObject* QuoteJsonStringArray(Isolate* isolate,
write_cursor,
content.ToUC16Vector());
} else {
write_cursor = WriteQuoteJsonString<Char, char>(isolate,
write_cursor =
WriteQuoteJsonString<Char, uint8_t>(isolate,
write_cursor,
content.ToAsciiVector());
content.ToOneByteVector());
}
}
*(write_cursor++) = ']';
@ -5950,7 +5955,9 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(
}
SeqOneByteString* result = SeqOneByteString::cast(o);
bool has_changed_character = ConvertTraits::AsciiConverter::Convert(
result->GetChars(), SeqOneByteString::cast(s)->GetChars(), length);
reinterpret_cast<char*>(result->GetChars()),
reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),
length);
return has_changed_character ? result : s;
}
#endif
@ -6112,7 +6119,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
// not in the cache and fills the remainder with smi zeros. Returns
// the length of the successfully copied prefix.
static int CopyCachedAsciiCharsToArray(Heap* heap,
const char* chars,
const uint8_t* chars,
FixedArray* elements,
int length) {
AssertNoAllocation no_gc;
@ -6163,7 +6170,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToArray) {
elements = Handle<FixedArray>(FixedArray::cast(obj), isolate);
String::FlatContent content = s->GetFlatContent();
if (content.IsAscii()) {
Vector<const char> chars = content.ToAsciiVector();
Vector<const uint8_t> chars = content.ToOneByteVector();
// Note, this will initialize all elements (not only the prefix)
// to prevent GC from seeing partially initialized array.
position = CopyCachedAsciiCharsToArray(isolate->heap(),
@ -6746,11 +6753,12 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_SparseJoinWithSeparator) {
if (result_allocation->IsFailure()) return result_allocation;
SeqOneByteString* result_string =
SeqOneByteString::cast(result_allocation->ToObjectUnchecked());
JoinSparseArrayWithSeparator<char>(elements,
JoinSparseArrayWithSeparator<uint8_t>(elements,
elements_length,
array_length,
separator,
Vector<char>(result_string->GetChars(),
Vector<uint8_t>(
result_string->GetChars(),
string_length));
return result_string;
} else {
@ -6999,9 +7007,9 @@ static Object* FlatStringCompare(String* x, String* y) {
String::FlatContent x_content = x->GetFlatContent();
String::FlatContent y_content = y->GetFlatContent();
if (x_content.IsAscii()) {
Vector<const char> x_chars = x_content.ToAsciiVector();
Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
if (y_content.IsAscii()) {
Vector<const char> y_chars = y_content.ToAsciiVector();
Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} else {
Vector<const uc16> y_chars = y_content.ToUC16Vector();
@ -7010,7 +7018,7 @@ static Object* FlatStringCompare(String* x, String* y) {
} else {
Vector<const uc16> x_chars = x_content.ToUC16Vector();
if (y_content.IsAscii()) {
Vector<const char> y_chars = y_content.ToAsciiVector();
Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} else {
Vector<const uc16> y_chars = y_content.ToUC16Vector();
@ -8960,7 +8968,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_DateParseString) {
bool result;
String::FlatContent str_content = str->GetFlatContent();
if (str_content.IsAscii()) {
result = DateParser::Parse(str_content.ToAsciiVector(),
result = DateParser::Parse(str_content.ToOneByteVector(),
output_array,
isolate->unicode_cache());
} else {
@ -13418,8 +13426,8 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) {
CONVERT_ARG_CHECKED(JSArray, elms, 1);
String::FlatContent format_content = format->GetFlatContent();
RUNTIME_ASSERT(format_content.IsAscii());
Vector<const char> chars = format_content.ToAsciiVector();
LOGGER->LogRuntime(chars, elms);
Vector<const uint8_t> chars = format_content.ToOneByteVector();
LOGGER->LogRuntime(Vector<const char>::cast(chars), elms);
return isolate->heap()->undefined_value();
}

View File

@ -53,7 +53,11 @@ class StringSearchBase {
// a potentially less efficient searching, but is a safe approximation.
// For needles using only characters in the same Unicode 256-code point page,
// there is no search speed degradation.
#ifndef ENABLE_LATIN_1
static const int kAsciiAlphabetSize = 128;
#else
static const int kAsciiAlphabetSize = 256;
#endif
static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
// Bad-char shift table stored in the state. Its length is the alphabet size.
@ -61,7 +65,7 @@ class StringSearchBase {
// to compensate for the algorithmic overhead compared to simple brute force.
static const int kBMMinPatternLength = 7;
static inline bool IsOneByteString(Vector<const char> string) {
static inline bool IsOneByteString(Vector<const uint8_t> string) {
return true;
}
@ -150,13 +154,25 @@ class StringSearch : private StringSearchBase {
void PopulateBoyerMooreTable();
// One-byte overload: reports whether |c| is above
// String::kMaxOneByteCharCodeU. When ENABLE_LATIN_1 is defined this
// overload returns false unconditionally.
static inline bool exceedsOneByte(uint8_t c) {
#ifndef ENABLE_LATIN_1
  return c > String::kMaxOneByteCharCodeU;
#else
  return false;
#endif
}

// Two-byte overload: true when |c| is above String::kMaxOneByteCharCodeU,
// regardless of ENABLE_LATIN_1.
static inline bool exceedsOneByte(uint16_t c) {
  const bool above_one_byte_max = c > String::kMaxOneByteCharCodeU;
  return above_one_byte_max;
}
static inline int CharOccurrence(int* bad_char_occurrence,
SubjectChar char_code) {
if (sizeof(SubjectChar) == 1) {
return bad_char_occurrence[static_cast<int>(char_code)];
}
if (sizeof(PatternChar) == 1) {
if (static_cast<unsigned int>(char_code) > String::kMaxOneByteCharCodeU) {
if (exceedsOneByte(char_code)) {
return -1;
}
return bad_char_occurrence[static_cast<unsigned int>(char_code)];
@ -223,8 +239,7 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
return static_cast<int>(pos - subject.start());
} else {
if (sizeof(PatternChar) > sizeof(SubjectChar)) {
if (static_cast<uc16>(pattern_first_char) >
String::kMaxOneByteCharCodeU) {
if (exceedsOneByte(pattern_first_char)) {
return -1;
}
}

View File

@ -85,8 +85,8 @@ double StringToDouble(UnicodeCache* unicode_cache,
StringShape shape(str);
// TODO(dcarney): Use a Visitor here.
if (shape.IsSequentialAscii()) {
const char* begin = SeqOneByteString::cast(str)->GetChars();
const char* end = begin + str->length();
const uint8_t* begin = SeqOneByteString::cast(str)->GetChars();
const uint8_t* end = begin + str->length();
return InternalStringToDouble(unicode_cache, begin, end, flags,
empty_string_val);
} else if (shape.IsSequentialTwoByte()) {
@ -112,8 +112,8 @@ double StringToInt(UnicodeCache* unicode_cache,
StringShape shape(str);
// TODO(dcarney): Use a Visitor here.
if (shape.IsSequentialAscii()) {
const char* begin = SeqOneByteString::cast(str)->GetChars();
const char* end = begin + str->length();
const uint8_t* begin = SeqOneByteString::cast(str)->GetChars();
const uint8_t* end = begin + str->length();
return InternalStringToInt(unicode_cache, begin, end, radix);
} else if (shape.IsSequentialTwoByte()) {
const uc16* begin = SeqTwoByteString::cast(str)->GetChars();

View File

@ -5487,16 +5487,32 @@ void StringCompareStub::GenerateCompareFlatAsciiStrings(MacroAssembler* masm,
// Compare lengths (precomputed).
__ bind(&compare_lengths);
__ SmiTest(length_difference);
#ifndef ENABLE_LATIN_1
__ j(not_zero, &result_not_equal, Label::kNear);
#else
Label length_not_equal;
__ j(not_zero, &length_not_equal, Label::kNear);
#endif
// Result is EQUAL.
__ Move(rax, Smi::FromInt(EQUAL));
__ ret(0);
Label result_greater;
#ifdef ENABLE_LATIN_1
Label result_less;
__ bind(&length_not_equal);
__ j(greater, &result_greater, Label::kNear);
__ jmp(&result_less, Label::kNear);
#endif
__ bind(&result_not_equal);
// Unequal comparison of left to right, either character or length.
#ifndef ENABLE_LATIN_1
__ j(greater, &result_greater, Label::kNear);
#else
__ j(above, &result_greater, Label::kNear);
__ bind(&result_less);
#endif
// Result is LESS.
__ Move(rax, Smi::FromInt(LESS));

View File

@ -393,8 +393,17 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
__ j(not_equal, on_no_match); // Definitely not equal.
__ subb(rax, Immediate('a'));
__ cmpb(rax, Immediate('z' - 'a'));
#ifndef ENABLE_LATIN_1
__ j(above, on_no_match); // Weren't letters anyway.
#else
__ j(below_equal, &loop_increment); // In range 'a'-'z'.
// Latin-1: Check for values in range [224,254] but not 247.
__ subb(rax, Immediate(224 - 'a'));
__ cmpb(rax, Immediate(254 - 224));
__ j(above, on_no_match); // Weren't Latin-1 letters.
__ cmpb(rax, Immediate(247 - 224)); // Check for 247.
__ j(equal, on_no_match);
#endif
__ bind(&loop_increment);
// Increment pointers into match and capture strings.
__ addq(r11, Immediate(1));

View File

@ -153,22 +153,25 @@ void generate(MacroAssembler* masm, uint32_t key) {
void check(i::Vector<const uint8_t> string) {
v8::HandleScope scope;
Isolate* isolate = Isolate::Current();
Factory* factory = isolate->factory();
HandleScope scope(isolate);
v8::internal::byte buffer[2048];
MacroAssembler masm(Isolate::Current(), buffer, sizeof buffer);
MacroAssembler masm(isolate, buffer, sizeof buffer);
generate(&masm, string);
CodeDesc desc;
masm.GetCode(&desc);
Code* code = Code::cast(HEAP->CreateCode(
desc,
Handle<Object> undefined(isolate->heap()->undefined_value(), isolate);
Handle<Code> code = factory->NewCode(desc,
Code::ComputeFlags(Code::STUB),
Handle<Object>(HEAP->undefined_value()))->ToObjectChecked());
undefined);
CHECK(code->IsCode());
HASH_FUNCTION hash = FUNCTION_CAST<HASH_FUNCTION>(code->entry());
Handle<String> v8_string = FACTORY->NewStringFromOneByte(string);
Handle<String> v8_string = factory->NewStringFromOneByte(string);
v8_string->set_hash_field(String::kEmptyHashField);
#ifdef USE_SIMULATOR
uint32_t codegen_hash =
@ -187,7 +190,10 @@ void check(i::Vector<const char> s) {
void check(uint32_t key) {
v8::HandleScope scope;
Isolate* isolate = Isolate::Current();
Factory* factory = isolate->factory();
HandleScope scope(isolate);
v8::internal::byte buffer[2048];
MacroAssembler masm(Isolate::Current(), buffer, sizeof buffer);
@ -195,10 +201,10 @@ void check(uint32_t key) {
CodeDesc desc;
masm.GetCode(&desc);
Code* code = Code::cast(HEAP->CreateCode(
desc,
Handle<Object> undefined(isolate->heap()->undefined_value(), isolate);
Handle<Code> code = factory->NewCode(desc,
Code::ComputeFlags(Code::STUB),
Handle<Object>(HEAP->undefined_value()))->ToObjectChecked());
undefined);
CHECK(code->IsCode());
HASH_FUNCTION hash = FUNCTION_CAST<HASH_FUNCTION>(code->entry());
@ -209,9 +215,7 @@ void check(uint32_t key) {
uint32_t codegen_hash = hash();
#endif
uint32_t runtime_hash = ComputeIntegerHash(
key,
Isolate::Current()->heap()->HashSeed());
uint32_t runtime_hash = ComputeIntegerHash(key, isolate->heap()->HashSeed());
CHECK(runtime_hash == codegen_hash);
}
@ -229,12 +233,12 @@ static uint32_t PseudoRandom(uint32_t i, uint32_t j) {
TEST(StringHash) {
if (env.IsEmpty()) env = v8::Context::New();
for (int a = 0; a < String::kMaxOneByteCharCode; a++) {
for (uint8_t a = 0; a < String::kMaxOneByteCharCode; a++) {
// Numbers are hashed differently.
if (a >= '0' && a <= '9') continue;
for (int b = 0; b < String::kMaxOneByteCharCode; b++) {
for (uint8_t b = 0; b < String::kMaxOneByteCharCode; b++) {
if (b >= '0' && b <= '9') continue;
check_twochars(static_cast<uint8_t>(a), static_cast<uint8_t>(b));
check_twochars(a, b);
}
}
check(i::Vector<const char>("*", 1));

View File

@ -165,23 +165,22 @@ function NoHang(re) {
"This is an ASCII string that could take forever".match(re);
}
NoHang(/(((.*)*)*x)å/); // Continuation after loop is filtered, so is loop.
NoHang(/(((.*)*)*å)foo/); // Body of loop filtered.
NoHang(/å(((.*)*)*x)/); // Everything after a filtered character is filtered.
NoHang(/(((.*)*)*x)å/); // Everything before a filtered character is filtered.
NoHang(/[æøå](((.*)*)*x)/); // Everything after a filtered class is filtered.
NoHang(/(((.*)*)*x)[æøå]/); // Everything before a filtered class is filtered.
NoHang(/[^\x00-\x7f](((.*)*)*x)/); // After negated class.
NoHang(/(((.*)*)*x)[^\x00-\x7f]/); // Before negated class.
NoHang(/(?!(((.*)*)*x)å)foo/); // Negative lookahead is filtered.
NoHang(/(?!(((.*)*)*x))å/); // Continuation branch of negative lookahead.
NoHang(/(?=(((.*)*)*x)å)foo/); // Positive lookahead is filtered.
NoHang(/(?=(((.*)*)*x))å/); // Continuation branch of positive lookahead.
NoHang(/(?=å)(((.*)*)*x)/); // Positive lookahead also prunes continuation.
NoHang(/(æ|ø|å)(((.*)*)*x)/); // All branches of alternation are filtered.
NoHang(/(a|b|(((.*)*)*x))å/); // 1 out of 3 branches pruned.
NoHang(/(a|(((.*)*)*x)ø|(((.*)*)*x)å)/); // 2 out of 3 branches pruned.
NoHang(/(((.*)*)*x)Ā/); // Continuation after loop is filtered, so is loop.
NoHang(/(((.*)*)*Ā)foo/); // Body of loop filtered.
NoHang(/Ā(((.*)*)*x)/); // Everything after a filtered character is filtered.
NoHang(/(((.*)*)*x)Ā/); // Everything before a filtered character is filtered.
NoHang(/[ćăĀ](((.*)*)*x)/); // Everything after a filtered class is filtered.
NoHang(/(((.*)*)*x)[ćăĀ]/); // Everything before a filtered class is filtered.
NoHang(/[^\x00-\xff](((.*)*)*x)/); // After negated class.
NoHang(/(((.*)*)*x)[^\x00-\xff]/); // Before negated class.
NoHang(/(?!(((.*)*)*x)Ā)foo/); // Negative lookahead is filtered.
NoHang(/(?!(((.*)*)*x))Ā/); // Continuation branch of negative lookahead.
NoHang(/(?=(((.*)*)*x)Ā)foo/); // Positive lookahead is filtered.
NoHang(/(?=(((.*)*)*x))Ā/); // Continuation branch of positive lookahead.
NoHang(/(?=Ā)(((.*)*)*x)/); // Positive lookahead also prunes continuation.
NoHang(/(æ|ø|Ā)(((.*)*)*x)/); // All branches of alternation are filtered.
NoHang(/(a|b|(((.*)*)*x))Ā/); // 1 out of 3 branches pruned.
NoHang(/(a|(((.*)*)*x)ă|(((.*)*)*x)Ā)/); // 2 out of 3 branches pruned.
var s = "Don't prune based on a repetition of length 0";
assertEquals(null, s.match(/å{1,1}prune/));

View File

@ -0,0 +1,59 @@
// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Strings mixing ASCII (97), Latin-1 (220) and two-byte (256) char codes must
// compare equal whether built by fromCharCode, concatenation or a literal.
assertEquals(String.fromCharCode(97, 220, 256), 'a' + '\u00DC' + '\u0100');
assertEquals(String.fromCharCode(97, 220, 256), 'a\u00DC\u0100');

// JSON.stringify must pass char code 0x80 through unchanged (index 1 is the
// first character after the opening quote).
assertEquals(0x80, JSON.stringify("\x80").charCodeAt(1));

// The default sort puts '\xdc' after the ASCII letters.
assertEquals(['a', 'b', '\xdc'], ['b', '\xdc', 'a'].sort());

// Case-insensitive back reference: the group captures '\xfc' and the back
// reference then matches its upper-case form '\xdc'.
assertEquals(['\xfc\xdc', '\xfc'],
             new RegExp('(\xdc)\\1', 'i').exec('\xfc\xdc'));

// Same back-reference test for every char code the loop visits (0x00-0xfe;
// 0xff is not covered here).
var total_lo = 0;
for (var i = 0; i < 0xff; i++) {
  var base = String.fromCharCode(i);

  // Escape regexp syntax characters so the group always matches the literal
  // character rather than acting as an operator or wildcard.
  var escaped = base;
  if ('()*+?[]\\$^|.{}'.indexOf(base) >= 0) {
    escaped = '\\' + base;
  }
  var source = '(' + escaped + ')\\1';

  // A character followed by itself must always satisfy the back reference.
  var base_result = new RegExp(source, 'i').exec(base + base);
  assertEquals([base + base, base], base_result);

  // The character 0x20 above only matches when it is the lower-case form of
  // the captured character.
  var lo = String.fromCharCode(i + 0x20);
  var lo_result = new RegExp(source, 'i').exec(base + lo);
  if (base.toLowerCase() == lo) {
    assertEquals([base + lo, base], lo_result);
    total_lo++;
  } else {
    assertEquals(null, lo_result);
  }
}

// Should have hit the branch for the following char codes:
// [A-Z], [192-222] but not 215
assertEquals((90-65+1)+(222-192-1+1), total_lo);