From a609cf61c50a4226b7ce1a9ea6af33eae0e1e043 Mon Sep 17 00:00:00 2001 From: "ager@chromium.org" Date: Tue, 21 Dec 2010 13:24:23 +0000 Subject: [PATCH] Avoid decoding overhead when allocating ascii strings. The assumption is that most utf8 strings allocated are actually ascii and that if they are not we will encounter a non-ascii char pretty quickly. Review URL: http://codereview.chromium.org/6072004 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6099 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/heap-inl.h | 15 +++++++++++++++ src/heap.cc | 12 +++--------- src/heap.h | 5 ++++- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/heap-inl.h b/src/heap-inl.h index ef839988da..62e810fcb1 100644 --- a/src/heap-inl.h +++ b/src/heap-inl.h @@ -40,6 +40,21 @@ int Heap::MaxObjectSizeInPagedSpace() { } +MaybeObject* Heap::AllocateStringFromUtf8(Vector str, + PretenureFlag pretenure) { + // Check for ASCII first since this is the common case. + for (int i = 0; i < str.length(); ++i) { + if (static_cast(str[i]) > String::kMaxAsciiCharCodeU) { + // Non-ASCII and we need to decode. + return AllocateStringFromUtf8Slow(str, pretenure); + } + } + // If the string is ASCII, we do not need to convert the characters + // since UTF8 is backwards compatible with ASCII. + return AllocateStringFromAscii(str, pretenure); +} + + MaybeObject* Heap::AllocateSymbol(Vector str, int chars, uint32_t hash_field) { diff --git a/src/heap.cc b/src/heap.cc index 1e9999164c..2f70ef0188 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -3307,8 +3307,8 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector string, } -MaybeObject* Heap::AllocateStringFromUtf8(Vector string, - PretenureFlag pretenure) { +MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector string, + PretenureFlag pretenure) { // V8 only supports characters in the Basic Multilingual Plane. const uc32 kMaxSupportedChar = 0xFFFF; // Count the number of characters in the UTF-8 string and check if @@ -3317,17 +3317,11 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector string, decoder(ScannerConstants::utf8_decoder()); decoder->Reset(string.start(), string.length()); int chars = 0; - bool is_ascii = true; while (decoder->has_more()) { - uc32 r = decoder->GetNext(); - if (r > String::kMaxAsciiCharCode) is_ascii = false; + decoder->GetNext(); chars++; } - // If the string is ascii, we do not need to convert the characters - // since UTF8 is backwards compatible with ascii. - if (is_ascii) return AllocateStringFromAscii(string, pretenure); - Object* result; { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure); if (!maybe_result->ToObject(&result)) return maybe_result; diff --git a/src/heap.h b/src/heap.h index c2d36e248f..c612f2f413 100644 --- a/src/heap.h +++ b/src/heap.h @@ -412,7 +412,10 @@ class Heap : public AllStatic { MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii( Vector str, PretenureFlag pretenure = NOT_TENURED); - MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8( + MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8( + Vector str, + PretenureFlag pretenure = NOT_TENURED); + MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow( Vector str, PretenureFlag pretenure = NOT_TENURED); MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte(