Keep track of the first non-ASCII word/char so that the UTF-8 slow path does not rescan the ASCII prefix.

Review URL: https://chromiumcodereview.appspot.com/11194053

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12762 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
verwaest@chromium.org 2012-10-18 15:08:11 +00:00
parent 852de2e1ab
commit d04a1fff1f
4 changed files with 36 additions and 16 deletions

View File

@ -85,13 +85,16 @@ void PromotionQueue::ActivateGuardIfOnTheSamePage() {
// Allocates a heap string from the UTF-8 bytes in |str|.
//
// The input is scanned once with String::NonAsciiStart. If the scan reaches
// the end, the bytes are handed to AllocateStringFromAscii unchanged.
// Otherwise the offset found by the scan is forwarded to the slow path so it
// can resume from there instead of rescanning the ASCII prefix.
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
                                          PretenureFlag pretenure) {
  // Check for ASCII first since this is the common case.
  const char* start = str.start();
  int length = str.length();
  int non_ascii_start = String::NonAsciiStart(start, length);
  if (non_ascii_start >= length) {
    // If the string is ASCII, we do not need to convert the characters
    // since UTF8 is backwards compatible with ASCII.
    return AllocateStringFromAscii(str, pretenure);
  }
  // Non-ASCII and we need to decode. Pass the offset explicitly: the slow
  // path takes (str, non_ascii_start, pretenure), and a two-argument call
  // would silently bind |pretenure| to the int offset parameter.
  return AllocateStringFromUtf8Slow(str, non_ascii_start, pretenure);
}

View File

@ -4428,13 +4428,14 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
int non_ascii_start,
PretenureFlag pretenure) {
// Count the number of characters in the UTF-8 string and check if
// it is an ASCII string.
// Continue counting the number of characters in the UTF-8 string, starting
// from the first non-ascii character or word.
int chars = non_ascii_start;
Access<UnicodeCache::Utf8Decoder>
decoder(isolate_->unicode_cache()->utf8_decoder());
decoder->Reset(string.start(), string.length());
int chars = 0;
decoder->Reset(string.start() + non_ascii_start, string.length() - chars);
while (decoder->has_more()) {
uint32_t r = decoder->GetNext();
if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {

View File

@ -705,6 +705,7 @@ class Heap {
PretenureFlag pretenure = NOT_TENURED);
// Slow path of AllocateStringFromUtf8, used once the input is known to
// contain non-ASCII data and therefore has to be decoded.
// |non_ascii_start| is the offset produced by String::NonAsciiStart for
// |str|; the definition seeds its character count with it and starts
// decoding at that offset rather than at the beginning of |str|.
MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
Vector<const char> str,
int non_ascii_start,
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
Vector<const uc16> str,

View File

@ -7413,32 +7413,47 @@ class String: public HeapObject {
int from,
int to);
static inline bool IsAscii(const char* chars, int length) {
// The return value may point to the first aligned word containing the
// first non-ascii character, rather than directly to the non-ascii character.
// If the return value is >= the passed length, the entire string was ASCII.
static inline int NonAsciiStart(const char* chars, int length) {
const char* start = chars;
const char* limit = chars + length;
#ifdef V8_HOST_CAN_READ_UNALIGNED
ASSERT(kMaxAsciiCharCode == 0x7F);
const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
while (chars + sizeof(uintptr_t) <= limit) {
if (*reinterpret_cast<const uintptr_t*>(chars) & non_ascii_mask) {
return false;
return chars - start;
}
chars += sizeof(uintptr_t);
}
#endif
while (chars < limit) {
if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) {
return chars - start;
}
++chars;
}
return true;
return chars - start;
}
// Returns true when NonAsciiStart finds no non-ASCII data within the first
// |length| bytes of |chars|, i.e. when the reported offset is not below
// |length|.
static inline bool IsAscii(const char* chars, int length) {
  const int first_non_ascii = NonAsciiStart(chars, length);
  return first_non_ascii >= length;
}
static inline int NonAsciiStart(const uc16* chars, int length) {
const uc16* limit = chars + length;
const uc16* start = chars;
while (chars < limit) {
if (*chars > kMaxAsciiCharCodeU) return chars - start;
++chars;
}
return chars - start;
}
// Returns true when none of the first |length| code units of |chars| exceeds
// kMaxAsciiCharCodeU. Delegates to NonAsciiStart so the scan lives in one
// place, matching the const char* overload; the string is ASCII exactly when
// the reported offset is not below |length|.
static inline bool IsAscii(const uc16* chars, int length) {
  return NonAsciiStart(chars, length) >= length;
}
protected: