Keep track of the first non-ASCII word/char found while scanning a UTF-8 string, so the slow decoding path does not redo the work of scanning the ASCII prefix.
Review URL: https://chromiumcodereview.appspot.com/11194053

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12762 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
852de2e1ab
commit
d04a1fff1f
@ -85,13 +85,16 @@ void PromotionQueue::ActivateGuardIfOnTheSamePage() {
|
||||
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
|
||||
PretenureFlag pretenure) {
|
||||
// Check for ASCII first since this is the common case.
|
||||
if (String::IsAscii(str.start(), str.length())) {
|
||||
const char* start = str.start();
|
||||
int length = str.length();
|
||||
int non_ascii_start = String::NonAsciiStart(start, length);
|
||||
if (non_ascii_start >= length) {
|
||||
// If the string is ASCII, we do not need to convert the characters
|
||||
// since UTF8 is backwards compatible with ASCII.
|
||||
return AllocateStringFromAscii(str, pretenure);
|
||||
}
|
||||
// Non-ASCII and we need to decode.
|
||||
return AllocateStringFromUtf8Slow(str, pretenure);
|
||||
return AllocateStringFromUtf8Slow(str, non_ascii_start, pretenure);
|
||||
}
|
||||
|
||||
|
||||
|
@ -4428,13 +4428,14 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
|
||||
|
||||
|
||||
MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
|
||||
int non_ascii_start,
|
||||
PretenureFlag pretenure) {
|
||||
// Count the number of characters in the UTF-8 string and check if
|
||||
// it is an ASCII string.
|
||||
// Continue counting the number of characters in the UTF-8 string, starting
|
||||
// from the first non-ascii character or word.
|
||||
int chars = non_ascii_start;
|
||||
Access<UnicodeCache::Utf8Decoder>
|
||||
decoder(isolate_->unicode_cache()->utf8_decoder());
|
||||
decoder->Reset(string.start(), string.length());
|
||||
int chars = 0;
|
||||
decoder->Reset(string.start() + non_ascii_start, string.length() - chars);
|
||||
while (decoder->has_more()) {
|
||||
uint32_t r = decoder->GetNext();
|
||||
if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
|
@ -705,6 +705,7 @@ class Heap {
|
||||
PretenureFlag pretenure = NOT_TENURED);
|
||||
MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
|
||||
Vector<const char> str,
|
||||
int non_ascii_start,
|
||||
PretenureFlag pretenure = NOT_TENURED);
|
||||
MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
|
||||
Vector<const uc16> str,
|
||||
|
@ -7413,32 +7413,47 @@ class String: public HeapObject {
|
||||
int from,
|
||||
int to);
|
||||
|
||||
static inline bool IsAscii(const char* chars, int length) {
|
||||
// The returned offset (relative to chars) may be the start of the word-sized chunk containing the
|
||||
// first non-ASCII character, rather than the offset of the non-ASCII character itself.
|
||||
// If the return value is >= the passed length, the entire string is ASCII.
|
||||
static inline int NonAsciiStart(const char* chars, int length) {
|
||||
const char* start = chars;
|
||||
const char* limit = chars + length;
|
||||
#ifdef V8_HOST_CAN_READ_UNALIGNED
|
||||
ASSERT(kMaxAsciiCharCode == 0x7F);
|
||||
const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
|
||||
while (chars + sizeof(uintptr_t) <= limit) {
|
||||
if (*reinterpret_cast<const uintptr_t*>(chars) & non_ascii_mask) {
|
||||
return false;
|
||||
return chars - start;
|
||||
}
|
||||
chars += sizeof(uintptr_t);
|
||||
}
|
||||
#endif
|
||||
while (chars < limit) {
|
||||
if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
|
||||
if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) {
|
||||
return chars - start;
|
||||
}
|
||||
++chars;
|
||||
}
|
||||
return true;
|
||||
return chars - start;
|
||||
}
|
||||
|
||||
static inline bool IsAscii(const char* chars, int length) {
|
||||
return NonAsciiStart(chars, length) >= length;
|
||||
}
|
||||
|
||||
static inline int NonAsciiStart(const uc16* chars, int length) {
|
||||
const uc16* limit = chars + length;
|
||||
const uc16* start = chars;
|
||||
while (chars < limit) {
|
||||
if (*chars > kMaxAsciiCharCodeU) return chars - start;
|
||||
++chars;
|
||||
}
|
||||
return chars - start;
|
||||
}
|
||||
|
||||
static inline bool IsAscii(const uc16* chars, int length) {
|
||||
const uc16* limit = chars + length;
|
||||
while (chars < limit) {
|
||||
if (*chars > kMaxAsciiCharCodeU) return false;
|
||||
++chars;
|
||||
}
|
||||
return true;
|
||||
return NonAsciiStart(chars, length) >= length;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
Loading…
Reference in New Issue
Block a user