[Intl] Cutting 43K by removing Unibrow when ICU available
Making 43K of room for landing ICU64.

Size Change (on x64.release)
D8 before 23,683,192
D8 after 23,639,296
Reduce 43,896 bytes

Bugs: v8:8348
Change-Id: I057f7d59e955a2e5e017873e5b3b5daf5b142ae2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1478710
Commit-Queue: Frank Tang <ftang@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60616}
This commit is contained in:
parent
46e944dbad
commit
bb24140cb3
@ -970,6 +970,7 @@ class Isolate final : private HiddenFactory {
|
||||
|
||||
ThreadManager* thread_manager() { return thread_manager_; }
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() {
|
||||
return &jsregexp_uncanonicalize_;
|
||||
}
|
||||
@ -978,14 +979,15 @@ class Isolate final : private HiddenFactory {
|
||||
return &jsregexp_canonrange_;
|
||||
}
|
||||
|
||||
RuntimeState* runtime_state() { return &runtime_state_; }
|
||||
|
||||
Builtins* builtins() { return &builtins_; }
|
||||
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
|
||||
regexp_macro_assembler_canonicalize() {
|
||||
return &regexp_macro_assembler_canonicalize_;
|
||||
}
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
RuntimeState* runtime_state() { return &runtime_state_; }
|
||||
|
||||
Builtins* builtins() { return &builtins_; }
|
||||
|
||||
RegExpStack* regexp_stack() { return regexp_stack_; }
|
||||
|
||||
@ -996,11 +998,6 @@ class Isolate final : private HiddenFactory {
|
||||
|
||||
std::vector<int>* regexp_indices() { return &regexp_indices_; }
|
||||
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>*
|
||||
interp_canonicalize_mapping() {
|
||||
return &regexp_macro_assembler_canonicalize_;
|
||||
}
|
||||
|
||||
Debug* debug() { return debug_; }
|
||||
|
||||
bool* is_profiling_address() { return &is_profiling_; }
|
||||
@ -1642,10 +1639,12 @@ class Isolate final : private HiddenFactory {
|
||||
RuntimeState runtime_state_;
|
||||
Builtins builtins_;
|
||||
SetupIsolateDelegate* setup_delegate_ = nullptr;
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize_;
|
||||
unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange_;
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>
|
||||
regexp_macro_assembler_canonicalize_;
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
RegExpStack* regexp_stack_ = nullptr;
|
||||
std::vector<int> regexp_indices_;
|
||||
DateCache* date_cache_ = nullptr;
|
||||
|
@ -21,8 +21,6 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
using Canonicalize = unibrow::Mapping<unibrow::Ecma262Canonicalize>;
|
||||
|
||||
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||
int len, Vector<const uc16> subject,
|
||||
bool unicode) {
|
||||
|
@ -1543,7 +1543,26 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
|
||||
// that cannot occur in the source string because it is Latin1.
|
||||
static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
|
||||
bool one_byte_subject,
|
||||
unibrow::uchar* letters) {
|
||||
unibrow::uchar* letters,
|
||||
int letter_length) {
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
icu::UnicodeSet set;
|
||||
set.add(character);
|
||||
set = set.closeOver(USET_CASE_INSENSITIVE);
|
||||
int32_t range_count = set.getRangeCount();
|
||||
int items = 0;
|
||||
for (int32_t i = 0; i < range_count; i++) {
|
||||
UChar32 start = set.getRangeStart(i);
|
||||
UChar32 end = set.getRangeEnd(i);
|
||||
CHECK(end - start + items <= letter_length);
|
||||
while (start <= end) {
|
||||
if (one_byte_subject && start > String::kMaxOneByteCharCode) break;
|
||||
letters[items++] = (unibrow::uchar)(start);
|
||||
start++;
|
||||
}
|
||||
}
|
||||
return items;
|
||||
#else
|
||||
int length =
|
||||
isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
|
||||
// Unibrow returns 0 or 1 for characters where case independence is
|
||||
@ -1564,9 +1583,9 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
|
||||
}
|
||||
|
||||
return length;
|
||||
#endif // V8_INTL_SUPPORT
|
||||
}
|
||||
|
||||
|
||||
static inline bool EmitSimpleCharacter(Isolate* isolate,
|
||||
RegExpCompiler* compiler,
|
||||
uc16 c,
|
||||
@ -1599,8 +1618,8 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,
|
||||
bool preloaded) {
|
||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||
bool one_byte = compiler->one_byte();
|
||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||
int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
|
||||
unibrow::uchar chars[4];
|
||||
int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
|
||||
if (length < 1) {
|
||||
// This can't match. Must be a one-byte subject and a non-one-byte
|
||||
// character. We do not need to do anything since the one-byte pass
|
||||
@ -1675,8 +1694,8 @@ static inline bool EmitAtomLetter(Isolate* isolate,
|
||||
bool preloaded) {
|
||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||
bool one_byte = compiler->one_byte();
|
||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||
int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
|
||||
unibrow::uchar chars[4];
|
||||
int length = GetCaseIndependentLetters(isolate, c, one_byte, chars, 4);
|
||||
if (length <= 1) return false;
|
||||
// We may not need to check against the end of the input string
|
||||
// if this character lies before a character that matched.
|
||||
@ -1684,7 +1703,6 @@ static inline bool EmitAtomLetter(Isolate* isolate,
|
||||
macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
|
||||
}
|
||||
Label ok;
|
||||
DCHECK_EQ(4, unibrow::Ecma262UnCanonicalize::kMaxWidth);
|
||||
switch (length) {
|
||||
case 2: {
|
||||
if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
|
||||
@ -2480,9 +2498,9 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
details->positions(characters_filled_in);
|
||||
uc16 c = quarks[i];
|
||||
if (elm.atom()->ignore_case()) {
|
||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||
int length = GetCaseIndependentLetters(isolate, c,
|
||||
compiler->one_byte(), chars);
|
||||
unibrow::uchar chars[4];
|
||||
int length = GetCaseIndependentLetters(
|
||||
isolate, c, compiler->one_byte(), chars, 4);
|
||||
if (length == 0) {
|
||||
// This can happen because all case variants are non-Latin1, but we
|
||||
// know the input is Latin1.
|
||||
@ -5110,6 +5128,17 @@ int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
|
||||
// Case-insensitive comparison of the first code unit of two atoms, using ICU
// default case folding. Passed to StableSort as the comparator when the
// regexp ignores case and V8_INTL_SUPPORT is defined.
// Returns the value of icu::UnicodeString::caseCompare for the two code units.
// NOTE(review): both trees are presumably atoms with non-empty data; AsAtom()
// and data().at(0) are used here without any check -- confirm at call sites.
|
||||
int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
|
||||
RegExpAtom* atom1 = (*a)->AsAtom();
|
||||
RegExpAtom* atom2 = (*b)->AsAtom();
|
||||
icu::UnicodeString character1(atom1->data().at(0));
|
||||
return character1.caseCompare(atom2->data().at(0), U_FOLD_CASE_DEFAULT);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static unibrow::uchar Canonical(
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
|
||||
@ -5122,7 +5151,6 @@ static unibrow::uchar Canonical(
|
||||
return canonical;
|
||||
}
|
||||
|
||||
|
||||
int CompareFirstCharCaseIndependent(
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
|
||||
RegExpTree* const* a, RegExpTree* const* b) {
|
||||
@ -5137,7 +5165,7 @@ int CompareFirstCharCaseIndependent(
|
||||
}
|
||||
return static_cast<int>(character1) - static_cast<int>(character2);
|
||||
}
|
||||
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
// We can stable sort runs of atoms, since the order does not matter if they
|
||||
// start with different characters.
|
||||
@ -5173,6 +5201,10 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
DCHECK_LE(i, alternatives->length());
|
||||
DCHECK_LE(first_atom, i);
|
||||
if (IgnoreCase(flags)) {
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
|
||||
i - first_atom);
|
||||
#else
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
||||
compiler->isolate()->regexp_macro_assembler_canonicalize();
|
||||
auto compare_closure =
|
||||
@ -5180,6 +5212,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
return CompareFirstCharCaseIndependent(canonicalize, a, b);
|
||||
};
|
||||
alternatives->StableSort(compare_closure, first_atom, i - first_atom);
|
||||
#endif // V8_INTL_SUPPORT
|
||||
} else {
|
||||
alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
|
||||
}
|
||||
@ -5206,7 +5239,11 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
}
|
||||
RegExpAtom* const atom = alternative->AsAtom();
|
||||
JSRegExp::Flags flags = atom->flags();
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
icu::UnicodeString common_prefix(atom->data().at(0));
|
||||
#else
|
||||
unibrow::uchar common_prefix = atom->data().at(0);
|
||||
#endif // V8_INTL_SUPPORT
|
||||
int first_with_prefix = i;
|
||||
int prefix_length = atom->length();
|
||||
i++;
|
||||
@ -5215,6 +5252,14 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
if (!alternative->IsAtom()) break;
|
||||
RegExpAtom* const atom = alternative->AsAtom();
|
||||
if (atom->flags() != flags) break;
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
icu::UnicodeString new_prefix(atom->data().at(0));
|
||||
if (new_prefix != common_prefix) {
|
||||
if (!IgnoreCase(flags)) break;
|
||||
if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
|
||||
break;
|
||||
}
|
||||
#else
|
||||
unibrow::uchar new_prefix = atom->data().at(0);
|
||||
if (new_prefix != common_prefix) {
|
||||
if (!IgnoreCase(flags)) break;
|
||||
@ -5224,6 +5269,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
common_prefix = Canonical(canonicalize, common_prefix);
|
||||
if (new_prefix != common_prefix) break;
|
||||
}
|
||||
#endif // V8_INTL_SUPPORT
|
||||
prefix_length = Min(prefix_length, atom->length());
|
||||
i++;
|
||||
}
|
||||
@ -5889,6 +5935,53 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
||||
bool is_one_byte) {
|
||||
CharacterRange::Canonicalize(ranges);
|
||||
int range_count = ranges->length();
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
icu::UnicodeSet already_added;
|
||||
icu::UnicodeSet others;
|
||||
for (int i = 0; i < range_count; i++) {
|
||||
CharacterRange range = ranges->at(i);
|
||||
uc32 bottom = range.from();
|
||||
if (bottom > String::kMaxUtf16CodeUnit) continue;
|
||||
uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
|
||||
// Nothing to be done for surrogates.
|
||||
if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
|
||||
if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
|
||||
if (bottom > String::kMaxOneByteCharCode) continue;
|
||||
if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
|
||||
}
|
||||
already_added.add(bottom, top);
|
||||
while (bottom <= top) {
|
||||
icu::UnicodeString upper(bottom);
|
||||
upper.toUpper();
|
||||
icu::UnicodeSet expanded(bottom, bottom);
|
||||
expanded.closeOver(USET_CASE_INSENSITIVE);
|
||||
for (int32_t i = 0; i < expanded.getRangeCount(); i++) {
|
||||
UChar32 start = expanded.getRangeStart(i);
|
||||
UChar32 end = expanded.getRangeEnd(i);
|
||||
while (start <= end) {
|
||||
icu::UnicodeString upper2(start);
|
||||
upper2.toUpper();
|
||||
// Only add if the upper-case forms are the same.
|
||||
if (upper[0] == upper2[0]) {
|
||||
others.add(start);
|
||||
}
|
||||
start++;
|
||||
}
|
||||
}
|
||||
bottom++;
|
||||
}
|
||||
}
|
||||
others.removeAll(already_added);
|
||||
for (int32_t i = 0; i < others.getRangeCount(); i++) {
|
||||
UChar32 start = others.getRangeStart(i);
|
||||
UChar32 end = others.getRangeEnd(i);
|
||||
if (start == end) {
|
||||
ranges->Add(CharacterRange::Singleton(start), zone);
|
||||
} else {
|
||||
ranges->Add(CharacterRange::Range(start, end), zone);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < range_count; i++) {
|
||||
CharacterRange range = ranges->at(i);
|
||||
uc32 bottom = range.from();
|
||||
@ -5954,9 +6047,9 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // V8_INTL_SUPPORT
|
||||
}
|
||||
|
||||
|
||||
bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
|
||||
DCHECK_NOT_NULL(ranges);
|
||||
int n = ranges->length();
|
||||
@ -6434,10 +6527,10 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
|
||||
}
|
||||
uc16 character = atom->data()[j];
|
||||
if (IgnoreCase(atom->flags())) {
|
||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||
unibrow::uchar chars[4];
|
||||
int length = GetCaseIndependentLetters(
|
||||
isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
|
||||
chars);
|
||||
chars, 4);
|
||||
for (int j = 0; j < length; j++) {
|
||||
bm->Set(offset, chars[j]);
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/unistr.h"
|
||||
#endif // V8_INTL_SUPPORT
|
||||
|
||||
namespace v8 {
|
||||
@ -33,37 +34,17 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
|
||||
// A GC might move the calling generated code and invalidate the
|
||||
// return address on the stack.
|
||||
DCHECK_EQ(0, byte_length % 2);
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
int32_t length = (int32_t)(byte_length >> 1);
|
||||
icu::UnicodeString uni_str_1(reinterpret_cast<const char16_t*>(byte_offset1),
|
||||
length);
|
||||
return uni_str_1.caseCompare(reinterpret_cast<const char16_t*>(byte_offset2),
|
||||
length, U_FOLD_CASE_DEFAULT) == 0;
|
||||
#else
|
||||
uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
|
||||
uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
|
||||
size_t length = byte_length >> 1;
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
if (isolate == nullptr) {
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
uc32 c1 = substring1[i];
|
||||
uc32 c2 = substring2[i];
|
||||
if (unibrow::Utf16::IsLeadSurrogate(c1)) {
|
||||
// Non-BMP characters do not have case-equivalents in the BMP.
|
||||
// Both have to be non-BMP for them to be able to match.
|
||||
if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
|
||||
if (i + 1 < length) {
|
||||
uc16 c1t = substring1[i + 1];
|
||||
uc16 c2t = substring2[i + 1];
|
||||
if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
|
||||
unibrow::Utf16::IsTrailSurrogate(c2t)) {
|
||||
c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
|
||||
c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
|
||||
c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
|
||||
if (c1 != c2) return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif // V8_INTL_SUPPORT
|
||||
DCHECK_NOT_NULL(isolate);
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
||||
isolate->regexp_macro_assembler_canonicalize();
|
||||
@ -83,6 +64,7 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
#endif // V8_INTL_SUPPORT
|
||||
}
|
||||
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
namespace unibrow {
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
template <class T, int s> bool Predicate<T, s>::get(uchar code_point) {
|
||||
CacheEntry entry = entries_[code_point & kMask];
|
||||
if (entry.code_point() == code_point) return entry.value();
|
||||
@ -55,6 +56,7 @@ template <class T, int s> int Mapping<T, s>::CalculateValue(uchar c, uchar n,
|
||||
return length;
|
||||
}
|
||||
}
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
// Decodes UTF-8 bytes incrementally, allowing the decoding of bytes as they
|
||||
// stream in. This **must** be followed by a call to ValueOfIncrementalFinish
|
||||
|
164
src/unicode.cc
164
src/unicode.cc
@ -15,8 +15,11 @@
|
||||
|
||||
namespace unibrow {
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
static const int kStartBit = (1 << 30);
|
||||
static const int kChunkBits = (1 << 13);
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
static const uchar kSentinel = static_cast<uchar>(-1);
|
||||
|
||||
/**
|
||||
@ -28,7 +31,7 @@ typedef signed short int16_t; // NOLINT
|
||||
typedef unsigned short uint16_t; // NOLINT
|
||||
typedef int int32_t; // NOLINT
|
||||
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
// All access to the character table should go through this function.
|
||||
template <int D>
|
||||
static inline uchar TableGet(const int32_t* table, int index) {
|
||||
@ -44,7 +47,6 @@ static inline bool IsStart(int32_t entry) {
|
||||
return (entry & kStartBit) != 0;
|
||||
}
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
/**
|
||||
* Look up a character in the Unicode table using a mix of binary and
|
||||
* interpolation search. For a uniformly distributed array
|
||||
@ -92,6 +94,7 @@ struct MultiCharacterSpecialCase {
|
||||
uchar chars[kW];
|
||||
};
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
// Look up the mapping for the given character in the specified table,
|
||||
// which is of the specified length and uses the specified special case
|
||||
// mapping for multi-char mappings. The next parameter is the character
|
||||
@ -192,6 +195,7 @@ static int LookupMapping(const int32_t* table,
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
// This method decodes an UTF-8 value according to RFC 3629 and
|
||||
// https://encoding.spec.whatwg.org/#utf-8-decoder .
|
||||
@ -1596,7 +1600,6 @@ int ToUppercase::Convert(uchar c,
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings0[1] = { // NOLINT
|
||||
{{kSentinel}} }; // NOLINT
|
||||
@ -3072,98 +3075,75 @@ int CanonicalizationRange::Convert(uchar c,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const uchar UnicodeData::kMaxCodePoint = 0xFFFD;
|
||||
|
||||
int UnicodeData::GetByteCount() {
|
||||
#ifndef V8_INTL_SUPPORT // NOLINT
|
||||
return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kUppercaseTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kUppercaseTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kUppercaseTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable2Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable3Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable4Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable6Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable2Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable3Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable4Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable6Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
|
||||
+
|
||||
kToLowercaseMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+
|
||||
kToLowercaseMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kToLowercaseMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kToLowercaseMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kToUppercaseMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
|
||||
+
|
||||
kToUppercaseMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kToUppercaseMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kToUppercaseMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
|
||||
#else
|
||||
return
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
+
|
||||
kEcma262CanonicalizeMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kEcma262CanonicalizeMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kEcma262CanonicalizeMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kEcma262CanonicalizeMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kEcma262UnCanonicalizeMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<4>) // NOLINT
|
||||
+
|
||||
kEcma262UnCanonicalizeMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+
|
||||
kEcma262UnCanonicalizeMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+
|
||||
kEcma262UnCanonicalizeMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+
|
||||
kCanonicalizationRangeMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kCanonicalizationRangeMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+
|
||||
kCanonicalizationRangeMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>); // NOLINT
|
||||
return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kUppercaseTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kUppercaseTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kUppercaseTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable2Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable3Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable4Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable6Size * sizeof(int32_t) // NOLINT
|
||||
+ kLetterTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable2Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable3Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable4Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable6Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_StartTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
|
||||
+ kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
|
||||
+ kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
|
||||
+ kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
|
||||
+ kToLowercaseMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+ kToLowercaseMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kToLowercaseMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kToLowercaseMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kToUppercaseMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
|
||||
+ kToUppercaseMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kToUppercaseMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kToUppercaseMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
|
||||
+ kEcma262CanonicalizeMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kEcma262CanonicalizeMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kEcma262CanonicalizeMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kEcma262CanonicalizeMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kEcma262UnCanonicalizeMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<4>) // NOLINT
|
||||
+ kEcma262UnCanonicalizeMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+ kEcma262UnCanonicalizeMultiStrings5Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+ kEcma262UnCanonicalizeMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
||||
+ kCanonicalizationRangeMultiStrings0Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kCanonicalizationRangeMultiStrings1Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
||||
+ kCanonicalizationRangeMultiStrings7Size *
|
||||
sizeof(MultiCharacterSpecialCase<1>); // NOLINT
|
||||
}
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
} // namespace unibrow
|
||||
|
@ -25,6 +25,7 @@ typedef unsigned char byte;
|
||||
*/
|
||||
const int kMaxMappingSize = 4;
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
template <class T, int size = 256>
|
||||
class Predicate {
|
||||
public:
|
||||
@ -87,7 +88,6 @@ class Mapping {
|
||||
CacheEntry entries_[kSize];
|
||||
};
|
||||
|
||||
|
||||
class UnicodeData {
|
||||
private:
|
||||
friend class Test;
|
||||
@ -95,6 +95,7 @@ class UnicodeData {
|
||||
static const uchar kMaxCodePoint;
|
||||
};
|
||||
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
class Utf16 {
|
||||
public:
|
||||
@ -227,7 +228,6 @@ struct ToUppercase {
|
||||
uchar* result,
|
||||
bool* allow_caching_ptr);
|
||||
};
|
||||
#endif
|
||||
struct Ecma262Canonicalize {
|
||||
static const int kMaxWidth = 1;
|
||||
static int Convert(uchar c,
|
||||
@ -249,6 +249,7 @@ struct CanonicalizationRange {
|
||||
uchar* result,
|
||||
bool* allow_caching_ptr);
|
||||
};
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
|
||||
} // namespace unibrow
|
||||
|
||||
|
@ -1488,7 +1488,7 @@ TEST(AddInverseToTable) {
|
||||
CHECK(table.Get(0xFFFF)->Get(0));
|
||||
}
|
||||
|
||||
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
static uc32 canonicalize(uc32 c) {
|
||||
unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
|
||||
int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
|
||||
@ -1500,7 +1500,6 @@ static uc32 canonicalize(uc32 c) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST(LatinCanonicalize) {
|
||||
unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
|
||||
for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
|
||||
@ -1514,7 +1513,6 @@ TEST(LatinCanonicalize) {
|
||||
}
|
||||
for (uc32 c = 128; c < (1 << 21); c++)
|
||||
CHECK_GE(canonicalize(c), 128);
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
unibrow::Mapping<unibrow::ToUppercase> to_upper;
|
||||
// Canonicalization is only defined for the Basic Multilingual Plane.
|
||||
for (uc32 c = 0; c < (1 << 16); c++) {
|
||||
@ -1529,10 +1527,8 @@ TEST(LatinCanonicalize) {
|
||||
u = c;
|
||||
CHECK_EQ(u, canonicalize(c));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static uc32 CanonRangeEnd(uc32 c) {
|
||||
unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
|
||||
int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
|
||||
@ -1588,6 +1584,7 @@ TEST(UncanonicalizeEquivalence) {
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
|
||||
Vector<CharacterRange> expected) {
|
||||
@ -1621,21 +1618,26 @@ TEST(CharacterRangeCaseIndependence) {
|
||||
CharacterRange::Singleton('A'));
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
|
||||
CharacterRange::Singleton('Z'));
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
|
||||
CharacterRange::Range('A', 'Z'));
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
|
||||
CharacterRange::Range('C', 'F'));
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
|
||||
CharacterRange::Range('A', 'B'));
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
|
||||
CharacterRange::Range('Y', 'Z'));
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
TestSimpleRangeCaseIndependence(isolate,
|
||||
CharacterRange::Range('a' - 1, 'z' + 1),
|
||||
CharacterRange::Range('A', 'Z'));
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
|
||||
CharacterRange::Range('a', 'z'));
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
|
||||
CharacterRange::Range('c', 'f'));
|
||||
#ifndef V8_INTL_SUPPORT
|
||||
TestSimpleRangeCaseIndependence(isolate,
|
||||
CharacterRange::Range('A' - 1, 'Z' + 1),
|
||||
CharacterRange::Range('a', 'z'));
|
||||
@ -1644,6 +1646,7 @@ TEST(CharacterRangeCaseIndependence) {
|
||||
// whole block at a time.
|
||||
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
|
||||
CharacterRange::Range('a', 'z'));
|
||||
#endif // !V8_INTL_SUPPORT
|
||||
}
|
||||
|
||||
|
||||
|
19
test/intl/regress-8348.js
Normal file
19
test/intl/regress-8348.js
Normal file
@ -0,0 +1,19 @@
|
||||
// Copyright 2019 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Asserts that a case-insensitive ('i' flag) RegExp built from `text` matches
// the result of `text.toUpperCase()`. `msg` is prefixed to `text` to form the
// assertion message.
function testRegExpI(text, msg) {
|
||||
assertTrue(new RegExp(text, 'i').test(text.toUpperCase()), msg + ': ' + text);
|
||||
}
|
||||
|
||||
testRegExpI('abc', 'ASCII');
|
||||
testRegExpI('ABC', 'ASCII');
|
||||
testRegExpI('rst', 'ASCII');
|
||||
testRegExpI('RST', 'ASCII');
|
||||
|
||||
testRegExpI('αβψδεφ', 'Greek');
|
||||
|
||||
testRegExpI('\u1c80\u1c81', 'Historic Cyrillic added in Unicode 9');
|
||||
testRegExpI('\u026A', 'Dotless I, uppercase form added in Unicode 9');
|
||||
|
||||
testRegExpI('ოქტ', 'Georgian Mtavruli added in Unicode 11');
|
Loading…
Reference in New Issue
Block a user