4aeb94a42d
Use ICU to check ID_Start, ID_Continue and WhiteSpace even for BMP when V8_INTL_SUPPORT is on (which is default). Change LineTerminator::Is() to check 4 code points from ES#sec-line-terminators instead of using tables and Lookup function. Remove Lowercase::Is(). It's not used anywhere. Update webkit/{ToNumber,parseFloat}.js to have the correct expectation for U+180E and the corresponding expected files. This is a follow-up to an earlier change ( https://codereview.chromium.org/2720953003 ). CQ_INCLUDE_TRYBOTS=master.tryserver.v8:v8_win_dbg,v8_mac_dbg;master.tryserver.chromium.android:android_arm64_dbg_recipe CQ_INCLUDE_TRYBOTS=master.tryserver.v8:v8_linux_noi18n_rel_ng BUG=v8:5370,v8:5155 TEST=unittests --gtest_filter=CharP* TEST=webkit: ToNumber, parseFloat TEST=test262: built-ins/Number/S9.3*, built-ins/parse{Int,Float}/S15* TEST=test262: language/white-space/mong* TEST=test262: built-ins/String/prototype/trim/u180e TEST=mjsunit: whitespaces Review-Url: https://codereview.chromium.org/2331303002 Cr-Commit-Position: refs/heads/master@{#45957}
193 lines
6.8 KiB
C++
193 lines
6.8 KiB
C++
// Copyright 2014 the V8 project authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#include "src/char-predicates.h"
|
|
#include "src/unicode.h"
|
|
#include "testing/gtest/include/gtest/gtest.h"
|
|
|
|
namespace v8 {
|
|
namespace internal {
|
|
|
|
// Exercises WhiteSpace::Is() on a sample of code points: every entry in the
// table below is expected to be accepted, and U+180E is expected to be
// rejected.
TEST(CharPredicatesTest, WhiteSpace) {
  // The ASCII space is checked on its own so that it is still passed as a
  // character literal.
  EXPECT_TRUE(WhiteSpace::Is(' '));

  static const int kExpectedWhiteSpace[] = {
      0x0009,  // CHARACTER TABULATION
      0x000B,  // LINE TABULATION
      0x000C,  // FORM FEED
      0x00A0,  // NO-BREAK SPACE
      0x1680,  // OGHAM SPACE MARK
      0x2000,  // EN QUAD
      0x2007,  // FIGURE SPACE
      0x202F,  // NARROW NO-BREAK SPACE
      0x205F,  // MEDIUM MATHEMATICAL SPACE
      0x3000,  // IDEOGRAPHIC SPACE
      0xFEFF,  // ZERO WIDTH NO-BREAK SPACE (byte order mark)
  };
  for (int code_point : kExpectedWhiteSpace) {
    // The streamed text is only built when the expectation fails.
    EXPECT_TRUE(WhiteSpace::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

  // U+180E (MONGOLIAN VOWEL SEPARATOR) must not be reported as white space.
  EXPECT_FALSE(WhiteSpace::Is(0x180E));
}
|
|
|
|
|
|
// Exercises WhiteSpaceOrLineTerminator::Is() on the same white space sample
// used by the WhiteSpace test in this file, on the four line terminator code
// points, and on U+180E, which is expected to be rejected.
TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) {
  // White space.
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' '));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x1680));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2000));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2007));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x202F));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x205F));
  // U+3000 (IDEOGRAPHIC SPACE) is asserted as white space by the WhiteSpace
  // test in this file, so it is covered here as well to keep the two lists in
  // sync.
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x3000));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF));

  // Line terminators: LF, CR, LINE SEPARATOR, PARAGRAPH SEPARATOR.
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028));
  EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029));

  // U+180E (MONGOLIAN VOWEL SEPARATOR) is neither white space nor a line
  // terminator.
  EXPECT_FALSE(WhiteSpaceOrLineTerminator::Is(0x180E));
}
|
|
|
|
|
|
// Exercises IdentifierStart::Is() on ASCII punctuation the test expects to be
// accepted, on Other_ID_Start code points, on U+2E2F (expected to be
// rejected) and, when V8_INTL_SUPPORT is defined, on ID_Start code points
// introduced in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierStart) {
  // Passed as characters, exactly like the literals '$', '_' and '\\'.
  const char kAsciiStarts[] = {'$', '_', '\\'};
  for (char ascii : kAsciiStarts) {
    EXPECT_TRUE(IdentifierStart::Is(ascii)) << "character: " << ascii;
  }

  // Other_ID_Start, see http://www.unicode.org/reports/tr31/
  // To list them:
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //   grep 'Other_ID_Start'
  static const int kOtherIdStart[] = {0x1885, 0x1886, 0x2118,
                                      0x212E, 0x309B, 0x309C};
  for (int code_point : kOtherIdStart) {
    EXPECT_TRUE(IdentifierStart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

  // Issue 2892:
  // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start.
  EXPECT_FALSE(IdentifierStart::Is(0x2E2F));

#ifdef V8_INTL_SUPPORT
  static const int kNewerIdStart[] = {
      // New in Unicode 8.0 (6,847 code points)
      // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
      0x08B3, 0x0AF9, 0x13F8, 0x9FCD, 0xAB60, 0x10CC0, 0x108E0, 0x2B820,
      // New in Unicode 9.0 (7,177 code points)
      // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
      0x1C80, 0x104DB, 0x1E922,
  };
  for (int code_point : kNewerIdStart) {
    EXPECT_TRUE(IdentifierStart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }
#endif
}
|
|
|
|
|
|
// Exercises IdentifierPart::Is() on ASCII punctuation, on U+200C/U+200D, on
// Other_ID_Start and Other_ID_Continue code points, on U+2E2F (expected to be
// rejected) and, when V8_INTL_SUPPORT is defined, on code points introduced
// in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierPart) {
  // Passed as characters, exactly like the literals '$', '_' and '\\'.
  const char kAsciiParts[] = {'$', '_', '\\'};
  for (char ascii : kAsciiParts) {
    EXPECT_TRUE(IdentifierPart::Is(ascii)) << "character: " << ascii;
  }

  // ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER.
  static const int kJoiners[] = {0x200C, 0x200D};
  for (int code_point : kJoiners) {
    EXPECT_TRUE(IdentifierPart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

#ifdef V8_INTL_SUPPORT
  static const int kNewerIdContinue[] = {
      // New in Unicode 8.0 (6,847 code points)
      // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
      0x08B3, 0x0AF9, 0x13F8, 0x9FCD, 0xAB60, 0x10CC0, 0x108E0, 0x2B820,
      // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=8.0:]-[:Age=7.0:]]
      // 162 code points
      0x08E3, 0xA69E, 0x11730,
      // New in Unicode 9.0 (7,177 code points)
      // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
      0x1C80, 0x104DB, 0x1E922,
      // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=9.0:]-[:Age=8.0:]]
      // 162 code points
      // NOTE(review): same count as the 8.0 group above; possibly copied
      // over - confirm against the Unicode data.
      0x08D4, 0x1DFB, 0xA8C5, 0x11450,
  };
  for (int code_point : kNewerIdContinue) {
    EXPECT_TRUE(IdentifierPart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }
#endif

  // See http://www.unicode.org/reports/tr31/
  // To list the code points below:
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //   grep 'Other_ID_(Continue|Start)'
  static const int kOtherIdStartOrContinue[] = {
      // Other_ID_Start
      0x1885, 0x1886, 0x2118, 0x212E, 0x309B, 0x309C,
      // Other_ID_Continue
      0x00B7, 0x0387, 0x1369, 0x1370, 0x1371, 0x19DA,
  };
  for (int code_point : kOtherIdStartOrContinue) {
    EXPECT_TRUE(IdentifierPart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

  // Issue 2892:
  // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start; this
  // test expects it to be rejected as an identifier part as well.
  EXPECT_FALSE(IdentifierPart::Is(0x2E2F));
}
|
|
|
|
#ifdef V8_INTL_SUPPORT
|
|
// Exercises IdentifierStart::Is() and IdentifierPart::Is() on code points
// above U+FFFF, grouped by the outcome the test expects from each predicate.
TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) {
  // Both ID_Start and ID_Continue.
  static const int kStartAndContinue[] = {
      0x10403,  // Category Lu
      0x1043C,  // Category Ll
      0x16F9C,  // Category Lm
      0x10048,  // Category Lo
      0x1014D,  // Category Nl
  };
  for (int code_point : kStartAndContinue) {
    EXPECT_TRUE(IdentifierStart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
    EXPECT_TRUE(IdentifierPart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

  // New in Unicode 8.0
  // [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:]
  // Only IdentifierStart is checked for these.
  static const int kUnicode8Start[] = {0x108E0, 0x10C80};
  for (int code_point : kUnicode8Start) {
    EXPECT_TRUE(IdentifierStart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

  // Only ID_Continue.
  static const int kContinueOnly[] = {
      0x101FD,  // Category Mn
      0x11002,  // Category Mc
      0x104A9,  // Category Nd
  };
  for (int code_point : kContinueOnly) {
    EXPECT_FALSE(IdentifierStart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
    EXPECT_TRUE(IdentifierPart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }

  // Neither.
  static const int kNeither[] = {
      0x10111,  // Category No
      0x1F4A9,  // Category So
  };
  for (int code_point : kNeither) {
    EXPECT_FALSE(IdentifierStart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
    EXPECT_FALSE(IdentifierPart::Is(code_point))
        << "code point: 0x" << std::hex << code_point;
  }
}
|
|
#endif // V8_INTL_SUPPORT
|
|
|
|
} // namespace internal
|
|
} // namespace v8
|