v8/test/unittests/char-predicates-unittest.cc
Leszek Swirski 7aac6bc905 [cleanup] Make unicode predicate cache tables static
Moves the unicode predicate cache tables out of the unicode cache,
and turns them into generic predicates in char-predicates.h which
use static constexpr tables.

This drops the per-isolate cost of unicode caches, and removes the
need for accessing the unicode cache from most files. It does remove
the mutability of the cache, which means that there may be regressions
when parsing non-ASCII identifiers. Most likely the benefits to ASCII
identifiers/keywords will outweigh any non-ASCII costs.

Change-Id: I9a7a8b7c9b22d3e9ede824ab4e27f133ce20a399
Reviewed-on: https://chromium-review.googlesource.com/c/1335564
Reviewed-by: Yang Guo <yangguo@chromium.org>
Reviewed-by: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57506}
2018-11-14 15:33:45 +00:00

194 lines
6.6 KiB
C++

// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/char-predicates.h"
#include "src/char-predicates-inl.h"
#include "src/unicode.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace v8 {
namespace internal {
// Spot-checks IsWhiteSpace(): a sample of white-space code points must be
// accepted, and U+180E must be rejected.
TEST(CharPredicatesTest, WhiteSpace) {
  // Control characters and the plain space.
  EXPECT_TRUE(IsWhiteSpace(0x0009));  // CHARACTER TABULATION
  EXPECT_TRUE(IsWhiteSpace(0x000B));  // LINE TABULATION
  EXPECT_TRUE(IsWhiteSpace(0x000C));  // FORM FEED
  EXPECT_TRUE(IsWhiteSpace(' '));

  // Space characters outside ASCII.
  EXPECT_TRUE(IsWhiteSpace(0x00A0));  // NO-BREAK SPACE
  EXPECT_TRUE(IsWhiteSpace(0x1680));  // OGHAM SPACE MARK
  EXPECT_TRUE(IsWhiteSpace(0x2000));  // EN QUAD
  EXPECT_TRUE(IsWhiteSpace(0x2007));  // FIGURE SPACE
  EXPECT_TRUE(IsWhiteSpace(0x202F));  // NARROW NO-BREAK SPACE
  EXPECT_TRUE(IsWhiteSpace(0x205F));  // MEDIUM MATHEMATICAL SPACE
  EXPECT_TRUE(IsWhiteSpace(0x3000));  // IDEOGRAPHIC SPACE

  // The byte-order mark is expected to count as white space as well.
  EXPECT_TRUE(IsWhiteSpace(0xFEFF));  // ZERO WIDTH NO-BREAK SPACE

  // Negative case.
  EXPECT_FALSE(IsWhiteSpace(0x180E));  // MONGOLIAN VOWEL SEPARATOR
}
// Spot-checks IsWhiteSpaceOrLineTerminator(): every white-space code point
// sampled by the WhiteSpace test, plus the four line terminators, must be
// accepted; U+180E must be rejected.
TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) {
  // White space (kept in sync with the WhiteSpace test's sample).
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x0009));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000B));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000C));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(' '));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x00A0));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x1680));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2000));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2007));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x202F));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x205F));
  // U+3000 IDEOGRAPHIC SPACE is covered by the WhiteSpace test, so it is
  // checked here too.
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x3000));
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0xFEFF));

  // Line terminators
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000A));  // LINE FEED
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x000D));  // CARRIAGE RETURN
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2028));  // LINE SEPARATOR
  EXPECT_TRUE(IsWhiteSpaceOrLineTerminator(0x2029));  // PARAGRAPH SEPARATOR

  // Negative case: neither white space nor a line terminator.
  EXPECT_FALSE(IsWhiteSpaceOrLineTerminator(0x180E));
}
// Spot-checks IsIdentifierStart() on ASCII specials, the Other_ID_Start
// code points, a known exclusion, and (with V8_INTL_SUPPORT) code points
// added to ID_Start in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierStart) {
  // ASCII characters accepted in addition to letters.
  // NOTE(review): the backslash presumably counts because it can begin a
  // unicode escape inside an identifier -- confirm against the predicate.
  EXPECT_TRUE(IsIdentifierStart('$'));
  EXPECT_TRUE(IsIdentifierStart('_'));
  EXPECT_TRUE(IsIdentifierStart('\\'));

  // Other_ID_Start, see http://www.unicode.org/reports/tr31/
  // The list can be reproduced with:
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //       grep 'Other_ID_Start'
  EXPECT_TRUE(IsIdentifierStart(0x1885));
  EXPECT_TRUE(IsIdentifierStart(0x1886));
  EXPECT_TRUE(IsIdentifierStart(0x2118));
  EXPECT_TRUE(IsIdentifierStart(0x212E));
  EXPECT_TRUE(IsIdentifierStart(0x309B));
  EXPECT_TRUE(IsIdentifierStart(0x309C));

  // Issue 2892: U+2E2F carries the Pattern_Syntax property, which keeps it
  // out of ID_Start.
  EXPECT_FALSE(IsIdentifierStart(0x2E2F));

#ifdef V8_INTL_SUPPORT
  // Added in Unicode 8.0 (6,847 code points):
  //   [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  EXPECT_TRUE(IsIdentifierStart(0x08B3));
  EXPECT_TRUE(IsIdentifierStart(0x0AF9));
  EXPECT_TRUE(IsIdentifierStart(0x13F8));
  EXPECT_TRUE(IsIdentifierStart(0x9FCD));
  EXPECT_TRUE(IsIdentifierStart(0xAB60));
  EXPECT_TRUE(IsIdentifierStart(0x10CC0));
  EXPECT_TRUE(IsIdentifierStart(0x108E0));
  EXPECT_TRUE(IsIdentifierStart(0x2B820));

  // Added in Unicode 9.0 (7,177 code points):
  //   [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
  EXPECT_TRUE(IsIdentifierStart(0x1C80));
  EXPECT_TRUE(IsIdentifierStart(0x104DB));
  EXPECT_TRUE(IsIdentifierStart(0x1E922));
#endif
}
// Spot-checks IsIdentifierPart(): ASCII specials, the zero-width joiners,
// Other_ID_Start / Other_ID_Continue code points, a known exclusion, and
// (with V8_INTL_SUPPORT) code points added in Unicode 8.0 and 9.0.
TEST(CharPredicatesTest, IdentifierPart) {
  // ASCII characters accepted in addition to letters and digits.
  EXPECT_TRUE(IsIdentifierPart('$'));
  EXPECT_TRUE(IsIdentifierPart('_'));
  EXPECT_TRUE(IsIdentifierPart('\\'));

  // ZERO WIDTH NON-JOINER and ZERO WIDTH JOINER.
  EXPECT_TRUE(IsIdentifierPart(0x200C));
  EXPECT_TRUE(IsIdentifierPart(0x200D));

  // See http://www.unicode.org/reports/tr31/
  // The two lists below can be reproduced with:
  //   curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt |
  //       grep 'Other_ID_(Continue|Start)'

  // Other_ID_Start: anything that may start an identifier may continue one.
  EXPECT_TRUE(IsIdentifierPart(0x1885));
  EXPECT_TRUE(IsIdentifierPart(0x1886));
  EXPECT_TRUE(IsIdentifierPart(0x2118));
  EXPECT_TRUE(IsIdentifierPart(0x212E));
  EXPECT_TRUE(IsIdentifierPart(0x309B));
  EXPECT_TRUE(IsIdentifierPart(0x309C));

  // Other_ID_Continue
  EXPECT_TRUE(IsIdentifierPart(0x00B7));
  EXPECT_TRUE(IsIdentifierPart(0x0387));
  EXPECT_TRUE(IsIdentifierPart(0x1369));
  EXPECT_TRUE(IsIdentifierPart(0x1370));
  EXPECT_TRUE(IsIdentifierPart(0x1371));
  EXPECT_TRUE(IsIdentifierPart(0x19DA));

  // Issue 2892: U+2E2F carries the Pattern_Syntax property, so it is not
  // accepted as an identifier part either.
  EXPECT_FALSE(IsIdentifierPart(0x2E2F));

#ifdef V8_INTL_SUPPORT
  // ID_Start additions in Unicode 8.0 (6,847 code points):
  //   [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]]
  EXPECT_TRUE(IsIdentifierPart(0x08B3));
  EXPECT_TRUE(IsIdentifierPart(0x0AF9));
  EXPECT_TRUE(IsIdentifierPart(0x13F8));
  EXPECT_TRUE(IsIdentifierPart(0x9FCD));
  EXPECT_TRUE(IsIdentifierPart(0xAB60));
  EXPECT_TRUE(IsIdentifierPart(0x10CC0));
  EXPECT_TRUE(IsIdentifierPart(0x108E0));
  EXPECT_TRUE(IsIdentifierPart(0x2B820));

  // Continue-only additions in Unicode 8.0 (162 code points):
  //   [[:ID_Continue:]-[:ID_Start:]] & [[:Age=8.0:]-[:Age=7.0:]]
  EXPECT_TRUE(IsIdentifierPart(0x08E3));
  EXPECT_TRUE(IsIdentifierPart(0xA69E));
  EXPECT_TRUE(IsIdentifierPart(0x11730));

  // ID_Start additions in Unicode 9.0 (7,177 code points):
  //   [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]]
  EXPECT_TRUE(IsIdentifierPart(0x1C80));
  EXPECT_TRUE(IsIdentifierPart(0x104DB));
  EXPECT_TRUE(IsIdentifierPart(0x1E922));

  // Continue-only additions in Unicode 9.0 (162 code points):
  //   [[:ID_Continue:]-[:ID_Start:]] & [[:Age=9.0:]-[:Age=8.0:]]
  // NOTE(review): this count is identical to the Unicode 8.0 figure above
  // and may have been copied from it -- confirm.
  EXPECT_TRUE(IsIdentifierPart(0x08D4));
  EXPECT_TRUE(IsIdentifierPart(0x1DFB));
  EXPECT_TRUE(IsIdentifierPart(0xA8C5));
  EXPECT_TRUE(IsIdentifierPart(0x11450));
#endif
}
#ifdef V8_INTL_SUPPORT
// Checks IsIdentifierStart() and IsIdentifierPart() on code points above
// U+FFFF, grouped by the expected outcome for each predicate. Only built
// when V8_INTL_SUPPORT is defined.
TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) {
  // Accepted by both predicates (ID_Start and ID_Continue).
  EXPECT_TRUE(IsIdentifierStart(0x10403));  // Category Lu
  EXPECT_TRUE(IsIdentifierPart(0x10403));

  EXPECT_TRUE(IsIdentifierStart(0x1043C));  // Category Ll
  EXPECT_TRUE(IsIdentifierPart(0x1043C));

  EXPECT_TRUE(IsIdentifierStart(0x16F9C));  // Category Lm
  EXPECT_TRUE(IsIdentifierPart(0x16F9C));

  EXPECT_TRUE(IsIdentifierStart(0x10048));  // Category Lo
  EXPECT_TRUE(IsIdentifierPart(0x10048));

  EXPECT_TRUE(IsIdentifierStart(0x1014D));  // Category Nl
  EXPECT_TRUE(IsIdentifierPart(0x1014D));

  // ID_Start code points introduced with Unicode 8.0:
  //   [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:]
  EXPECT_TRUE(IsIdentifierStart(0x108E0));
  EXPECT_TRUE(IsIdentifierStart(0x10C80));

  // Accepted as a part but not as a start (ID_Continue only).
  EXPECT_FALSE(IsIdentifierStart(0x101FD));  // Category Mn
  EXPECT_TRUE(IsIdentifierPart(0x101FD));

  EXPECT_FALSE(IsIdentifierStart(0x11002));  // Category Mc
  EXPECT_TRUE(IsIdentifierPart(0x11002));

  EXPECT_FALSE(IsIdentifierStart(0x104A9));  // Category Nd
  EXPECT_TRUE(IsIdentifierPart(0x104A9));

  // Rejected by both predicates.
  EXPECT_FALSE(IsIdentifierStart(0x10111));  // Category No
  EXPECT_FALSE(IsIdentifierPart(0x10111));

  EXPECT_FALSE(IsIdentifierStart(0x1F4A9));  // Category So
  EXPECT_FALSE(IsIdentifierPart(0x1F4A9));
}
#endif  // V8_INTL_SUPPORT
} // namespace internal
} // namespace v8