v8/src/char-predicates.h
Leszek Swirski 7aac6bc905 [cleanup] Make unicode predicate cache tables static
Moves the unicode predicate cache tables out of the unicode cache,
and turns them into generic predicates in char-predicates.h which
use static constexpr tables.

This drops the per-isolate cost of unicode caches, and removes the
need for accessing the unicode cache from most files. It does remove
the mutability of the cache, which means that there may be regressions
when parsing non-ASCII identifiers. Most likely the benefits to ASCII
identifiers/keywords will outweigh any non-ASCII costs.

Change-Id: I9a7a8b7c9b22d3e9ede824ab4e27f133ce20a399
Reviewed-on: https://chromium-review.googlesource.com/c/1335564
Reviewed-by: Yang Guo <yangguo@chromium.org>
Reviewed-by: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57506}
2018-11-14 15:33:45 +00:00

86 lines
2.9 KiB
C++

// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_CHAR_PREDICATES_H_
#define V8_CHAR_PREDICATES_H_
#include "src/globals.h"
#include "src/unicode.h"
namespace v8 {
namespace internal {
// Unicode character predicates as defined by ECMA-262, 3rd,
// used for lexical analysis.
//
// The functions in this group are only declared here: each one is constexpr
// and takes a single uc32 character value. Their bodies are not part of this
// section of the header (presumably they live in a companion inline header --
// TODO confirm), so any translation unit that calls them must also see the
// definitions.
inline constexpr int AsciiAlphaToLower(uc32 c);
inline constexpr bool IsCarriageReturn(uc32 c);
inline constexpr bool IsLineFeed(uc32 c);
inline constexpr bool IsAsciiIdentifier(uc32 c);
inline constexpr bool IsAlphaNumeric(uc32 c);
inline constexpr bool IsDecimalDigit(uc32 c);
inline constexpr bool IsHexDigit(uc32 c);
inline constexpr bool IsOctalDigit(uc32 c);
inline constexpr bool IsBinaryDigit(uc32 c);
inline constexpr bool IsRegExpWord(uc32 c);
inline constexpr bool IsRegExpNewline(uc32 c);
// ES#sec-names-and-keywords
// This includes '_', '$' and '\', and ID_Start according to
// http://www.unicode.org/reports/tr31/, which consists of categories
// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
// 'Pattern_Syntax' or 'Pattern_White_Space'.
inline bool IsIdentifierStart(uc32 c);
#ifdef V8_INTL_SUPPORT
V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(uc32 c);
#else
// Fallback used when V8_INTL_SUPPORT is not defined: classifies `c` with
// unibrow's ID_Start table.
inline bool IsIdentifierStartSlow(uc32 c) {
  // Non-BMP characters are not supported without I18N, so anything above
  // 0xFFFF is rejected without consulting the table.
  if (c > 0xFFFF) return false;
  return unibrow::ID_Start::Is(c);
}
#endif
// ES#sec-names-and-keywords
// This includes \u200c and \u200d, and ID_Continue according to
// http://www.unicode.org/reports/tr31/, which consists of ID_Start,
// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
// 'Pattern_Syntax' or 'Pattern_White_Space'.
inline bool IsIdentifierPart(uc32 c);
#ifdef V8_INTL_SUPPORT
V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(uc32 c);
#else
// Fallback used when V8_INTL_SUPPORT is not defined: `c` is an identifier
// part if unibrow reports it as either ID_Start or ID_Continue.
inline bool IsIdentifierPartSlow(uc32 c) {
  // Non-BMP characters are not supported without I18N, so values above
  // 0xFFFF short-circuit to false before any table lookup.
  return c <= 0xFFFF &&
         (unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c));
}
#endif
// ES6 draft section 11.2
// This includes all code points of Unicode category 'Zs'.
// Further included are \u0009, \u000b, \u000c, and \ufeff.
inline bool IsWhiteSpace(uc32 c);
#ifdef V8_INTL_SUPPORT
V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(uc32 c);
#else
// Fallback used when V8_INTL_SUPPORT is not defined: forwards directly to
// unibrow's WhiteSpace predicate.
inline bool IsWhiteSpaceSlow(uc32 c) {
  return unibrow::WhiteSpace::Is(c);
}
#endif
// WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
// This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp)
// as well as \u0009 - \u000d and \ufeff.
inline bool IsWhiteSpaceOrLineTerminator(uc32 c);
// Slow-path check: true when `c` is white space according to
// IsWhiteSpaceSlow, or a line terminator according to
// unibrow::IsLineTerminator.
inline bool IsWhiteSpaceOrLineTerminatorSlow(uc32 c) {
  // White space is tested first; the line-terminator lookup only runs when
  // that test fails.
  if (IsWhiteSpaceSlow(c)) return true;
  return unibrow::IsLineTerminator(c);
}
// Declaration only; the definition is not in this section of the header.
// NOTE(review): presumably reports whether `c`, taken together with the
// following character `next`, forms a line terminator sequence (e.g. CR LF)
// -- confirm against the definition.
inline bool IsLineTerminatorSequence(uc32 c, uc32 next);
} // namespace internal
} // namespace v8
#endif // V8_CHAR_PREDICATES_H_