2012-03-06 09:43:12 +00:00
|
|
|
// Copyright 2012 the V8 project authors. All rights reserved.
|
2014-04-29 06:42:26 +00:00
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
2008-07-03 15:10:15 +00:00
|
|
|
//
|
2014-10-08 14:55:03 +00:00
|
|
|
// This file was generated at 2014-10-08 15:25:47.940335
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2015-09-03 07:55:53 +00:00
|
|
|
#include "src/unicode.h"
|
2014-06-03 08:12:43 +00:00
|
|
|
#include "src/unicode-inl.h"
|
2009-03-18 15:20:26 +00:00
|
|
|
#include <stdio.h>
|
2014-06-20 08:40:11 +00:00
|
|
|
#include <stdlib.h>
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2017-06-14 20:32:49 +00:00
|
|
|
#ifdef V8_INTL_SUPPORT
|
|
|
|
#include "unicode/uchar.h"
|
|
|
|
#endif
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
namespace unibrow {
|
|
|
|
|
2008-11-25 11:07:48 +00:00
|
|
|
static const int kStartBit = (1 << 30);
|
2010-07-30 12:59:57 +00:00
|
|
|
static const int kChunkBits = (1 << 13);
|
2011-01-03 10:28:39 +00:00
|
|
|
static const uchar kSentinel = static_cast<uchar>(-1);
|
2008-11-25 11:07:48 +00:00
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
/**
|
|
|
|
* \file
|
2017-12-01 03:57:07 +00:00
|
|
|
* Implementations of functions for working with Unicode.
|
2008-07-03 15:10:15 +00:00
|
|
|
*/
|
|
|
|
|
2008-08-28 09:55:41 +00:00
|
|
|
// Local fixed-width integer aliases used by the lookup tables below.
using int16_t = signed short;     // NOLINT
using uint16_t = unsigned short;  // NOLINT
using int32_t = int;              // NOLINT
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2014-10-08 14:55:03 +00:00
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// All access to the character table should go through this function.
|
|
|
|
template <int D>
|
2008-11-25 11:07:48 +00:00
|
|
|
static inline uchar TableGet(const int32_t* table, int index) {
|
2008-07-03 15:10:15 +00:00
|
|
|
return table[D * index];
|
|
|
|
}
|
|
|
|
|
2013-07-05 09:52:11 +00:00
|
|
|
|
2008-11-25 11:07:48 +00:00
|
|
|
// Extracts the value part of a table entry: every bit below kStartBit.
static inline uchar GetEntry(int32_t entry) {
  const int32_t value_mask = kStartBit - 1;
  return entry & value_mask;
}
|
|
|
|
|
2008-11-25 11:07:48 +00:00
|
|
|
// Reports whether the kStartBit flag is set on a table entry.
static inline bool IsStart(int32_t entry) {
  // kStartBit is a single bit, so comparing against it is the same as
  // testing for a non-zero result.
  return (entry & kStartBit) == kStartBit;
}
|
|
|
|
|
2017-06-14 20:32:49 +00:00
|
|
|
#ifndef V8_INTL_SUPPORT
|
2008-07-03 15:10:15 +00:00
|
|
|
/**
|
2017-12-01 03:57:07 +00:00
|
|
|
* Look up a character in the Unicode table using a mix of binary and
|
2008-07-03 15:10:15 +00:00
|
|
|
* interpolation search. For a uniformly distributed array
|
|
|
|
* interpolation search beats binary search by a wide margin. However,
|
|
|
|
* in this case interpolation search degenerates because of some very
|
|
|
|
* high values in the lower end of the table so this function uses a
|
|
|
|
* combination. The average number of steps to look up the information
|
|
|
|
* about a character is around 10, slightly higher if there is no
|
|
|
|
* information available about the character.
|
|
|
|
*/
|
2008-11-25 11:07:48 +00:00
|
|
|
static bool LookupPredicate(const int32_t* table, uint16_t size, uchar chr) {
|
2008-07-03 15:10:15 +00:00
|
|
|
static const int kEntryDist = 1;
|
2008-11-25 11:07:48 +00:00
|
|
|
uint16_t value = chr & (kChunkBits - 1);
|
2008-07-03 15:10:15 +00:00
|
|
|
unsigned int low = 0;
|
|
|
|
unsigned int high = size - 1;
|
|
|
|
while (high != low) {
|
|
|
|
unsigned int mid = low + ((high - low) >> 1);
|
|
|
|
uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid));
|
|
|
|
// If we've found an entry less than or equal to this one, and the
|
|
|
|
// next one is not also less than this one, we've arrived.
|
|
|
|
if ((current_value <= value) &&
|
|
|
|
(mid + 1 == size ||
|
|
|
|
GetEntry(TableGet<kEntryDist>(table, mid + 1)) > value)) {
|
|
|
|
low = mid;
|
|
|
|
break;
|
|
|
|
} else if (current_value < value) {
|
|
|
|
low = mid + 1;
|
|
|
|
} else if (current_value > value) {
|
|
|
|
// If we've just checked the bottom-most value and it's not
|
|
|
|
// the one we're looking for, we're done.
|
|
|
|
if (mid == 0) break;
|
|
|
|
high = mid - 1;
|
|
|
|
}
|
|
|
|
}
|
2008-11-25 11:07:48 +00:00
|
|
|
int32_t field = TableGet<kEntryDist>(table, low);
|
|
|
|
uchar entry = GetEntry(field);
|
|
|
|
bool is_start = IsStart(field);
|
2011-01-03 10:28:39 +00:00
|
|
|
return (entry == value) || (entry < value && is_start);
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
2017-06-14 20:32:49 +00:00
|
|
|
#endif // !V8_INTL_SUPPORT
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2008-11-25 11:07:48 +00:00
|
|
|
template <int kW>
|
2008-07-03 15:10:15 +00:00
|
|
|
struct MultiCharacterSpecialCase {
|
2011-01-03 10:28:39 +00:00
|
|
|
static const uchar kEndOfEncoding = kSentinel;
|
2008-11-25 11:07:48 +00:00
|
|
|
uchar chars[kW];
|
2008-07-03 15:10:15 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// Look up the mapping for the given character in the specified table,
|
|
|
|
// which is of the specified length and uses the specified special case
|
|
|
|
// mapping for multi-char mappings. The next parameter is the character
|
|
|
|
// following the one to map. The result will be written in to the result
|
|
|
|
// buffer and the number of characters written will be returned. Finally,
|
|
|
|
// if the allow_caching_ptr is non-null then false will be stored in
|
|
|
|
// it if the result contains multiple characters or depends on the
|
|
|
|
// context.
|
2010-07-30 12:59:57 +00:00
|
|
|
// If ranges are linear, a match between a start and end point is
|
|
|
|
// offset by the distance between the match and the start. Otherwise
|
|
|
|
// the result is the same as for the start point on the entire range.
|
|
|
|
template <bool ranges_are_linear, int kW>
|
2008-11-25 11:07:48 +00:00
|
|
|
static int LookupMapping(const int32_t* table,
|
|
|
|
uint16_t size,
|
|
|
|
const MultiCharacterSpecialCase<kW>* multi_chars,
|
|
|
|
uchar chr,
|
|
|
|
uchar next,
|
|
|
|
uchar* result,
|
|
|
|
bool* allow_caching_ptr) {
|
2008-07-03 15:10:15 +00:00
|
|
|
static const int kEntryDist = 2;
|
2010-07-30 12:59:57 +00:00
|
|
|
uint16_t key = chr & (kChunkBits - 1);
|
|
|
|
uint16_t chunk_start = chr - key;
|
2008-07-03 15:10:15 +00:00
|
|
|
unsigned int low = 0;
|
|
|
|
unsigned int high = size - 1;
|
|
|
|
while (high != low) {
|
|
|
|
unsigned int mid = low + ((high - low) >> 1);
|
|
|
|
uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid));
|
|
|
|
// If we've found an entry less than or equal to this one, and the next one
|
|
|
|
// is not also less than this one, we've arrived.
|
2010-07-30 12:59:57 +00:00
|
|
|
if ((current_value <= key) &&
|
2008-07-03 15:10:15 +00:00
|
|
|
(mid + 1 == size ||
|
2010-07-30 12:59:57 +00:00
|
|
|
GetEntry(TableGet<kEntryDist>(table, mid + 1)) > key)) {
|
2008-07-03 15:10:15 +00:00
|
|
|
low = mid;
|
|
|
|
break;
|
2010-07-30 12:59:57 +00:00
|
|
|
} else if (current_value < key) {
|
2008-07-03 15:10:15 +00:00
|
|
|
low = mid + 1;
|
2010-07-30 12:59:57 +00:00
|
|
|
} else if (current_value > key) {
|
2008-07-03 15:10:15 +00:00
|
|
|
// If we've just checked the bottom-most value and it's not
|
|
|
|
// the one we're looking for, we're done.
|
|
|
|
if (mid == 0) break;
|
|
|
|
high = mid - 1;
|
|
|
|
}
|
|
|
|
}
|
2008-11-25 11:07:48 +00:00
|
|
|
int32_t field = TableGet<kEntryDist>(table, low);
|
|
|
|
uchar entry = GetEntry(field);
|
|
|
|
bool is_start = IsStart(field);
|
2010-07-30 12:59:57 +00:00
|
|
|
bool found = (entry == key) || (entry < key && is_start);
|
2008-07-03 15:10:15 +00:00
|
|
|
if (found) {
|
2008-11-25 11:07:48 +00:00
|
|
|
int32_t value = table[2 * low + 1];
|
2008-07-03 15:10:15 +00:00
|
|
|
if (value == 0) {
|
|
|
|
// 0 means not present
|
|
|
|
return 0;
|
|
|
|
} else if ((value & 3) == 0) {
|
|
|
|
// Low bits 0 means a constant offset from the given character.
|
2010-07-30 12:59:57 +00:00
|
|
|
if (ranges_are_linear) {
|
|
|
|
result[0] = chr + (value >> 2);
|
|
|
|
} else {
|
|
|
|
result[0] = entry + chunk_start + (value >> 2);
|
|
|
|
}
|
2008-07-03 15:10:15 +00:00
|
|
|
return 1;
|
|
|
|
} else if ((value & 3) == 1) {
|
|
|
|
// Low bits 1 means a special case mapping
|
|
|
|
if (allow_caching_ptr) *allow_caching_ptr = false;
|
2008-11-25 11:07:48 +00:00
|
|
|
const MultiCharacterSpecialCase<kW>& mapping = multi_chars[value >> 2];
|
2010-07-30 12:59:57 +00:00
|
|
|
int length = 0;
|
|
|
|
for (length = 0; length < kW; length++) {
|
|
|
|
uchar mapped = mapping.chars[length];
|
2011-01-03 10:28:39 +00:00
|
|
|
if (mapped == MultiCharacterSpecialCase<kW>::kEndOfEncoding) break;
|
2010-07-30 12:59:57 +00:00
|
|
|
if (ranges_are_linear) {
|
|
|
|
result[length] = mapped + (key - entry);
|
|
|
|
} else {
|
|
|
|
result[length] = mapped;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return length;
|
2008-07-03 15:10:15 +00:00
|
|
|
} else {
|
|
|
|
// Low bits 2 means a really really special case
|
|
|
|
if (allow_caching_ptr) *allow_caching_ptr = false;
|
|
|
|
// The cases of this switch are defined in unicode.py in the
|
|
|
|
// really_special_cases mapping.
|
|
|
|
switch (value >> 2) {
|
|
|
|
case 1:
|
|
|
|
// Really special case 1: upper case sigma. This letter
|
|
|
|
// converts to two different lower case sigmas depending on
|
|
|
|
// whether or not it occurs at the end of a word.
|
|
|
|
if (next != 0 && Letter::Is(next)) {
|
|
|
|
result[0] = 0x03C3;
|
|
|
|
} else {
|
|
|
|
result[0] = 0x03C2;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[unicode] Return (the correct) errors for overlong / surrogate sequences.
This fix is two-fold:
1) Incremental UTF-8 decoding: Unify incorrect UTF-8 handling between V8 and
Blink.
Incremental UTF-8 decoding used to allow some overlong sequences / invalid code
points which Blink treated as errors. This caused the decoder and the Blink
UTF-8 decoder to produce a different number of bytes, resulting in random
failures when scripts were streamed (especially, this was detected by the
skipping inner functions feature which adds CHECKs against expected function
positions).
2) Non-incremental UTF-8 decoding: return the correct number of invalid characters.
According to the encoding spec ( https://encoding.spec.whatwg.org/#utf-8-decoder
), the first byte of an overlong sequence / invalid code point generates an
invalid character, and the rest of the bytes are not processed (i.e., pushed
back to the byte stream). When they're handled, they will look like lonely
continuation bytes, and will generate an invalid character each.
As a result, an overlong 4-byte sequence should generate 4 invalid characters
(not 1).
This is a potentially breaking change, since the (non-incremental) UTF-8
decoding is exposed via the API (String::NewFromUtf8). The behavioral difference
happens when the client is passing in invalid UTF-8 (containing overlong /
surrogate sequences).
However, afaict, this doesn't change the semantics of any JavaScript program:
according to the ECMAScript spec, the program is a sequence of Unicode code
points, and there's no way to invoke the UTF-8 decoding functionalities from
inside JavaScript. Though, this changes the behavior of d8 when decoding source
files which are invalid UTF-8.
This doesn't change anything related to URI decoding (it already throws
exceptions for overlong sequences / invalid code points).
BUG: chromium:765608, chromium:758236, v8:5516
Bug:
Change-Id: Ib029f6a8e87186794b092e4e8af32d01cee3ada0
Reviewed-on: https://chromium-review.googlesource.com/671020
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Franziska Hinkelmann <franzih@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48105}
2017-09-20 07:32:33 +00:00
|
|
|
// This method decodes an UTF-8 value according to RFC 3629 and
|
|
|
|
// https://encoding.spec.whatwg.org/#utf-8-decoder .
|
2015-05-22 18:47:36 +00:00
|
|
|
uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
DCHECK_GT(max_length, 0);
|
2017-09-26 10:51:17 +00:00
|
|
|
DCHECK_GT(str[0], kMaxOneByteChar);
|
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
State state = State::kAccept;
|
|
|
|
Utf8IncrementalBuffer buffer = 0;
|
|
|
|
uchar t;
|
2016-11-16 11:02:54 +00:00
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
size_t i = 0;
|
|
|
|
do {
|
|
|
|
t = ValueOfIncremental(str[i], &i, &state, &buffer);
|
|
|
|
} while (i < max_length && t == kIncomplete);
|
2016-11-16 11:02:54 +00:00
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
*cursor += i;
|
|
|
|
return (state == State::kAccept) ? t : kBadChar;
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from presenting the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-0xF3 (leading bytes which require any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
// Decodes UTF-8 bytes incrementally, allowing the decoding of bytes as they
|
|
|
|
// stream in. This **must** be followed by a call to ValueOfIncrementalFinish
|
|
|
|
// when the stream is complete, to ensure incomplete sequences are handled.
|
|
|
|
uchar Utf8::ValueOfIncremental(byte next, size_t* cursor, State* state,
|
|
|
|
Utf8IncrementalBuffer* buffer) {
|
Rework scanner-character-streams.
- Smaller, more consistent streams API (Advance, Back, pos, Seek)
- Remove implementations from the header, in favor of creation functions.
Observe:
- Performance:
- All Utf16CharacterStream methods have an inlinable V8_LIKELY w/ a
body of only a few instructions. I expect most calls to end up there.
- There used to be performance problems w/ bookmarking, particularly
with copying too much data on SetBookmark w/ UTF-8 streaming streams.
All those copies are gone.
- The old streaming streams implementation used to copy data even for
2-byte input. It no longer does.
- The only remaining 'slow' method is the Seek(.) slow case for utf-8
streaming streams. I don't expect this to be called a lot; and even if,
I expect it to be offset by the gains in the (vastly more frequent)
calls to the other methods or the 'fast path'.
- If it still bothers us, there are several ways to speed it up.
- API & code cleanliness:
- I want to remove the 'old' API in a follow-up CL, which should mostly
delete code, or replace it 1:1.
- In a 2nd follow-up I want to delete much of the UTF-8 handling in Blink
for streaming streams.
- The "bookmark" is now always implemented (and mostly very fast), so we
should be able to use it for more things.
- Testing & correctness:
- The unit tests now cover all stream implementations,
and are pretty good at triggering all the edge cases.
- Vastly more DCHECKs of the invariants.
BUG=v8:4947
Review-Url: https://codereview.chromium.org/2314663002
Cr-Commit-Position: refs/heads/master@{#39464}
2016-09-16 08:29:41 +00:00
|
|
|
DCHECK_NOT_NULL(buffer);
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
State old_state = *state;
|
|
|
|
*cursor += 1;
|
Rework scanner-character-streams.
- Smaller, more consistent streams API (Advance, Back, pos, Seek)
- Remove implementations from the header, in favor of creation functions.
Observe:
- Performance:
- All Utf16CharacterStream methods have an inlinable V8_LIKELY w/ a
body of only a few instructions. I expect most calls to end up there.
- There used to be performance problems w/ bookmarking, particularly
with copying too much data on SetBookmark w/ UTF-8 streaming streams.
All those copies are gone.
- The old streaming streams implementation used to copy data even for
2-byte input. It no longer does.
- The only remaining 'slow' method is the Seek(.) slow case for utf-8
streaming streams. I don't expect this to be called a lot; and even if,
I expect it to be offset by the gains in the (vastly more frequent)
calls to the other methods or the 'fast path'.
- If it still bothers us, there are several ways to speed it up.
- API & code cleanliness:
- I want to remove the 'old' API in a follow-up CL, which should mostly
delete code, or replace it 1:1.
- In a 2nd follow-up I want to delete much of the UTF-8 handling in Blink
for streaming streams.
- The "bookmark" is now always implemented (and mostly very fast), so we
should be able to use it for more things.
- Testing & correctness:
- The unit tests now cover all stream implementations,
and are pretty good at triggering all the edge cases.
- Vastly more DCHECKs of the invariants.
BUG=v8:4947
Review-Url: https://codereview.chromium.org/2314663002
Cr-Commit-Position: refs/heads/master@{#39464}
2016-09-16 08:29:41 +00:00
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
if (V8_LIKELY(next <= kMaxOneByteChar && old_state == State::kAccept)) {
|
|
|
|
DCHECK_EQ(0u, *buffer);
|
Rework scanner-character-streams.
- Smaller, more consistent streams API (Advance, Back, pos, Seek)
- Remove implementations from the header, in favor of creation functions.
Observe:
- Performance:
- All Utf16CharacterStream methods have an inlinable V8_LIKELY w/ a
body of only a few instructions. I expect most calls to end up there.
- There used to be performance problems w/ bookmarking, particularly
with copying too much data on SetBookmark w/ UTF-8 streaming streams.
All those copies are gone.
- The old streaming streams implementation used to copy data even for
2-byte input. It no longer does.
- The only remaining 'slow' method is the Seek(.) slow case for utf-8
streaming streams. I don't expect this to be called a lot; and even if,
I expect it to be offset by the gains in the (vastly more frequent)
calls to the other methods or the 'fast path'.
- If it still bothers us, there are several ways to speed it up.
- API & code cleanliness:
- I want to remove the 'old' API in a follow-up CL, which should mostly
delete code, or replace it 1:1.
- In a 2nd follow-up I want to delete much of the UTF-8 handling in Blink
for streaming streams.
- The "bookmark" is now always implemented (and mostly very fast), so we
should be able to use it for more things.
- Testing & correctness:
- The unit tests now cover all stream implementations,
and are pretty good at triggering all the edge cases.
- Vastly more DCHECKs of the invariants.
BUG=v8:4947
Review-Url: https://codereview.chromium.org/2314663002
Cr-Commit-Position: refs/heads/master@{#39464}
2016-09-16 08:29:41 +00:00
|
|
|
return static_cast<uchar>(next);
|
|
|
|
}
|
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
// So we're at the lead byte of a 2/3/4 sequence, or we're at a continuation
|
|
|
|
// char in that sequence.
|
|
|
|
Utf8DfaDecoder::Decode(next, state, buffer);
|
Rework scanner-character-streams.
- Smaller, more consistent streams API (Advance, Back, pos, Seek)
- Remove implementations from the header, in favor of creation functions.
Observe:
- Performance:
- All Utf16CharacterStream methods have an inlinable V8_LIKELY w/ a
body of only a few instructions. I expect most calls to end up there.
- There used to be performance problems w/ bookmarking, particularly
with copying too much data on SetBookmark w/ UTF-8 streaming streams.
All those copies are gone.
- The old streaming streams implementation used to copy data even for
2-byte input. It no longer does.
- The only remaining 'slow' method is the Seek(.) slow case for utf-8
streaming streams. I don't expect this to be called a lot; and even if,
I expect it to be offset by the gains in the (vastly more frequent)
calls to the other methods or the 'fast path'.
- If it still bothers us, there are several ways to speed it up.
- API & code cleanliness:
- I want to remove the 'old' API in a follow-up CL, which should mostly
delete code, or replace it 1:1.
- In a 2nd follow-up I want to delete much of the UTF-8 handling in Blink
for streaming streams.
- The "bookmark" is now always implemented (and mostly very fast), so we
should be able to use it for more things.
- Testing & correctness:
- The unit tests now cover all stream implementations,
and are pretty good at triggering all the edge cases.
- Vastly more DCHECKs of the invariants.
BUG=v8:4947
Review-Url: https://codereview.chromium.org/2314663002
Cr-Commit-Position: refs/heads/master@{#39464}
2016-09-16 08:29:41 +00:00
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
switch (*state) {
|
|
|
|
case State::kAccept: {
|
|
|
|
uchar t = *buffer;
|
Rework scanner-character-streams.
- Smaller, more consistent streams API (Advance, Back, pos, Seek)
- Remove implementations from the header, in favor of creation functions.
Observe:
- Performance:
- All Utf16CharacterStream methods have an inlinable V8_LIKELY w/ a
body of only a few instructions. I expect most calls to end up there.
- There used to be performance problems w/ bookmarking, particularly
with copying too much data on SetBookmark w/ UTF-8 streaming streams.
All those copies are gone.
- The old streaming streams implementation used to copy data even for
2-byte input. It no longer does.
- The only remaining 'slow' method is the Seek(.) slow case for utf-8
streaming streams. I don't expect this to be called a lot; and even if,
I expect it to be offset by the gains in the (vastly more frequent)
calls to the other methods or the 'fast path'.
- If it still bothers us, there are several ways to speed it up.
- API & code cleanliness:
- I want to remove the 'old' API in a follow-up CL, which should mostly
delete code, or replace it 1:1.
- In a 2nd follow-up I want to delete much of the UTF-8 handling in Blink
for streaming streams.
- The "bookmark" is now always implemented (and mostly very fast), so we
should be able to use it for more things.
- Testing & correctness:
- The unit tests now cover all stream implementations,
and are pretty good at triggering all the edge cases.
- Vastly more DCHECKs of the invariants.
BUG=v8:4947
Review-Url: https://codereview.chromium.org/2314663002
Cr-Commit-Position: refs/heads/master@{#39464}
2016-09-16 08:29:41 +00:00
|
|
|
*buffer = 0;
|
2016-10-05 17:18:36 +00:00
|
|
|
return t;
|
|
|
|
}
|
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
case State::kReject:
|
|
|
|
*state = State::kAccept;
|
|
|
|
*buffer = 0;
|
[unicode] Return (the correct) errors for overlong / surrogate sequences.
This fix is two-fold:
1) Incremental UTF-8 decoding: Unify incorrect UTF-8 handling between V8 and
Blink.
Incremental UTF-8 decoding used to allow some overlong sequences / invalid code
points which Blink treated as errors. This caused the decoder and the Blink
UTF-8 decoder to produce a different number of bytes, resulting in random
failures when scripts were streamed (especially, this was detected by the
skipping inner functions feature which adds CHECKs against expected function
positions).
2) Non-incremental UTF-8 decoding: return the correct amount of invalid characters.
According to the encoding spec ( https://encoding.spec.whatwg.org/#utf-8-decoder
), the first byte of an overlong sequence / invalid code point generates an
invalid character, and the rest of the bytes are not processed (i.e., pushed
back to the byte stream). When they're handled, they will look like lonely
continuation bytes, and will generate an invalid character each.
As a result, an overlong 4-byte sequence should generate 4 invalid characters
(not 1).
This is a potentially breaking change, since the (non-incremental) UTF-8
decoding is exposed via the API (String::NewFromUtf8). The behavioral difference
happens when the client is passing in invalid UTF-8 (containing overlong /
surrogate sequences).
However, afaict, this doesn't change the semantics of any JavaScript program:
according to the ECMAScript spec, the program is a sequence of Unicode code
points, and there's no way to invoke the UTF-8 decoding functionalities from
inside JavaScript. Though, this changes the behavior of d8 when decoding source
files which are invalid UTF-8.
This doesn't change anything related to URI decoding (it already throws
exceptions for overlong sequences / invalid code points).
BUG: chromium:765608, chromium:758236, v8:5516
Bug:
Change-Id: Ib029f6a8e87186794b092e4e8af32d01cee3ada0
Reviewed-on: https://chromium-review.googlesource.com/671020
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Franziska Hinkelmann <franzih@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48105}
2017-09-20 07:32:33 +00:00
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
// If we hit a bad byte, we need to determine if we were trying to start
|
|
|
|
// a sequence or continue one. If we were trying to start a sequence,
|
|
|
|
// that means it's just an invalid lead byte and we need to continue to
|
|
|
|
// the next (which we already did above). If we were already in a
|
|
|
|
// sequence, we need to reprocess this same byte after resetting to the
|
|
|
|
// initial state.
|
|
|
|
if (old_state != State::kAccept) {
|
|
|
|
// We were trying to continue a sequence, so let's reprocess this byte
|
|
|
|
// next time.
|
|
|
|
*cursor -= 1;
|
[unicode] Return (the correct) errors for overlong / surrogate sequences.
This fix is two-fold:
1) Incremental UTF-8 decoding: Unify incorrect UTF-8 handling between V8 and
Blink.
Incremental UTF-8 decoding used to allow some overlong sequences / invalid code
points which Blink treated as errors. This caused the decoder and the Blink
UTF-8 decoder to produce a different number of bytes, resulting in random
failures when scripts were streamed (especially, this was detected by the
skipping inner functions feature which adds CHECKs against expected function
positions).
2) Non-incremental UTF-8 decoding: return the correct amount of invalid characters.
According to the encoding spec ( https://encoding.spec.whatwg.org/#utf-8-decoder
), the first byte of an overlong sequence / invalid code point generates an
invalid character, and the rest of the bytes are not processed (i.e., pushed
back to the byte stream). When they're handled, they will look like lonely
continuation bytes, and will generate an invalid character each.
As a result, an overlong 4-byte sequence should generate 4 invalid characters
(not 1).
This is a potentially breaking change, since the (non-incremental) UTF-8
decoding is exposed via the API (String::NewFromUtf8). The behavioral difference
happens when the client is passing in invalid UTF-8 (containing overlong /
surrogate sequences).
However, afaict, this doesn't change the semantics of any JavaScript program:
according to the ECMAScript spec, the program is a sequence of Unicode code
points, and there's no way to invoke the UTF-8 decoding functionalities from
inside JavaScript. Though, this changes the behavior of d8 when decoding source
files which are invalid UTF-8.
This doesn't change anything related to URI decoding (it already throws
exceptions for overlong sequences / invalid code points).
BUG: chromium:765608, chromium:758236, v8:5516
Bug:
Change-Id: Ib029f6a8e87186794b092e4e8af32d01cee3ada0
Reviewed-on: https://chromium-review.googlesource.com/671020
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Franziska Hinkelmann <franzih@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48105}
2017-09-20 07:32:33 +00:00
|
|
|
}
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from presenting the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
return kBadChar;
|
[unicode] Return (the correct) errors for overlong / surrogate sequences.
This fix is two-fold:
1) Incremental UTF-8 decoding: Unify incorrect UTF-8 handling between V8 and
Blink.
Incremental UTF-8 decoding used to allow some overlong sequences / invalid code
points which Blink treated as errors. This caused the decoder and the Blink
UTF-8 decoder to produce a different number of bytes, resulting in random
failures when scripts were streamed (especially, this was detected by the
skipping inner functions feature which adds CHECKs against expected function
positions).
2) Non-incremental UTF-8 decoding: return the correct amount of invalid characters.
According to the encoding spec ( https://encoding.spec.whatwg.org/#utf-8-decoder
), the first byte of an overlong sequence / invalid code point generates an
invalid character, and the rest of the bytes are not processed (i.e., pushed
back to the byte stream). When they're handled, they will look like lonely
continuation bytes, and will generate an invalid character each.
As a result, an overlong 4-byte sequence should generate 4 invalid characters
(not 1).
This is a potentially breaking change, since the (non-incremental) UTF-8
decoding is exposed via the API (String::NewFromUtf8). The behavioral difference
happens when the client is passing in invalid UTF-8 (containing overlong /
surrogate sequences).
However, afaict, this doesn't change the semantics of any JavaScript program:
according to the ECMAScript spec, the program is a sequence of Unicode code
points, and there's no way to invoke the UTF-8 decoding functionalities from
inside JavaScript. Though, this changes the behavior of d8 when decoding source
files which are invalid UTF-8.
This doesn't change anything related to URI decoding (it already throws
exceptions for overlong sequences / invalid code points).
BUG: chromium:765608, chromium:758236, v8:5516
Bug:
Change-Id: Ib029f6a8e87186794b092e4e8af32d01cee3ada0
Reviewed-on: https://chromium-review.googlesource.com/671020
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Franziska Hinkelmann <franzih@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#48105}
2017-09-20 07:32:33 +00:00
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from presenting the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
default:
|
2016-10-05 17:18:36 +00:00
|
|
|
return kIncomplete;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from presenting the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get an index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key difference from Björn's original (or his self-modified) DFA is that
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replaced his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
// Finishes the incremental decoding, ensuring that if an unfinished sequence
|
|
|
|
// is left that it is replaced by a replacement char.
//
// |state| is the in/out decoder state. When *state is already State::kAccept
// there is nothing pending and kBufferEmpty is returned with *state untouched.
// Otherwise *state is reset to State::kAccept and kBadChar is returned for the
// unfinished sequence.
|
|
|
|
uchar Utf8::ValueOfIncrementalFinish(State* state) {
|
|
|
|
if (*state == State::kAccept) {
|
2016-10-05 17:18:36 +00:00
|
|
|
return kBufferEmpty;
|
|
|
|
} else {
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
DCHECK_GT(*state, State::kAccept);
|
|
|
|
*state = State::kAccept;
|
|
|
|
return kBadChar;
|
Rework scanner-character-streams.
- Smaller, more consistent streams API (Advance, Back, pos, Seek)
- Remove implementations from the header, in favor of creation functions.
Observe:
- Performance:
- All Utf16CharacterStream methods have an inlinable V8_LIKELY w/ a
body of only a few instructions. I expect most calls to end up there.
- There used to be performance problems w/ bookmarking, particularly
with copying too much data on SetBookmark w/ UTF-8 streaming streams.
All those copies are gone.
- The old streaming streams implementation used to copy data even for
2-byte input. It no longer does.
- The only remaining 'slow' method is the Seek(.) slow case for utf-8
streaming streams. I don't expect this to be called a lot; and even if,
I expect it to be offset by the gains in the (vastly more frequent)
calls to the other methods or the 'fast path'.
- If it still bothers us, there are several ways to speed it up.
- API & code cleanliness:
- I want to remove the 'old' API in a follow-up CL, which should mostly
delete code, or replace it 1:1.
- In a 2nd follow-up I want to delete much of the UTF-8 handling in Blink
for streaming streams.
- The "bookmark" is now always implemented (and mostly very fast), so we
should be able to use it for more things.
- Testing & correctness:
- The unit tests now cover all stream implementations,
and are pretty good and triggering all the edge cases.
- Vastly more DCHECKs of the invariants.
BUG=v8:4947
Review-Url: https://codereview.chromium.org/2314663002
Cr-Commit-Position: refs/heads/master@{#39464}
2016-09-16 08:29:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-22 15:53:41 +00:00
|
|
|
// Feeds bytes[0..length) one byte at a time through Utf8DfaDecoder::Decode,
// starting from State::kAccept and stopping early once the state becomes
// State::kReject. Returns true only if the final state is State::kAccept, so
// an input that ends in the middle of a sequence yields false. An empty input
// (length == 0) never enters the loop and yields true. The decoded value
// written into throw_away is never read by this function.
bool Utf8::ValidateEncoding(const byte* bytes, size_t length) {
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
State state = State::kAccept;
|
|
|
|
Utf8IncrementalBuffer throw_away = 0;
|
|
|
|
for (size_t i = 0; i < length && state != State::kReject; i++) {
|
|
|
|
Utf8DfaDecoder::Decode(bytes[i], &state, &throw_away);
|
2016-05-12 13:01:10 +00:00
|
|
|
}
|
Implement DFA Unicode Decoder
This is a separation of the DFA Unicode Decoder from
https://chromium-review.googlesource.com/c/v8/v8/+/789560
I attempted to make the DFA's table a bit more explicit in this CL. Still, the
linter prevents me from letting me present the array as a "table" in source
code. For a better representation, please refer to
https://docs.google.com/spreadsheets/d/1L9STtkmWs-A7HdK5ZmZ-wPZ_VBjQ3-Jj_xN9c6_hLKA
- - - - -
Now for a big copy-paste from 789560:
Essentially, reworks a standard FSM (imagine an
array of structs) and flattens it out into a single-dimension array.
Using Table 3-7 of the Unicode 10.0.0 standard (page 126 of
http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf), we can nicely
map all bytes into one of 12 character classes:
00. 0x00-0x7F
01. 0x80-0x8F (split from general continuation because this range is not
valid after a 0xF0 leading byte)
02. 0x90-0x9F (split from general continuation because this range is not
valid after a 0xE0 nor a 0xF4 leading byte)
03. 0xA0-0xBF (the rest of the continuation range)
04. 0xC0-0xC1, 0xF5-0xFF (the joined range of invalid bytes, notice this
includes 255 which we use as a known bad byte during hex-to-int
decoding)
05. 0xC2-0xDF (leading bytes which require any continuation byte
afterwards)
06. 0xE0 (leading byte which requires a 0xA0-0xBF afterwards then any
continuation byte after that)
07. 0xE1-0xEC, 0xEE-0xEF (leading bytes which requires any continuation
afterwards then any continuation byte after that)
08. 0xED (leading byte which requires a 0x80-0x9F afterwards then any
continuation byte after that)
09. 0xF1-F3 (leading bytes which requires any continuation byte
afterwards then any continuation byte then any continuation byte)
10. 0xF0 (leading bytes which requires a 0x90-0xBF afterwards then any
continuation byte then any continuation byte)
11. 0xF4 (leading bytes which requires a 0x80-0x8F afterwards then any
continuation byte then any continuation byte)
Note that 0xF0 and 0xF1-0xF3 were swapped so that fewer bytes were
needed to represent the transition state ("9, 10, 10, 10" vs.
"10, 9, 9, 9").
Using these 12 classes as "transitions", we can map from one state to
the next. Each state is defined as some multiple of 12, so that we're
always starting at the 0th column of each row of the FSM. From each
state, we add the transition and get a index of the new row the FSM is
entering.
If at any point we encounter a bad byte, the state + bad-byte-transition
is guaranteed to map us into the first row of the FSM (which contains no
valid exiting transitions).
The key differences from Björn's original (or his self-modified) DFA is
the "bad" state is now mapped to 0 (or the first row of the FSM) instead
of 12 (the second row). This saves ~50 bytes when gzipping, and also
speeds up determining if a string is properly encoded (see his sample
code at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/#performance).
Finally, I've replace his ternary check with an array access, to make
the algorithm branchless. This places a requirement on the caller to 0
out the code point between successful decodings, which it could always
have done because it's already branching.
R=marja@google.com
Bug:
Change-Id: I574f208a84dc5d06caba17127b0d41f7ce1a3395
Reviewed-on: https://chromium-review.googlesource.com/805357
Commit-Queue: Justin Ridgewell <jridgewell@google.com>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50012}
2017-12-11 20:58:27 +00:00
|
|
|
return state == State::kAccept;
|
2016-05-12 13:01:10 +00:00
|
|
|
}
|
2012-03-12 12:35:28 +00:00
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
// Uppercase: point.category == 'Lu'
|
2017-06-14 20:32:49 +00:00
|
|
|
// TODO(jshin): Check if it's ok to exclude Other_Uppercase characters.
|
|
|
|
#ifdef V8_INTL_SUPPORT
|
|
|
|
// ICU-backed variant: forwards |c| to u_isupper() and reports whether the
// result is non-zero.
bool Uppercase::Is(uchar c) {
  return u_isupper(c) != 0;
}
|
|
|
|
#else
|
2014-10-08 14:55:03 +00:00
|
|
|
// Uppercase lookup data, table 0. kUppercaseTable0Size mirrors the declared
// array length (455). Some values are at or above 1 << 30 (kStartBit), i.e.
// they carry bit 30 on top of a small value: 1073741889 == (1 << 30) + 65.
// NOTE(review): presumably a flagged entry opens a range that the following
// entry closes (65..90 would be 'A'..'Z'), while other entries stand for a
// single code point -- confirm against the lookup routine, which is outside
// this chunk.
static const uint16_t kUppercaseTable0Size = 455;
|
|
|
|
static const int32_t kUppercaseTable0[455] = {
|
|
|
|
1073741889, 90, 1073742016, 214,
|
|
|
|
1073742040, 222, 256, 258, // NOLINT
|
|
|
|
260, 262, 264, 266,
|
|
|
|
268, 270, 272, 274, // NOLINT
|
|
|
|
276, 278, 280, 282,
|
|
|
|
284, 286, 288, 290, // NOLINT
|
|
|
|
292, 294, 296, 298,
|
|
|
|
300, 302, 304, 306, // NOLINT
|
|
|
|
308, 310, 313, 315,
|
|
|
|
317, 319, 321, 323, // NOLINT
|
|
|
|
325, 327, 330, 332,
|
|
|
|
334, 336, 338, 340, // NOLINT
|
|
|
|
342, 344, 346, 348,
|
|
|
|
350, 352, 354, 356, // NOLINT
|
|
|
|
358, 360, 362, 364,
|
|
|
|
366, 368, 370, 372, // NOLINT
|
|
|
|
374, 1073742200, 377, 379,
|
|
|
|
381, 1073742209, 386, 388, // NOLINT
|
|
|
|
1073742214, 391, 1073742217, 395,
|
|
|
|
1073742222, 401, 1073742227, 404, // NOLINT
|
|
|
|
1073742230, 408, 1073742236, 413,
|
|
|
|
1073742239, 416, 418, 420, // NOLINT
|
|
|
|
1073742246, 423, 425, 428,
|
|
|
|
1073742254, 431, 1073742257, 435, // NOLINT
|
|
|
|
437, 1073742263, 440, 444,
|
|
|
|
452, 455, 458, 461, // NOLINT
|
|
|
|
463, 465, 467, 469,
|
|
|
|
471, 473, 475, 478, // NOLINT
|
|
|
|
480, 482, 484, 486,
|
|
|
|
488, 490, 492, 494, // NOLINT
|
|
|
|
497, 500, 1073742326, 504,
|
|
|
|
506, 508, 510, 512, // NOLINT
|
|
|
|
514, 516, 518, 520,
|
|
|
|
522, 524, 526, 528, // NOLINT
|
|
|
|
530, 532, 534, 536,
|
|
|
|
538, 540, 542, 544, // NOLINT
|
|
|
|
546, 548, 550, 552,
|
|
|
|
554, 556, 558, 560, // NOLINT
|
|
|
|
562, 1073742394, 571, 1073742397,
|
|
|
|
574, 577, 1073742403, 582, // NOLINT
|
|
|
|
584, 586, 588, 590,
|
|
|
|
880, 882, 886, 895, // NOLINT
|
|
|
|
902, 1073742728, 906, 908,
|
|
|
|
1073742734, 911, 1073742737, 929, // NOLINT
|
|
|
|
1073742755, 939, 975, 1073742802,
|
|
|
|
980, 984, 986, 988, // NOLINT
|
|
|
|
990, 992, 994, 996,
|
|
|
|
998, 1000, 1002, 1004, // NOLINT
|
|
|
|
1006, 1012, 1015, 1073742841,
|
|
|
|
1018, 1073742845, 1071, 1120, // NOLINT
|
|
|
|
1122, 1124, 1126, 1128,
|
|
|
|
1130, 1132, 1134, 1136, // NOLINT
|
|
|
|
1138, 1140, 1142, 1144,
|
|
|
|
1146, 1148, 1150, 1152, // NOLINT
|
|
|
|
1162, 1164, 1166, 1168,
|
|
|
|
1170, 1172, 1174, 1176, // NOLINT
|
|
|
|
1178, 1180, 1182, 1184,
|
|
|
|
1186, 1188, 1190, 1192, // NOLINT
|
|
|
|
1194, 1196, 1198, 1200,
|
|
|
|
1202, 1204, 1206, 1208, // NOLINT
|
|
|
|
1210, 1212, 1214, 1073743040,
|
|
|
|
1217, 1219, 1221, 1223, // NOLINT
|
|
|
|
1225, 1227, 1229, 1232,
|
|
|
|
1234, 1236, 1238, 1240, // NOLINT
|
|
|
|
1242, 1244, 1246, 1248,
|
|
|
|
1250, 1252, 1254, 1256, // NOLINT
|
|
|
|
1258, 1260, 1262, 1264,
|
|
|
|
1266, 1268, 1270, 1272, // NOLINT
|
|
|
|
1274, 1276, 1278, 1280,
|
|
|
|
1282, 1284, 1286, 1288, // NOLINT
|
|
|
|
1290, 1292, 1294, 1296,
|
|
|
|
1298, 1300, 1302, 1304, // NOLINT
|
|
|
|
1306, 1308, 1310, 1312,
|
|
|
|
1314, 1316, 1318, 1320, // NOLINT
|
|
|
|
1322, 1324, 1326, 1073743153,
|
|
|
|
1366, 1073746080, 4293, 4295, // NOLINT
|
|
|
|
4301, 7680, 7682, 7684,
|
|
|
|
7686, 7688, 7690, 7692, // NOLINT
|
|
|
|
7694, 7696, 7698, 7700,
|
|
|
|
7702, 7704, 7706, 7708, // NOLINT
|
|
|
|
7710, 7712, 7714, 7716,
|
|
|
|
7718, 7720, 7722, 7724, // NOLINT
|
|
|
|
7726, 7728, 7730, 7732,
|
|
|
|
7734, 7736, 7738, 7740, // NOLINT
|
|
|
|
7742, 7744, 7746, 7748,
|
|
|
|
7750, 7752, 7754, 7756, // NOLINT
|
|
|
|
7758, 7760, 7762, 7764,
|
|
|
|
7766, 7768, 7770, 7772, // NOLINT
|
|
|
|
7774, 7776, 7778, 7780,
|
|
|
|
7782, 7784, 7786, 7788, // NOLINT
|
|
|
|
7790, 7792, 7794, 7796,
|
|
|
|
7798, 7800, 7802, 7804, // NOLINT
|
|
|
|
7806, 7808, 7810, 7812,
|
|
|
|
7814, 7816, 7818, 7820, // NOLINT
|
|
|
|
7822, 7824, 7826, 7828,
|
|
|
|
7838, 7840, 7842, 7844, // NOLINT
|
|
|
|
7846, 7848, 7850, 7852,
|
|
|
|
7854, 7856, 7858, 7860, // NOLINT
|
|
|
|
7862, 7864, 7866, 7868,
|
|
|
|
7870, 7872, 7874, 7876, // NOLINT
|
|
|
|
7878, 7880, 7882, 7884,
|
|
|
|
7886, 7888, 7890, 7892, // NOLINT
|
|
|
|
7894, 7896, 7898, 7900,
|
|
|
|
7902, 7904, 7906, 7908, // NOLINT
|
|
|
|
7910, 7912, 7914, 7916,
|
|
|
|
7918, 7920, 7922, 7924, // NOLINT
|
|
|
|
7926, 7928, 7930, 7932,
|
|
|
|
7934, 1073749768, 7951, 1073749784, // NOLINT
|
|
|
|
7965, 1073749800, 7983, 1073749816,
|
|
|
|
7999, 1073749832, 8013, 8025, // NOLINT
|
|
|
|
8027, 8029, 8031, 1073749864,
|
|
|
|
8047, 1073749944, 8123, 1073749960, // NOLINT
|
|
|
|
8139, 1073749976, 8155, 1073749992,
|
|
|
|
8172, 1073750008, 8187}; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const uint16_t kUppercaseTable1Size = 86;
|
|
|
|
static const int32_t kUppercaseTable1[86] = {
|
2010-07-30 12:59:57 +00:00
|
|
|
258, 263, 1073742091, 269, 1073742096, 274, 277, 1073742105, // NOLINT
|
|
|
|
285, 292, 294, 296, 1073742122, 301, 1073742128, 307, // NOLINT
|
|
|
|
1073742142, 319, 325, 387, 1073744896, 3118, 3168, 1073744994, // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
3172, 3175, 3177, 3179, 1073745005, 3184, 3186, 3189, // NOLINT
|
|
|
|
1073745022, 3200, 3202, 3204, 3206, 3208, 3210, 3212, // NOLINT
|
|
|
|
3214, 3216, 3218, 3220, 3222, 3224, 3226, 3228, // NOLINT
|
|
|
|
3230, 3232, 3234, 3236, 3238, 3240, 3242, 3244, // NOLINT
|
|
|
|
3246, 3248, 3250, 3252, 3254, 3256, 3258, 3260, // NOLINT
|
|
|
|
3262, 3264, 3266, 3268, 3270, 3272, 3274, 3276, // NOLINT
|
|
|
|
3278, 3280, 3282, 3284, 3286, 3288, 3290, 3292, // NOLINT
|
|
|
|
3294, 3296, 3298, 3307, 3309, 3314 }; // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kUppercaseTable5Size = 101;
|
|
|
|
static const int32_t kUppercaseTable5[101] = {
|
|
|
|
1600, 1602, 1604, 1606, 1608, 1610, 1612, 1614, // NOLINT
|
|
|
|
1616, 1618, 1620, 1622, 1624, 1626, 1628, 1630, // NOLINT
|
|
|
|
1632, 1634, 1636, 1638, 1640, 1642, 1644, 1664, // NOLINT
|
|
|
|
1666, 1668, 1670, 1672, 1674, 1676, 1678, 1680, // NOLINT
|
|
|
|
1682, 1684, 1686, 1688, 1690, 1826, 1828, 1830, // NOLINT
|
|
|
|
1832, 1834, 1836, 1838, 1842, 1844, 1846, 1848, // NOLINT
|
|
|
|
1850, 1852, 1854, 1856, 1858, 1860, 1862, 1864, // NOLINT
|
|
|
|
1866, 1868, 1870, 1872, 1874, 1876, 1878, 1880, // NOLINT
|
|
|
|
1882, 1884, 1886, 1888, 1890, 1892, 1894, 1896, // NOLINT
|
|
|
|
1898, 1900, 1902, 1913, 1915, 1073743741, 1918, 1920, // NOLINT
|
|
|
|
1922, 1924, 1926, 1931, 1933, 1936, 1938, 1942, // NOLINT
|
|
|
|
1944, 1946, 1948, 1950, 1952, 1954, 1956, 1958, // NOLINT
|
|
|
|
1960, 1073743786, 1965, 1073743792, 1969}; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Uppercase lookup data for chunk 7, i.e. the table Uppercase::Is consults
// when (c >> 13) == 7.
// NOTE(review): entries look like chunk-relative offsets, with 1 << 30
// (kStartBit) presumably flagging the start of a range closed by the next
// entry -- confirm against LookupPredicate.
static const uint16_t kUppercaseTable7Size = 2;
static const int32_t kUppercaseTable7[2] = {1073749793, 7994};  // NOLINT
|
2008-07-03 15:10:15 +00:00
|
|
|
// Table-driven Uppercase predicate. The bits of |c| above the low 13 select
// the per-chunk table handed to LookupPredicate; only chunks 0, 1, 5 and 7
// have a table, every other chunk answers false.
bool Uppercase::Is(uchar c) {
  switch (c >> 13) {
    case 0:
      return LookupPredicate(kUppercaseTable0, kUppercaseTable0Size, c);
    case 1:
      return LookupPredicate(kUppercaseTable1, kUppercaseTable1Size, c);
    case 5:
      return LookupPredicate(kUppercaseTable5, kUppercaseTable5Size, c);
    case 7:
      return LookupPredicate(kUppercaseTable7, kUppercaseTable7Size, c);
    default:
      return false;
  }
}
|
2017-06-14 20:32:49 +00:00
|
|
|
#endif // V8_INTL_SUPPORT
|
2013-07-05 09:52:11 +00:00
|
|
|
|
2014-10-08 14:55:03 +00:00
|
|
|
// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']
|
2017-06-14 20:32:49 +00:00
|
|
|
#ifdef V8_INTL_SUPPORT
|
|
|
|
// ICU-backed Letter predicate. u_isalpha() is true only for the L* general
// categories (Lu, Ll, Lt, Lm, Lo), so letter numbers (Nl) are checked
// separately. This keeps the ICU build in line with the category list
// documented for Letter and with the table-driven fallback, which also
// treats Nl code points (e.g. U+2160 ROMAN NUMERAL ONE) as letters.
bool Letter::Is(uchar c) {
  return u_isalpha(c) || u_charType(c) == U_LETTER_NUMBER;
}
|
|
|
|
#else
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kLetterTable0Size = 431;
|
|
|
|
static const int32_t kLetterTable0[431] = {
|
|
|
|
1073741889, 90, 1073741921, 122,
|
|
|
|
170, 181, 186, 1073742016, // NOLINT
|
|
|
|
214, 1073742040, 246, 1073742072,
|
|
|
|
705, 1073742534, 721, 1073742560, // NOLINT
|
|
|
|
740, 748, 750, 1073742704,
|
|
|
|
884, 1073742710, 887, 1073742714, // NOLINT
|
|
|
|
893, 895, 902, 1073742728,
|
|
|
|
906, 908, 1073742734, 929, // NOLINT
|
|
|
|
1073742755, 1013, 1073742839, 1153,
|
|
|
|
1073742986, 1327, 1073743153, 1366, // NOLINT
|
|
|
|
1369, 1073743201, 1415, 1073743312,
|
|
|
|
1514, 1073743344, 1522, 1073743392, // NOLINT
|
|
|
|
1610, 1073743470, 1647, 1073743473,
|
|
|
|
1747, 1749, 1073743589, 1766, // NOLINT
|
|
|
|
1073743598, 1775, 1073743610, 1788,
|
|
|
|
1791, 1808, 1073743634, 1839, // NOLINT
|
|
|
|
1073743693, 1957, 1969, 1073743818,
|
|
|
|
2026, 1073743860, 2037, 2042, // NOLINT
|
|
|
|
1073743872, 2069, 2074, 2084,
|
|
|
|
2088, 1073743936, 2136, 1073744032, // NOLINT
|
|
|
|
2226, 1073744132, 2361, 2365,
|
|
|
|
2384, 1073744216, 2401, 1073744241, // NOLINT
|
|
|
|
2432, 1073744261, 2444, 1073744271,
|
|
|
|
2448, 1073744275, 2472, 1073744298, // NOLINT
|
|
|
|
2480, 2482, 1073744310, 2489,
|
|
|
|
2493, 2510, 1073744348, 2525, // NOLINT
|
|
|
|
1073744351, 2529, 1073744368, 2545,
|
|
|
|
1073744389, 2570, 1073744399, 2576, // NOLINT
|
|
|
|
1073744403, 2600, 1073744426, 2608,
|
|
|
|
1073744434, 2611, 1073744437, 2614, // NOLINT
|
|
|
|
1073744440, 2617, 1073744473, 2652,
|
|
|
|
2654, 1073744498, 2676, 1073744517, // NOLINT
|
|
|
|
2701, 1073744527, 2705, 1073744531,
|
|
|
|
2728, 1073744554, 2736, 1073744562, // NOLINT
|
|
|
|
2739, 1073744565, 2745, 2749,
|
|
|
|
2768, 1073744608, 2785, 1073744645, // NOLINT
|
|
|
|
2828, 1073744655, 2832, 1073744659,
|
|
|
|
2856, 1073744682, 2864, 1073744690, // NOLINT
|
|
|
|
2867, 1073744693, 2873, 2877,
|
|
|
|
1073744732, 2909, 1073744735, 2913, // NOLINT
|
|
|
|
2929, 2947, 1073744773, 2954,
|
|
|
|
1073744782, 2960, 1073744786, 2965, // NOLINT
|
|
|
|
1073744793, 2970, 2972, 1073744798,
|
|
|
|
2975, 1073744803, 2980, 1073744808, // NOLINT
|
|
|
|
2986, 1073744814, 3001, 3024,
|
|
|
|
1073744901, 3084, 1073744910, 3088, // NOLINT
|
|
|
|
1073744914, 3112, 1073744938, 3129,
|
|
|
|
3133, 1073744984, 3161, 1073744992, // NOLINT
|
|
|
|
3169, 1073745029, 3212, 1073745038,
|
|
|
|
3216, 1073745042, 3240, 1073745066, // NOLINT
|
|
|
|
3251, 1073745077, 3257, 3261,
|
|
|
|
3294, 1073745120, 3297, 1073745137, // NOLINT
|
|
|
|
3314, 1073745157, 3340, 1073745166,
|
|
|
|
3344, 1073745170, 3386, 3389, // NOLINT
|
|
|
|
3406, 1073745248, 3425, 1073745274,
|
|
|
|
3455, 1073745285, 3478, 1073745306, // NOLINT
|
|
|
|
3505, 1073745331, 3515, 3517,
|
|
|
|
1073745344, 3526, 1073745409, 3632, // NOLINT
|
|
|
|
1073745458, 3635, 1073745472, 3654,
|
|
|
|
1073745537, 3714, 3716, 1073745543, // NOLINT
|
|
|
|
3720, 3722, 3725, 1073745556,
|
|
|
|
3735, 1073745561, 3743, 1073745569, // NOLINT
|
|
|
|
3747, 3749, 3751, 1073745578,
|
|
|
|
3755, 1073745581, 3760, 1073745586, // NOLINT
|
|
|
|
3763, 3773, 1073745600, 3780,
|
|
|
|
3782, 1073745628, 3807, 3840, // NOLINT
|
|
|
|
1073745728, 3911, 1073745737, 3948,
|
|
|
|
1073745800, 3980, 1073745920, 4138, // NOLINT
|
|
|
|
4159, 1073746000, 4181, 1073746010,
|
|
|
|
4189, 4193, 1073746021, 4198, // NOLINT
|
|
|
|
1073746030, 4208, 1073746037, 4225,
|
|
|
|
4238, 1073746080, 4293, 4295, // NOLINT
|
|
|
|
4301, 1073746128, 4346, 1073746172,
|
|
|
|
4680, 1073746506, 4685, 1073746512, // NOLINT
|
|
|
|
4694, 4696, 1073746522, 4701,
|
|
|
|
1073746528, 4744, 1073746570, 4749, // NOLINT
|
|
|
|
1073746576, 4784, 1073746610, 4789,
|
|
|
|
1073746616, 4798, 4800, 1073746626, // NOLINT
|
|
|
|
4805, 1073746632, 4822, 1073746648,
|
|
|
|
4880, 1073746706, 4885, 1073746712, // NOLINT
|
|
|
|
4954, 1073746816, 5007, 1073746848,
|
|
|
|
5108, 1073746945, 5740, 1073747567, // NOLINT
|
|
|
|
5759, 1073747585, 5786, 1073747616,
|
|
|
|
5866, 1073747694, 5880, 1073747712, // NOLINT
|
|
|
|
5900, 1073747726, 5905, 1073747744,
|
|
|
|
5937, 1073747776, 5969, 1073747808, // NOLINT
|
|
|
|
5996, 1073747822, 6000, 1073747840,
|
|
|
|
6067, 6103, 6108, 1073748000, // NOLINT
|
|
|
|
6263, 1073748096, 6312, 6314,
|
|
|
|
1073748144, 6389, 1073748224, 6430, // NOLINT
|
|
|
|
1073748304, 6509, 1073748336, 6516,
|
|
|
|
1073748352, 6571, 1073748417, 6599, // NOLINT
|
|
|
|
1073748480, 6678, 1073748512, 6740,
|
|
|
|
6823, 1073748741, 6963, 1073748805, // NOLINT
|
|
|
|
6987, 1073748867, 7072, 1073748910,
|
|
|
|
7087, 1073748922, 7141, 1073748992, // NOLINT
|
|
|
|
7203, 1073749069, 7247, 1073749082,
|
|
|
|
7293, 1073749225, 7404, 1073749230, // NOLINT
|
|
|
|
7409, 1073749237, 7414, 1073749248,
|
|
|
|
7615, 1073749504, 7957, 1073749784, // NOLINT
|
|
|
|
7965, 1073749792, 8005, 1073749832,
|
|
|
|
8013, 1073749840, 8023, 8025, // NOLINT
|
|
|
|
8027, 8029, 1073749855, 8061,
|
|
|
|
1073749888, 8116, 1073749942, 8124, // NOLINT
|
|
|
|
8126, 1073749954, 8132, 1073749958,
|
|
|
|
8140, 1073749968, 8147, 1073749974, // NOLINT
|
|
|
|
8155, 1073749984, 8172, 1073750002,
|
|
|
|
8180, 1073750006, 8188}; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const uint16_t kLetterTable1Size = 87;
|
|
|
|
static const int32_t kLetterTable1[87] = {
|
|
|
|
113, 127, 1073741968, 156, 258, 263, 1073742090, 275, // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
277, 1073742105, 285, 292, 294, 296, 1073742122, 301, // NOLINT
|
|
|
|
1073742127, 313, 1073742140, 319, 1073742149, 329, 334, 1073742176, // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
392, 1073744896, 3118, 1073744944, 3166, 1073744992, 3300, 1073745131, // NOLINT
|
|
|
|
3310, 1073745138, 3315, 1073745152, 3365, 3367, 3373, 1073745200, // NOLINT
|
|
|
|
3431, 3439, 1073745280, 3478, 1073745312, 3494, 1073745320, 3502, // NOLINT
|
|
|
|
1073745328, 3510, 1073745336, 3518, 1073745344, 3526, 1073745352, 3534, // NOLINT
|
|
|
|
1073745360, 3542, 1073745368, 3550, 3631, 1073745925, 4103, 1073745953, // NOLINT
|
|
|
|
4137, 1073745969, 4149, 1073745976, 4156, 1073745985, 4246, 1073746077, // NOLINT
|
|
|
|
4255, 1073746081, 4346, 1073746172, 4351, 1073746181, 4397, 1073746225, // NOLINT
|
|
|
|
4494, 1073746336, 4538, 1073746416, 4607, 1073746944, 8191 }; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Letter lookup data for chunks 2, 3 and 4 (selected by Letter::Is through
// c >> 13).
// NOTE(review): entries look like chunk-relative offsets, with 1 << 30
// (kStartBit) presumably flagging the start of a range closed by the next
// entry -- confirm against LookupPredicate.
static const uint16_t kLetterTable2Size = 4;
static const int32_t kLetterTable2[4] = {1073741824, 3509, 1073745408,
                                         8191};  // NOLINT
static const uint16_t kLetterTable3Size = 2;
static const int32_t kLetterTable3[2] = {1073741824, 8191};  // NOLINT
static const uint16_t kLetterTable4Size = 2;
static const int32_t kLetterTable4[2] = {1073741824, 8140};  // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kLetterTable5Size = 100;
|
|
|
|
static const int32_t kLetterTable5[100] = {
|
|
|
|
1073741824, 1164, 1073743056, 1277,
|
|
|
|
1073743104, 1548, 1073743376, 1567, // NOLINT
|
|
|
|
1073743402, 1579, 1073743424, 1646,
|
|
|
|
1073743487, 1693, 1073743520, 1775, // NOLINT
|
|
|
|
1073743639, 1823, 1073743650, 1928,
|
|
|
|
1073743755, 1934, 1073743760, 1965, // NOLINT
|
|
|
|
1073743792, 1969, 1073743863, 2049,
|
|
|
|
1073743875, 2053, 1073743879, 2058, // NOLINT
|
|
|
|
1073743884, 2082, 1073743936, 2163,
|
|
|
|
1073744002, 2227, 1073744114, 2295, // NOLINT
|
|
|
|
2299, 1073744138, 2341, 1073744176,
|
|
|
|
2374, 1073744224, 2428, 1073744260, // NOLINT
|
|
|
|
2482, 2511, 1073744352, 2532,
|
|
|
|
1073744358, 2543, 1073744378, 2558, // NOLINT
|
|
|
|
1073744384, 2600, 1073744448, 2626,
|
|
|
|
1073744452, 2635, 1073744480, 2678, // NOLINT
|
|
|
|
2682, 1073744510, 2735, 2737,
|
|
|
|
1073744565, 2742, 1073744569, 2749, // NOLINT
|
|
|
|
2752, 2754, 1073744603, 2781,
|
|
|
|
1073744608, 2794, 1073744626, 2804, // NOLINT
|
|
|
|
1073744641, 2822, 1073744649, 2830,
|
|
|
|
1073744657, 2838, 1073744672, 2854, // NOLINT
|
|
|
|
1073744680, 2862, 1073744688, 2906,
|
|
|
|
1073744732, 2911, 1073744740, 2917, // NOLINT
|
|
|
|
1073744832, 3042, 1073744896, 8191}; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
// Letter lookup data for chunk 6 (selected by Letter::Is when c >> 13 == 6).
// NOTE(review): entries look like chunk-relative offsets, with 1 << 30
// (kStartBit) presumably flagging the start of a range closed by the next
// entry -- confirm against LookupPredicate.
static const uint16_t kLetterTable6Size = 6;
static const int32_t kLetterTable6[6] = {1073741824, 6051, 1073747888, 6086,
                                         1073747915, 6139};  // NOLINT
|
|
|
|
static const uint16_t kLetterTable7Size = 48;
|
|
|
|
static const int32_t kLetterTable7[48] = {
|
|
|
|
1073748224, 6765, 1073748592, 6873, 1073748736, 6918, 1073748755, 6935, // NOLINT
|
|
|
|
6941, 1073748767, 6952, 1073748778, 6966, 1073748792, 6972, 6974, // NOLINT
|
|
|
|
1073748800, 6977, 1073748803, 6980, 1073748806, 7089, 1073748947, 7485, // NOLINT
|
|
|
|
1073749328, 7567, 1073749394, 7623, 1073749488, 7675, 1073749616, 7796, // NOLINT
|
|
|
|
1073749622, 7932, 1073749793, 7994, 1073749825, 8026, 1073749862, 8126, // NOLINT
|
|
|
|
1073749954, 8135, 1073749962, 8143, 1073749970, 8151, 1073749978, 8156 }; // NOLINT
|
2008-07-03 15:10:15 +00:00
|
|
|
// Table-driven Letter predicate (used when ICU is not compiled in). The bits
// of |c| above the low 13 pick one of the eight per-chunk tables, which is
// then searched by LookupPredicate; any higher chunk answers false.
bool Letter::Is(uchar c) {
  switch (c >> 13) {
    case 0:
      return LookupPredicate(kLetterTable0, kLetterTable0Size, c);
    case 1:
      return LookupPredicate(kLetterTable1, kLetterTable1Size, c);
    case 2:
      return LookupPredicate(kLetterTable2, kLetterTable2Size, c);
    case 3:
      return LookupPredicate(kLetterTable3, kLetterTable3Size, c);
    case 4:
      return LookupPredicate(kLetterTable4, kLetterTable4Size, c);
    case 5:
      return LookupPredicate(kLetterTable5, kLetterTable5Size, c);
    case 6:
      return LookupPredicate(kLetterTable6, kLetterTable6Size, c);
    case 7:
      return LookupPredicate(kLetterTable7, kLetterTable7Size, c);
    default:
      return false;
  }
}
|
2017-06-14 20:32:49 +00:00
|
|
|
#endif
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2017-06-14 20:32:49 +00:00
|
|
|
#ifndef V8_INTL_SUPPORT
|
2014-10-08 14:55:03 +00:00
|
|
|
// ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo',
|
|
|
|
// 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in
|
|
|
|
// point.properties) and ('Pattern_White_Space' not in point.properties)) or
|
|
|
|
// ('JS_ID_Start' in point.properties)
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kID_StartTable0Size = 434;
|
|
|
|
static const int32_t kID_StartTable0[434] = {
|
|
|
|
36, 1073741889, 90, 92,
|
|
|
|
95, 1073741921, 122, 170, // NOLINT
|
|
|
|
181, 186, 1073742016, 214,
|
|
|
|
1073742040, 246, 1073742072, 705, // NOLINT
|
|
|
|
1073742534, 721, 1073742560, 740,
|
|
|
|
748, 750, 1073742704, 884, // NOLINT
|
|
|
|
1073742710, 887, 1073742714, 893,
|
|
|
|
895, 902, 1073742728, 906, // NOLINT
|
|
|
|
908, 1073742734, 929, 1073742755,
|
|
|
|
1013, 1073742839, 1153, 1073742986, // NOLINT
|
|
|
|
1327, 1073743153, 1366, 1369,
|
|
|
|
1073743201, 1415, 1073743312, 1514, // NOLINT
|
|
|
|
1073743344, 1522, 1073743392, 1610,
|
|
|
|
1073743470, 1647, 1073743473, 1747, // NOLINT
|
|
|
|
1749, 1073743589, 1766, 1073743598,
|
|
|
|
1775, 1073743610, 1788, 1791, // NOLINT
|
|
|
|
1808, 1073743634, 1839, 1073743693,
|
|
|
|
1957, 1969, 1073743818, 2026, // NOLINT
|
|
|
|
1073743860, 2037, 2042, 1073743872,
|
|
|
|
2069, 2074, 2084, 2088, // NOLINT
|
|
|
|
1073743936, 2136, 1073744032, 2226,
|
|
|
|
1073744132, 2361, 2365, 2384, // NOLINT
|
|
|
|
1073744216, 2401, 1073744241, 2432,
|
|
|
|
1073744261, 2444, 1073744271, 2448, // NOLINT
|
|
|
|
1073744275, 2472, 1073744298, 2480,
|
|
|
|
2482, 1073744310, 2489, 2493, // NOLINT
|
|
|
|
2510, 1073744348, 2525, 1073744351,
|
|
|
|
2529, 1073744368, 2545, 1073744389, // NOLINT
|
|
|
|
2570, 1073744399, 2576, 1073744403,
|
|
|
|
2600, 1073744426, 2608, 1073744434, // NOLINT
|
|
|
|
2611, 1073744437, 2614, 1073744440,
|
|
|
|
2617, 1073744473, 2652, 2654, // NOLINT
|
|
|
|
1073744498, 2676, 1073744517, 2701,
|
|
|
|
1073744527, 2705, 1073744531, 2728, // NOLINT
|
|
|
|
1073744554, 2736, 1073744562, 2739,
|
|
|
|
1073744565, 2745, 2749, 2768, // NOLINT
|
|
|
|
1073744608, 2785, 1073744645, 2828,
|
|
|
|
1073744655, 2832, 1073744659, 2856, // NOLINT
|
|
|
|
1073744682, 2864, 1073744690, 2867,
|
|
|
|
1073744693, 2873, 2877, 1073744732, // NOLINT
|
|
|
|
2909, 1073744735, 2913, 2929,
|
|
|
|
2947, 1073744773, 2954, 1073744782, // NOLINT
|
|
|
|
2960, 1073744786, 2965, 1073744793,
|
|
|
|
2970, 2972, 1073744798, 2975, // NOLINT
|
|
|
|
1073744803, 2980, 1073744808, 2986,
|
|
|
|
1073744814, 3001, 3024, 1073744901, // NOLINT
|
|
|
|
3084, 1073744910, 3088, 1073744914,
|
|
|
|
3112, 1073744938, 3129, 3133, // NOLINT
|
|
|
|
1073744984, 3161, 1073744992, 3169,
|
|
|
|
1073745029, 3212, 1073745038, 3216, // NOLINT
|
|
|
|
1073745042, 3240, 1073745066, 3251,
|
|
|
|
1073745077, 3257, 3261, 3294, // NOLINT
|
|
|
|
1073745120, 3297, 1073745137, 3314,
|
|
|
|
1073745157, 3340, 1073745166, 3344, // NOLINT
|
|
|
|
1073745170, 3386, 3389, 3406,
|
|
|
|
1073745248, 3425, 1073745274, 3455, // NOLINT
|
|
|
|
1073745285, 3478, 1073745306, 3505,
|
|
|
|
1073745331, 3515, 3517, 1073745344, // NOLINT
|
|
|
|
3526, 1073745409, 3632, 1073745458,
|
|
|
|
3635, 1073745472, 3654, 1073745537, // NOLINT
|
|
|
|
3714, 3716, 1073745543, 3720,
|
|
|
|
3722, 3725, 1073745556, 3735, // NOLINT
|
|
|
|
1073745561, 3743, 1073745569, 3747,
|
|
|
|
3749, 3751, 1073745578, 3755, // NOLINT
|
|
|
|
1073745581, 3760, 1073745586, 3763,
|
|
|
|
3773, 1073745600, 3780, 3782, // NOLINT
|
|
|
|
1073745628, 3807, 3840, 1073745728,
|
|
|
|
3911, 1073745737, 3948, 1073745800, // NOLINT
|
|
|
|
3980, 1073745920, 4138, 4159,
|
|
|
|
1073746000, 4181, 1073746010, 4189, // NOLINT
|
|
|
|
4193, 1073746021, 4198, 1073746030,
|
|
|
|
4208, 1073746037, 4225, 4238, // NOLINT
|
|
|
|
1073746080, 4293, 4295, 4301,
|
|
|
|
1073746128, 4346, 1073746172, 4680, // NOLINT
|
|
|
|
1073746506, 4685, 1073746512, 4694,
|
|
|
|
4696, 1073746522, 4701, 1073746528, // NOLINT
|
|
|
|
4744, 1073746570, 4749, 1073746576,
|
|
|
|
4784, 1073746610, 4789, 1073746616, // NOLINT
|
|
|
|
4798, 4800, 1073746626, 4805,
|
|
|
|
1073746632, 4822, 1073746648, 4880, // NOLINT
|
|
|
|
1073746706, 4885, 1073746712, 4954,
|
|
|
|
1073746816, 5007, 1073746848, 5108, // NOLINT
|
|
|
|
1073746945, 5740, 1073747567, 5759,
|
|
|
|
1073747585, 5786, 1073747616, 5866, // NOLINT
|
|
|
|
1073747694, 5880, 1073747712, 5900,
|
|
|
|
1073747726, 5905, 1073747744, 5937, // NOLINT
|
|
|
|
1073747776, 5969, 1073747808, 5996,
|
|
|
|
1073747822, 6000, 1073747840, 6067, // NOLINT
|
|
|
|
6103, 6108, 1073748000, 6263,
|
|
|
|
1073748096, 6312, 6314, 1073748144, // NOLINT
|
|
|
|
6389, 1073748224, 6430, 1073748304,
|
|
|
|
6509, 1073748336, 6516, 1073748352, // NOLINT
|
|
|
|
6571, 1073748417, 6599, 1073748480,
|
|
|
|
6678, 1073748512, 6740, 6823, // NOLINT
|
|
|
|
1073748741, 6963, 1073748805, 6987,
|
|
|
|
1073748867, 7072, 1073748910, 7087, // NOLINT
|
|
|
|
1073748922, 7141, 1073748992, 7203,
|
|
|
|
1073749069, 7247, 1073749082, 7293, // NOLINT
|
|
|
|
1073749225, 7404, 1073749230, 7409,
|
|
|
|
1073749237, 7414, 1073749248, 7615, // NOLINT
|
|
|
|
1073749504, 7957, 1073749784, 7965,
|
|
|
|
1073749792, 8005, 1073749832, 8013, // NOLINT
|
|
|
|
1073749840, 8023, 8025, 8027,
|
|
|
|
8029, 1073749855, 8061, 1073749888, // NOLINT
|
|
|
|
8116, 1073749942, 8124, 8126,
|
|
|
|
1073749954, 8132, 1073749958, 8140, // NOLINT
|
|
|
|
1073749968, 8147, 1073749974, 8155,
|
|
|
|
1073749984, 8172, 1073750002, 8180, // NOLINT
|
|
|
|
1073750006, 8188}; // NOLINT
|
|
|
|
static const uint16_t kID_StartTable1Size = 84;
|
|
|
|
static const int32_t kID_StartTable1[84] = {
|
|
|
|
113, 127, 1073741968, 156,
|
|
|
|
258, 263, 1073742090, 275, // NOLINT
|
|
|
|
277, 1073742104, 285, 292,
|
|
|
|
294, 296, 1073742122, 313, // NOLINT
|
|
|
|
1073742140, 319, 1073742149, 329,
|
|
|
|
334, 1073742176, 392, 1073744896, // NOLINT
|
|
|
|
3118, 1073744944, 3166, 1073744992,
|
|
|
|
3300, 1073745131, 3310, 1073745138, // NOLINT
|
|
|
|
3315, 1073745152, 3365, 3367,
|
|
|
|
3373, 1073745200, 3431, 3439, // NOLINT
|
|
|
|
1073745280, 3478, 1073745312, 3494,
|
|
|
|
1073745320, 3502, 1073745328, 3510, // NOLINT
|
|
|
|
1073745336, 3518, 1073745344, 3526,
|
|
|
|
1073745352, 3534, 1073745360, 3542, // NOLINT
|
|
|
|
1073745368, 3550, 1073745925, 4103,
|
|
|
|
1073745953, 4137, 1073745969, 4149, // NOLINT
|
|
|
|
1073745976, 4156, 1073745985, 4246,
|
|
|
|
1073746075, 4255, 1073746081, 4346, // NOLINT
|
|
|
|
1073746172, 4351, 1073746181, 4397,
|
|
|
|
1073746225, 4494, 1073746336, 4538, // NOLINT
|
|
|
|
1073746416, 4607, 1073746944, 8191}; // NOLINT
|
|
|
|
// ID_Start lookup data for chunks 2, 3 and 4 (selected by ID_Start::Is
// through c >> 13), laid out one pair of entries per line.
// NOTE(review): entries look like chunk-relative offsets, with 1 << 30
// (kStartBit) presumably flagging the start of a range closed by the next
// entry -- confirm against LookupPredicate.
static const uint16_t kID_StartTable2Size = 4;
static const int32_t kID_StartTable2[4] = {
    1073741824, 3509,  // NOLINT
    1073745408, 8191,  // NOLINT
};

static const uint16_t kID_StartTable3Size = 2;
static const int32_t kID_StartTable3[2] = {
    1073741824, 8191,  // NOLINT
};

static const uint16_t kID_StartTable4Size = 2;
static const int32_t kID_StartTable4[2] = {
    1073741824, 8140,  // NOLINT
};
|
|
|
|
static const uint16_t kID_StartTable5Size = 100;
|
|
|
|
static const int32_t kID_StartTable5[100] = {
|
|
|
|
1073741824, 1164, 1073743056, 1277,
|
|
|
|
1073743104, 1548, 1073743376, 1567, // NOLINT
|
|
|
|
1073743402, 1579, 1073743424, 1646,
|
|
|
|
1073743487, 1693, 1073743520, 1775, // NOLINT
|
|
|
|
1073743639, 1823, 1073743650, 1928,
|
|
|
|
1073743755, 1934, 1073743760, 1965, // NOLINT
|
|
|
|
1073743792, 1969, 1073743863, 2049,
|
|
|
|
1073743875, 2053, 1073743879, 2058, // NOLINT
|
|
|
|
1073743884, 2082, 1073743936, 2163,
|
|
|
|
1073744002, 2227, 1073744114, 2295, // NOLINT
|
|
|
|
2299, 1073744138, 2341, 1073744176,
|
|
|
|
2374, 1073744224, 2428, 1073744260, // NOLINT
|
|
|
|
2482, 2511, 1073744352, 2532,
|
|
|
|
1073744358, 2543, 1073744378, 2558, // NOLINT
|
|
|
|
1073744384, 2600, 1073744448, 2626,
|
|
|
|
1073744452, 2635, 1073744480, 2678, // NOLINT
|
|
|
|
2682, 1073744510, 2735, 2737,
|
|
|
|
1073744565, 2742, 1073744569, 2749, // NOLINT
|
|
|
|
2752, 2754, 1073744603, 2781,
|
|
|
|
1073744608, 2794, 1073744626, 2804, // NOLINT
|
|
|
|
1073744641, 2822, 1073744649, 2830,
|
|
|
|
1073744657, 2838, 1073744672, 2854, // NOLINT
|
|
|
|
1073744680, 2862, 1073744688, 2906,
|
|
|
|
1073744732, 2911, 1073744740, 2917, // NOLINT
|
|
|
|
1073744832, 3042, 1073744896, 8191}; // NOLINT
|
|
|
|
// ID_Start lookup data for chunk 6 (selected by ID_Start::Is when
// c >> 13 == 6), laid out one pair of entries per line.
// NOTE(review): entries look like chunk-relative offsets, with 1 << 30
// (kStartBit) presumably flagging the start of a range closed by the next
// entry -- confirm against LookupPredicate.
static const uint16_t kID_StartTable6Size = 6;
static const int32_t kID_StartTable6[6] = {
    1073741824, 6051,  // NOLINT
    1073747888, 6086,  // NOLINT
    1073747915, 6139,  // NOLINT
};
|
|
|
|
static const uint16_t kID_StartTable7Size = 48;
|
|
|
|
static const int32_t kID_StartTable7[48] = {
|
|
|
|
1073748224, 6765, 1073748592, 6873,
|
|
|
|
1073748736, 6918, 1073748755, 6935, // NOLINT
|
|
|
|
6941, 1073748767, 6952, 1073748778,
|
|
|
|
6966, 1073748792, 6972, 6974, // NOLINT
|
|
|
|
1073748800, 6977, 1073748803, 6980,
|
|
|
|
1073748806, 7089, 1073748947, 7485, // NOLINT
|
|
|
|
1073749328, 7567, 1073749394, 7623,
|
|
|
|
1073749488, 7675, 1073749616, 7796, // NOLINT
|
|
|
|
1073749622, 7932, 1073749793, 7994,
|
|
|
|
1073749825, 8026, 1073749862, 8126, // NOLINT
|
|
|
|
1073749954, 8135, 1073749962, 8143,
|
|
|
|
1073749970, 8151, 1073749978, 8156}; // NOLINT
|
|
|
|
// Table-driven ID_Start predicate. Chunks 0..7 (chunk == c >> 13) each have
// a table; the matching table and its size are fetched from a small dispatch
// array and searched with LookupPredicate. Any higher chunk answers false.
bool ID_Start::Is(uchar c) {
  struct ChunkTable {
    const int32_t* entries;
    uint16_t size;
  };
  static const ChunkTable kChunkTables[] = {
      {kID_StartTable0, kID_StartTable0Size},
      {kID_StartTable1, kID_StartTable1Size},
      {kID_StartTable2, kID_StartTable2Size},
      {kID_StartTable3, kID_StartTable3Size},
      {kID_StartTable4, kID_StartTable4Size},
      {kID_StartTable5, kID_StartTable5Size},
      {kID_StartTable6, kID_StartTable6Size},
      {kID_StartTable7, kID_StartTable7Size},
  };
  const uchar chunk = c >> 13;
  if (chunk >= sizeof(kChunkTables) / sizeof(kChunkTables[0])) return false;
  const ChunkTable& selected = kChunkTables[chunk];
  return LookupPredicate(selected.entries, selected.size, c);
}
|
|
|
|
|
|
|
|
|
|
|
|
// ID_Continue: point.category in ['Nd', 'Mn', 'Mc', 'Pc'] or
|
|
|
|
// 'Other_ID_Continue' in point.properties or 'JS_ID_Continue' in
|
|
|
|
// point.properties
|
|
|
|
|
|
|
|
static const uint16_t kID_ContinueTable0Size = 315;
|
|
|
|
static const int32_t kID_ContinueTable0[315] = {
|
|
|
|
1073741872, 57, 95, 183,
|
|
|
|
1073742592, 879, 903, 1073742979, // NOLINT
|
|
|
|
1159, 1073743249, 1469, 1471,
|
|
|
|
1073743297, 1474, 1073743300, 1477, // NOLINT
|
|
|
|
1479, 1073743376, 1562, 1073743435,
|
|
|
|
1641, 1648, 1073743574, 1756, // NOLINT
|
|
|
|
1073743583, 1764, 1073743591, 1768,
|
|
|
|
1073743594, 1773, 1073743600, 1785, // NOLINT
|
|
|
|
1809, 1073743664, 1866, 1073743782,
|
|
|
|
1968, 1073743808, 1993, 1073743851, // NOLINT
|
|
|
|
2035, 1073743894, 2073, 1073743899,
|
|
|
|
2083, 1073743909, 2087, 1073743913, // NOLINT
|
|
|
|
2093, 1073743961, 2139, 1073744100,
|
|
|
|
2307, 1073744186, 2364, 1073744190, // NOLINT
|
|
|
|
2383, 1073744209, 2391, 1073744226,
|
|
|
|
2403, 1073744230, 2415, 1073744257, // NOLINT
|
|
|
|
2435, 2492, 1073744318, 2500,
|
|
|
|
1073744327, 2504, 1073744331, 2509, // NOLINT
|
|
|
|
2519, 1073744354, 2531, 1073744358,
|
|
|
|
2543, 1073744385, 2563, 2620, // NOLINT
|
|
|
|
1073744446, 2626, 1073744455, 2632,
|
|
|
|
1073744459, 2637, 2641, 1073744486, // NOLINT
|
|
|
|
2673, 2677, 1073744513, 2691,
|
|
|
|
2748, 1073744574, 2757, 1073744583, // NOLINT
|
|
|
|
2761, 1073744587, 2765, 1073744610,
|
|
|
|
2787, 1073744614, 2799, 1073744641, // NOLINT
|
|
|
|
2819, 2876, 1073744702, 2884,
|
|
|
|
1073744711, 2888, 1073744715, 2893, // NOLINT
|
|
|
|
1073744726, 2903, 1073744738, 2915,
|
|
|
|
1073744742, 2927, 2946, 1073744830, // NOLINT
|
|
|
|
3010, 1073744838, 3016, 1073744842,
|
|
|
|
3021, 3031, 1073744870, 3055, // NOLINT
|
|
|
|
1073744896, 3075, 1073744958, 3140,
|
|
|
|
1073744966, 3144, 1073744970, 3149, // NOLINT
|
|
|
|
1073744981, 3158, 1073744994, 3171,
|
|
|
|
1073744998, 3183, 1073745025, 3203, // NOLINT
|
|
|
|
3260, 1073745086, 3268, 1073745094,
|
|
|
|
3272, 1073745098, 3277, 1073745109, // NOLINT
|
|
|
|
3286, 1073745122, 3299, 1073745126,
|
|
|
|
3311, 1073745153, 3331, 1073745214, // NOLINT
|
|
|
|
3396, 1073745222, 3400, 1073745226,
|
|
|
|
3405, 3415, 1073745250, 3427, // NOLINT
|
|
|
|
1073745254, 3439, 1073745282, 3459,
|
|
|
|
3530, 1073745359, 3540, 3542, // NOLINT
|
|
|
|
1073745368, 3551, 1073745382, 3567,
|
|
|
|
1073745394, 3571, 3633, 1073745460, // NOLINT
|
|
|
|
3642, 1073745479, 3662, 1073745488,
|
|
|
|
3673, 3761, 1073745588, 3769, // NOLINT
|
|
|
|
1073745595, 3772, 1073745608, 3789,
|
|
|
|
1073745616, 3801, 1073745688, 3865, // NOLINT
|
|
|
|
1073745696, 3881, 3893, 3895,
|
|
|
|
3897, 1073745726, 3903, 1073745777, // NOLINT
|
|
|
|
3972, 1073745798, 3975, 1073745805,
|
|
|
|
3991, 1073745817, 4028, 4038, // NOLINT
|
|
|
|
1073745963, 4158, 1073745984, 4169,
|
|
|
|
1073746006, 4185, 1073746014, 4192, // NOLINT
|
|
|
|
1073746018, 4196, 1073746023, 4205,
|
|
|
|
1073746033, 4212, 1073746050, 4237, // NOLINT
|
|
|
|
1073746063, 4253, 1073746781, 4959,
|
|
|
|
1073746793, 4977, 1073747730, 5908, // NOLINT
|
|
|
|
1073747762, 5940, 1073747794, 5971,
|
|
|
|
1073747826, 6003, 1073747892, 6099, // NOLINT
|
|
|
|
6109, 1073747936, 6121, 1073747979,
|
|
|
|
6157, 1073747984, 6169, 6313, // NOLINT
|
|
|
|
1073748256, 6443, 1073748272, 6459,
|
|
|
|
1073748294, 6479, 1073748400, 6592, // NOLINT
|
|
|
|
1073748424, 6601, 1073748432, 6618,
|
|
|
|
1073748503, 6683, 1073748565, 6750, // NOLINT
|
|
|
|
1073748576, 6780, 1073748607, 6793,
|
|
|
|
1073748624, 6809, 1073748656, 6845, // NOLINT
|
|
|
|
1073748736, 6916, 1073748788, 6980,
|
|
|
|
1073748816, 7001, 1073748843, 7027, // NOLINT
|
|
|
|
1073748864, 7042, 1073748897, 7085,
|
|
|
|
1073748912, 7097, 1073748966, 7155, // NOLINT
|
|
|
|
1073749028, 7223, 1073749056, 7241,
|
|
|
|
1073749072, 7257, 1073749200, 7378, // NOLINT
|
|
|
|
1073749204, 7400, 7405, 1073749234,
|
|
|
|
7412, 1073749240, 7417, 1073749440, // NOLINT
|
|
|
|
7669, 1073749500, 7679}; // NOLINT
|
|
|
|
static const uint16_t kID_ContinueTable1Size = 19;
|
|
|
|
static const int32_t kID_ContinueTable1[19] = {
|
|
|
|
1073741836, 13, 1073741887, 64,
|
|
|
|
84, 1073742032, 220, 225, // NOLINT
|
|
|
|
1073742053, 240, 1073745135, 3313,
|
|
|
|
3455, 1073745376, 3583, 1073745962, // NOLINT
|
|
|
|
4143, 1073746073, 4250}; // NOLINT
|
|
|
|
static const uint16_t kID_ContinueTable5Size = 63;
|
|
|
|
static const int32_t kID_ContinueTable5[63] = {
|
|
|
|
1073743392, 1577, 1647, 1073743476,
|
|
|
|
1661, 1695, 1073743600, 1777, // NOLINT
|
|
|
|
2050, 2054, 2059, 1073743907,
|
|
|
|
2087, 1073744000, 2177, 1073744052, // NOLINT
|
|
|
|
2244, 1073744080, 2265, 1073744096,
|
|
|
|
2289, 1073744128, 2313, 1073744166, // NOLINT
|
|
|
|
2349, 1073744199, 2387, 1073744256,
|
|
|
|
2435, 1073744307, 2496, 1073744336, // NOLINT
|
|
|
|
2521, 2533, 1073744368, 2553,
|
|
|
|
1073744425, 2614, 2627, 1073744460, // NOLINT
|
|
|
|
2637, 1073744464, 2649, 1073744507,
|
|
|
|
2685, 2736, 1073744562, 2740, // NOLINT
|
|
|
|
1073744567, 2744, 1073744574, 2751,
|
|
|
|
2753, 1073744619, 2799, 1073744629, // NOLINT
|
|
|
|
2806, 1073744867, 3050, 1073744876,
|
|
|
|
3053, 1073744880, 3065}; // NOLINT
|
|
|
|
static const uint16_t kID_ContinueTable7Size = 12;
|
|
|
|
static const int32_t kID_ContinueTable7[12] = {
|
|
|
|
6942, 1073749504, 7695, 1073749536,
|
|
|
|
7725, 1073749555, 7732, 1073749581, // NOLINT
|
|
|
|
7759, 1073749776, 7961, 7999}; // NOLINT
|
|
|
|
// Table-driven ID_Continue predicate. Only chunks 0, 1, 5 and 7
// (chunk == c >> 13) have a table; the switch picks the table and its size,
// and a single LookupPredicate call does the search. Other chunks answer
// false.
bool ID_Continue::Is(uchar c) {
  const int32_t* entries;
  uint16_t entry_count;
  switch (c >> 13) {
    case 0:
      entries = kID_ContinueTable0;
      entry_count = kID_ContinueTable0Size;
      break;
    case 1:
      entries = kID_ContinueTable1;
      entry_count = kID_ContinueTable1Size;
      break;
    case 5:
      entries = kID_ContinueTable5;
      entry_count = kID_ContinueTable5Size;
      break;
    case 7:
      entries = kID_ContinueTable7;
      entry_count = kID_ContinueTable7Size;
      break;
    default:
      return false;
  }
  return LookupPredicate(entries, entry_count, c);
}
|
|
|
|
|
2014-10-08 14:55:03 +00:00
|
|
|
// WhiteSpace: (point.category == 'Zs') or ('JS_White_Space' in
|
|
|
|
// point.properties)
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2017-02-28 13:42:29 +00:00
|
|
|
// WhiteSpace lookup data for the three chunks WhiteSpace::Is consults
// (c >> 13 equal to 0, 1 and 7).
// NOTE(review): entries look like chunk-relative offsets, with 1 << 30
// (kStartBit) presumably flagging the start of a range closed by the next
// entry -- confirm against LookupPredicate.
static const uint16_t kWhiteSpaceTable0Size = 6;
static const int32_t kWhiteSpaceTable0[6] = {
    9, 1073741835, 12,  // NOLINT
    32, 160, 5760,      // NOLINT
};

static const uint16_t kWhiteSpaceTable1Size = 5;
static const int32_t kWhiteSpaceTable1[5] = {1073741824, 10, 47, 95,
                                             4096};  // NOLINT

static const uint16_t kWhiteSpaceTable7Size = 1;
static const int32_t kWhiteSpaceTable7[1] = {
    7935,  // NOLINT
};
|
2008-07-03 15:10:15 +00:00
|
|
|
// Table-driven WhiteSpace predicate. The bits of |c| above the low 13 select
// the per-chunk table handed to LookupPredicate; only chunks 0, 1 and 7 have
// a table, every other chunk answers false.
bool WhiteSpace::Is(uchar c) {
  switch (c >> 13) {
    case 0:
      return LookupPredicate(kWhiteSpaceTable0, kWhiteSpaceTable0Size, c);
    case 1:
      return LookupPredicate(kWhiteSpaceTable1, kWhiteSpaceTable1Size, c);
    case 7:
      return LookupPredicate(kWhiteSpaceTable7, kWhiteSpaceTable7Size, c);
    default:
      return false;
  }
}
|
2017-06-14 20:32:49 +00:00
|
|
|
#endif // !V8_INTL_SUPPORT
|
2013-07-05 09:52:11 +00:00
|
|
|
|
2017-06-29 03:01:13 +00:00
|
|
|
#ifndef V8_INTL_SUPPORT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Multi-character lowercase expansions for chunk 0, one initializer per
// line. The first entry is the two-unit sequence 105, 775 (U+0069 followed
// by U+0307); the second entry holds only kSentinel.
// NOTE(review): the kSentinel entry presumably terminates the list -- confirm
// against the code that reads MultiCharacterSpecialCase tables.
static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = {
    {{105, 775}},   // NOLINT
    {{kSentinel}},  // NOLINT
};
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kToLowercaseTable0Size = 488; // NOLINT
|
|
|
|
static const int32_t kToLowercaseTable0[976] = {
|
|
|
|
1073741889, 128, 90, 128, 1073742016, 128,
|
|
|
|
214, 128, 1073742040, 128, 222, 128,
|
|
|
|
256, 4, 258, 4, // NOLINT
|
|
|
|
260, 4, 262, 4, 264, 4,
|
|
|
|
266, 4, 268, 4, 270, 4,
|
|
|
|
272, 4, 274, 4, // NOLINT
|
|
|
|
276, 4, 278, 4, 280, 4,
|
|
|
|
282, 4, 284, 4, 286, 4,
|
|
|
|
288, 4, 290, 4, // NOLINT
|
|
|
|
292, 4, 294, 4, 296, 4,
|
|
|
|
298, 4, 300, 4, 302, 4,
|
|
|
|
304, 1, 306, 4, // NOLINT
|
|
|
|
308, 4, 310, 4, 313, 4,
|
|
|
|
315, 4, 317, 4, 319, 4,
|
|
|
|
321, 4, 323, 4, // NOLINT
|
|
|
|
325, 4, 327, 4, 330, 4,
|
|
|
|
332, 4, 334, 4, 336, 4,
|
|
|
|
338, 4, 340, 4, // NOLINT
|
|
|
|
342, 4, 344, 4, 346, 4,
|
|
|
|
348, 4, 350, 4, 352, 4,
|
|
|
|
354, 4, 356, 4, // NOLINT
|
|
|
|
358, 4, 360, 4, 362, 4,
|
|
|
|
364, 4, 366, 4, 368, 4,
|
|
|
|
370, 4, 372, 4, // NOLINT
|
|
|
|
374, 4, 376, -484, 377, 4,
|
|
|
|
379, 4, 381, 4, 385, 840,
|
|
|
|
386, 4, 388, 4, // NOLINT
|
|
|
|
390, 824, 391, 4, 1073742217, 820,
|
|
|
|
394, 820, 395, 4, 398, 316,
|
|
|
|
399, 808, 400, 812, // NOLINT
|
|
|
|
401, 4, 403, 820, 404, 828,
|
|
|
|
406, 844, 407, 836, 408, 4,
|
|
|
|
412, 844, 413, 852, // NOLINT
|
|
|
|
415, 856, 416, 4, 418, 4,
|
|
|
|
420, 4, 422, 872, 423, 4,
|
|
|
|
425, 872, 428, 4, // NOLINT
|
|
|
|
430, 872, 431, 4, 1073742257, 868,
|
|
|
|
434, 868, 435, 4, 437, 4,
|
|
|
|
439, 876, 440, 4, // NOLINT
|
|
|
|
444, 4, 452, 8, 453, 4,
|
|
|
|
455, 8, 456, 4, 458, 8,
|
|
|
|
459, 4, 461, 4, // NOLINT
|
|
|
|
463, 4, 465, 4, 467, 4,
|
|
|
|
469, 4, 471, 4, 473, 4,
|
|
|
|
475, 4, 478, 4, // NOLINT
|
|
|
|
480, 4, 482, 4, 484, 4,
|
|
|
|
486, 4, 488, 4, 490, 4,
|
|
|
|
492, 4, 494, 4, // NOLINT
|
|
|
|
497, 8, 498, 4, 500, 4,
|
|
|
|
502, -388, 503, -224, 504, 4,
|
|
|
|
506, 4, 508, 4, // NOLINT
|
|
|
|
510, 4, 512, 4, 514, 4,
|
|
|
|
516, 4, 518, 4, 520, 4,
|
|
|
|
522, 4, 524, 4, // NOLINT
|
|
|
|
526, 4, 528, 4, 530, 4,
|
|
|
|
532, 4, 534, 4, 536, 4,
|
|
|
|
538, 4, 540, 4, // NOLINT
|
|
|
|
542, 4, 544, -520, 546, 4,
|
|
|
|
548, 4, 550, 4, 552, 4,
|
|
|
|
554, 4, 556, 4, // NOLINT
|
|
|
|
558, 4, 560, 4, 562, 4,
|
|
|
|
570, 43180, 571, 4, 573, -652,
|
|
|
|
574, 43168, 577, 4, // NOLINT
|
|
|
|
579, -780, 580, 276, 581, 284,
|
|
|
|
582, 4, 584, 4, 586, 4,
|
|
|
|
588, 4, 590, 4, // NOLINT
|
|
|
|
880, 4, 882, 4, 886, 4,
|
|
|
|
895, 464, 902, 152, 1073742728, 148,
|
|
|
|
906, 148, 908, 256, // NOLINT
|
|
|
|
1073742734, 252, 911, 252, 1073742737, 128,
|
|
|
|
929, 128, 931, 6, 1073742756, 128,
|
|
|
|
939, 128, 975, 32, // NOLINT
|
|
|
|
984, 4, 986, 4, 988, 4,
|
|
|
|
990, 4, 992, 4, 994, 4,
|
|
|
|
996, 4, 998, 4, // NOLINT
|
|
|
|
1000, 4, 1002, 4, 1004, 4,
|
|
|
|
1006, 4, 1012, -240, 1015, 4,
|
|
|
|
1017, -28, 1018, 4, // NOLINT
|
|
|
|
1073742845, -520, 1023, -520, 1073742848, 320,
|
|
|
|
1039, 320, 1073742864, 128, 1071, 128,
|
|
|
|
1120, 4, 1122, 4, // NOLINT
|
|
|
|
1124, 4, 1126, 4, 1128, 4,
|
|
|
|
1130, 4, 1132, 4, 1134, 4,
|
|
|
|
1136, 4, 1138, 4, // NOLINT
|
|
|
|
1140, 4, 1142, 4, 1144, 4,
|
|
|
|
1146, 4, 1148, 4, 1150, 4,
|
|
|
|
1152, 4, 1162, 4, // NOLINT
|
|
|
|
1164, 4, 1166, 4, 1168, 4,
|
|
|
|
1170, 4, 1172, 4, 1174, 4,
|
|
|
|
1176, 4, 1178, 4, // NOLINT
|
|
|
|
1180, 4, 1182, 4, 1184, 4,
|
|
|
|
1186, 4, 1188, 4, 1190, 4,
|
|
|
|
1192, 4, 1194, 4, // NOLINT
|
|
|
|
1196, 4, 1198, 4, 1200, 4,
|
|
|
|
1202, 4, 1204, 4, 1206, 4,
|
|
|
|
1208, 4, 1210, 4, // NOLINT
|
|
|
|
1212, 4, 1214, 4, 1216, 60,
|
|
|
|
1217, 4, 1219, 4, 1221, 4,
|
|
|
|
1223, 4, 1225, 4, // NOLINT
|
|
|
|
1227, 4, 1229, 4, 1232, 4,
|
|
|
|
1234, 4, 1236, 4, 1238, 4,
|
|
|
|
1240, 4, 1242, 4, // NOLINT
|
|
|
|
1244, 4, 1246, 4, 1248, 4,
|
|
|
|
1250, 4, 1252, 4, 1254, 4,
|
|
|
|
1256, 4, 1258, 4, // NOLINT
|
|
|
|
1260, 4, 1262, 4, 1264, 4,
|
|
|
|
1266, 4, 1268, 4, 1270, 4,
|
|
|
|
1272, 4, 1274, 4, // NOLINT
|
|
|
|
1276, 4, 1278, 4, 1280, 4,
|
|
|
|
1282, 4, 1284, 4, 1286, 4,
|
|
|
|
1288, 4, 1290, 4, // NOLINT
|
|
|
|
1292, 4, 1294, 4, 1296, 4,
|
|
|
|
1298, 4, 1300, 4, 1302, 4,
|
|
|
|
1304, 4, 1306, 4, // NOLINT
|
|
|
|
1308, 4, 1310, 4, 1312, 4,
|
|
|
|
1314, 4, 1316, 4, 1318, 4,
|
|
|
|
1320, 4, 1322, 4, // NOLINT
|
|
|
|
1324, 4, 1326, 4, 1073743153, 192,
|
|
|
|
1366, 192, 1073746080, 29056, 4293, 29056,
|
|
|
|
4295, 29056, 4301, 29056, // NOLINT
|
|
|
|
7680, 4, 7682, 4, 7684, 4,
|
|
|
|
7686, 4, 7688, 4, 7690, 4,
|
|
|
|
7692, 4, 7694, 4, // NOLINT
|
|
|
|
7696, 4, 7698, 4, 7700, 4,
|
|
|
|
7702, 4, 7704, 4, 7706, 4,
|
|
|
|
7708, 4, 7710, 4, // NOLINT
|
|
|
|
7712, 4, 7714, 4, 7716, 4,
|
|
|
|
7718, 4, 7720, 4, 7722, 4,
|
|
|
|
7724, 4, 7726, 4, // NOLINT
|
|
|
|
7728, 4, 7730, 4, 7732, 4,
|
|
|
|
7734, 4, 7736, 4, 7738, 4,
|
|
|
|
7740, 4, 7742, 4, // NOLINT
|
|
|
|
7744, 4, 7746, 4, 7748, 4,
|
|
|
|
7750, 4, 7752, 4, 7754, 4,
|
|
|
|
7756, 4, 7758, 4, // NOLINT
|
|
|
|
7760, 4, 7762, 4, 7764, 4,
|
|
|
|
7766, 4, 7768, 4, 7770, 4,
|
|
|
|
7772, 4, 7774, 4, // NOLINT
|
|
|
|
7776, 4, 7778, 4, 7780, 4,
|
|
|
|
7782, 4, 7784, 4, 7786, 4,
|
|
|
|
7788, 4, 7790, 4, // NOLINT
|
|
|
|
7792, 4, 7794, 4, 7796, 4,
|
|
|
|
7798, 4, 7800, 4, 7802, 4,
|
|
|
|
7804, 4, 7806, 4, // NOLINT
|
|
|
|
7808, 4, 7810, 4, 7812, 4,
|
|
|
|
7814, 4, 7816, 4, 7818, 4,
|
|
|
|
7820, 4, 7822, 4, // NOLINT
|
|
|
|
7824, 4, 7826, 4, 7828, 4,
|
|
|
|
7838, -30460, 7840, 4, 7842, 4,
|
|
|
|
7844, 4, 7846, 4, // NOLINT
|
|
|
|
7848, 4, 7850, 4, 7852, 4,
|
|
|
|
7854, 4, 7856, 4, 7858, 4,
|
|
|
|
7860, 4, 7862, 4, // NOLINT
|
|
|
|
7864, 4, 7866, 4, 7868, 4,
|
|
|
|
7870, 4, 7872, 4, 7874, 4,
|
|
|
|
7876, 4, 7878, 4, // NOLINT
|
|
|
|
7880, 4, 7882, 4, 7884, 4,
|
|
|
|
7886, 4, 7888, 4, 7890, 4,
|
|
|
|
7892, 4, 7894, 4, // NOLINT
|
|
|
|
7896, 4, 7898, 4, 7900, 4,
|
|
|
|
7902, 4, 7904, 4, 7906, 4,
|
|
|
|
7908, 4, 7910, 4, // NOLINT
|
|
|
|
7912, 4, 7914, 4, 7916, 4,
|
|
|
|
7918, 4, 7920, 4, 7922, 4,
|
|
|
|
7924, 4, 7926, 4, // NOLINT
|
|
|
|
7928, 4, 7930, 4, 7932, 4,
|
|
|
|
7934, 4, 1073749768, -32, 7951, -32,
|
|
|
|
1073749784, -32, 7965, -32, // NOLINT
|
|
|
|
1073749800, -32, 7983, -32, 1073749816, -32,
|
|
|
|
7999, -32, 1073749832, -32, 8013, -32,
|
|
|
|
8025, -32, 8027, -32, // NOLINT
|
|
|
|
8029, -32, 8031, -32, 1073749864, -32,
|
|
|
|
8047, -32, 1073749896, -32, 8079, -32,
|
|
|
|
1073749912, -32, 8095, -32, // NOLINT
|
|
|
|
1073749928, -32, 8111, -32, 1073749944, -32,
|
|
|
|
8121, -32, 1073749946, -296, 8123, -296,
|
|
|
|
8124, -36, 1073749960, -344, // NOLINT
|
|
|
|
8139, -344, 8140, -36, 1073749976, -32,
|
|
|
|
8153, -32, 1073749978, -400, 8155, -400,
|
|
|
|
1073749992, -32, 8169, -32, // NOLINT
|
|
|
|
1073749994, -448, 8171, -448, 8172, -28,
|
|
|
|
1073750008, -512, 8185, -512, 1073750010, -504,
|
|
|
|
8187, -504, 8188, -36}; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const uint16_t kToLowercaseMultiStrings0Size = 2; // NOLINT
|
|
|
|
static const MultiCharacterSpecialCase<1> kToLowercaseMultiStrings1[1] = { // NOLINT
|
2011-01-03 10:28:39 +00:00
|
|
|
{{kSentinel}} }; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const uint16_t kToLowercaseTable1Size = 79; // NOLINT
|
|
|
|
static const int32_t kToLowercaseTable1[158] = {
|
2010-07-30 12:59:57 +00:00
|
|
|
294, -30068, 298, -33532, 299, -33048, 306, 112, 1073742176, 64, 367, 64, 387, 4, 1073743030, 104, // NOLINT
|
|
|
|
1231, 104, 1073744896, 192, 3118, 192, 3168, 4, 3170, -42972, 3171, -15256, 3172, -42908, 3175, 4, // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
3177, 4, 3179, 4, 3181, -43120, 3182, -42996, 3183, -43132, 3184, -43128, 3186, 4, 3189, 4, // NOLINT
|
|
|
|
1073745022, -43260, 3199, -43260, 3200, 4, 3202, 4, 3204, 4, 3206, 4, 3208, 4, 3210, 4, // NOLINT
|
|
|
|
3212, 4, 3214, 4, 3216, 4, 3218, 4, 3220, 4, 3222, 4, 3224, 4, 3226, 4, // NOLINT
|
|
|
|
3228, 4, 3230, 4, 3232, 4, 3234, 4, 3236, 4, 3238, 4, 3240, 4, 3242, 4, // NOLINT
|
|
|
|
3244, 4, 3246, 4, 3248, 4, 3250, 4, 3252, 4, 3254, 4, 3256, 4, 3258, 4, // NOLINT
|
|
|
|
3260, 4, 3262, 4, 3264, 4, 3266, 4, 3268, 4, 3270, 4, 3272, 4, 3274, 4, // NOLINT
|
|
|
|
3276, 4, 3278, 4, 3280, 4, 3282, 4, 3284, 4, 3286, 4, 3288, 4, 3290, 4, // NOLINT
|
|
|
|
3292, 4, 3294, 4, 3296, 4, 3298, 4, 3307, 4, 3309, 4, 3314, 4 }; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const uint16_t kToLowercaseMultiStrings1Size = 1; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const MultiCharacterSpecialCase<1> kToLowercaseMultiStrings5[1] = { // NOLINT
|
|
|
|
{{kSentinel}} }; // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kToLowercaseTable5Size = 103; // NOLINT
|
|
|
|
static const int32_t kToLowercaseTable5[206] = {
|
|
|
|
1600, 4, 1602, 4, 1604, 4, 1606, 4,
|
|
|
|
1608, 4, 1610, 4, 1612, 4, 1614, 4, // NOLINT
|
|
|
|
1616, 4, 1618, 4, 1620, 4, 1622, 4,
|
|
|
|
1624, 4, 1626, 4, 1628, 4, 1630, 4, // NOLINT
|
|
|
|
1632, 4, 1634, 4, 1636, 4, 1638, 4,
|
|
|
|
1640, 4, 1642, 4, 1644, 4, 1664, 4, // NOLINT
|
|
|
|
1666, 4, 1668, 4, 1670, 4, 1672, 4,
|
|
|
|
1674, 4, 1676, 4, 1678, 4, 1680, 4, // NOLINT
|
|
|
|
1682, 4, 1684, 4, 1686, 4, 1688, 4,
|
|
|
|
1690, 4, 1826, 4, 1828, 4, 1830, 4, // NOLINT
|
|
|
|
1832, 4, 1834, 4, 1836, 4, 1838, 4,
|
|
|
|
1842, 4, 1844, 4, 1846, 4, 1848, 4, // NOLINT
|
|
|
|
1850, 4, 1852, 4, 1854, 4, 1856, 4,
|
|
|
|
1858, 4, 1860, 4, 1862, 4, 1864, 4, // NOLINT
|
|
|
|
1866, 4, 1868, 4, 1870, 4, 1872, 4,
|
|
|
|
1874, 4, 1876, 4, 1878, 4, 1880, 4, // NOLINT
|
|
|
|
1882, 4, 1884, 4, 1886, 4, 1888, 4,
|
|
|
|
1890, 4, 1892, 4, 1894, 4, 1896, 4, // NOLINT
|
|
|
|
1898, 4, 1900, 4, 1902, 4, 1913, 4,
|
|
|
|
1915, 4, 1917, -141328, 1918, 4, 1920, 4, // NOLINT
|
|
|
|
1922, 4, 1924, 4, 1926, 4, 1931, 4,
|
|
|
|
1933, -169120, 1936, 4, 1938, 4, 1942, 4, // NOLINT
|
|
|
|
1944, 4, 1946, 4, 1948, 4, 1950, 4,
|
|
|
|
1952, 4, 1954, 4, 1956, 4, 1958, 4, // NOLINT
|
|
|
|
1960, 4, 1962, -169232, 1963, -169276, 1964, -169260,
|
|
|
|
1965, -169220, 1968, -169032, 1969, -169128}; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const uint16_t kToLowercaseMultiStrings5Size = 1; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const MultiCharacterSpecialCase<1> kToLowercaseMultiStrings7[1] = { // NOLINT
|
2011-01-03 10:28:39 +00:00
|
|
|
{{kSentinel}} }; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const uint16_t kToLowercaseTable7Size = 2; // NOLINT
|
|
|
|
static const int32_t kToLowercaseTable7[4] = {
|
|
|
|
1073749793, 128, 7994, 128 }; // NOLINT
|
|
|
|
static const uint16_t kToLowercaseMultiStrings7Size = 1; // NOLINT
|
2008-07-03 15:10:15 +00:00
|
|
|
// Dispatches |c| to LookupMapping using the kToLowercase table and
// multi-string set that belong to its chunk (c >> 13).
//
// |n|, |result| and |allow_caching_ptr| are passed through to LookupMapping
// untouched. Returns LookupMapping's result, or 0 when the chunk has no
// lowercase table (only chunks 0, 1, 5 and 7 do).
int ToLowercase::Convert(uchar c, uchar n, uchar* result,
                         bool* allow_caching_ptr) {
  switch (static_cast<int>(c >> 13)) {
    case 0:
      return LookupMapping<true>(kToLowercaseTable0, kToLowercaseTable0Size,
                                 kToLowercaseMultiStrings0, c, n, result,
                                 allow_caching_ptr);
    case 1:
      return LookupMapping<true>(kToLowercaseTable1, kToLowercaseTable1Size,
                                 kToLowercaseMultiStrings1, c, n, result,
                                 allow_caching_ptr);
    case 5:
      return LookupMapping<true>(kToLowercaseTable5, kToLowercaseTable5Size,
                                 kToLowercaseMultiStrings5, c, n, result,
                                 allow_caching_ptr);
    case 7:
      return LookupMapping<true>(kToLowercaseTable7, kToLowercaseTable7Size,
                                 kToLowercaseMultiStrings7, c, n, result,
                                 allow_caching_ptr);
    default:
      break;
  }
  // No table covers this chunk.
  return 0;
}
|
|
|
|
|
2010-07-30 12:59:57 +00:00
|
|
|
static const MultiCharacterSpecialCase<3> kToUppercaseMultiStrings0[62] = { // NOLINT
|
2011-01-03 10:28:39 +00:00
|
|
|
{{83, 83, kSentinel}}, {{700, 78, kSentinel}}, {{74, 780, kSentinel}}, {{921, 776, 769}}, // NOLINT
|
|
|
|
{{933, 776, 769}}, {{1333, 1362, kSentinel}}, {{72, 817, kSentinel}}, {{84, 776, kSentinel}}, // NOLINT
|
|
|
|
{{87, 778, kSentinel}}, {{89, 778, kSentinel}}, {{65, 702, kSentinel}}, {{933, 787, kSentinel}}, // NOLINT
|
|
|
|
{{933, 787, 768}}, {{933, 787, 769}}, {{933, 787, 834}}, {{7944, 921, kSentinel}}, // NOLINT
|
|
|
|
{{7945, 921, kSentinel}}, {{7946, 921, kSentinel}}, {{7947, 921, kSentinel}}, {{7948, 921, kSentinel}}, // NOLINT
|
|
|
|
{{7949, 921, kSentinel}}, {{7950, 921, kSentinel}}, {{7951, 921, kSentinel}}, {{7976, 921, kSentinel}}, // NOLINT
|
|
|
|
{{7977, 921, kSentinel}}, {{7978, 921, kSentinel}}, {{7979, 921, kSentinel}}, {{7980, 921, kSentinel}}, // NOLINT
|
|
|
|
{{7981, 921, kSentinel}}, {{7982, 921, kSentinel}}, {{7983, 921, kSentinel}}, {{8040, 921, kSentinel}}, // NOLINT
|
|
|
|
{{8041, 921, kSentinel}}, {{8042, 921, kSentinel}}, {{8043, 921, kSentinel}}, {{8044, 921, kSentinel}}, // NOLINT
|
|
|
|
{{8045, 921, kSentinel}}, {{8046, 921, kSentinel}}, {{8047, 921, kSentinel}}, {{8122, 921, kSentinel}}, // NOLINT
|
|
|
|
{{913, 921, kSentinel}}, {{902, 921, kSentinel}}, {{913, 834, kSentinel}}, {{913, 834, 921}}, // NOLINT
|
|
|
|
{{8138, 921, kSentinel}}, {{919, 921, kSentinel}}, {{905, 921, kSentinel}}, {{919, 834, kSentinel}}, // NOLINT
|
|
|
|
{{919, 834, 921}}, {{921, 776, 768}}, {{921, 834, kSentinel}}, {{921, 776, 834}}, // NOLINT
|
|
|
|
{{933, 776, 768}}, {{929, 787, kSentinel}}, {{933, 834, kSentinel}}, {{933, 776, 834}}, // NOLINT
|
|
|
|
{{8186, 921, kSentinel}}, {{937, 921, kSentinel}}, {{911, 921, kSentinel}}, {{937, 834, kSentinel}}, // NOLINT
|
|
|
|
{{937, 834, 921}}, {{kSentinel}} }; // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kToUppercaseTable0Size = 590; // NOLINT
|
|
|
|
static const int32_t kToUppercaseTable0[1180] = {
|
|
|
|
1073741921, -128, 122, -128, 181, 2972,
|
|
|
|
223, 1, 1073742048, -128, 246, -128,
|
|
|
|
1073742072, -128, 254, -128, // NOLINT
|
|
|
|
255, 484, 257, -4, 259, -4,
|
|
|
|
261, -4, 263, -4, 265, -4,
|
|
|
|
267, -4, 269, -4, // NOLINT
|
|
|
|
271, -4, 273, -4, 275, -4,
|
|
|
|
277, -4, 279, -4, 281, -4,
|
|
|
|
283, -4, 285, -4, // NOLINT
|
|
|
|
287, -4, 289, -4, 291, -4,
|
|
|
|
293, -4, 295, -4, 297, -4,
|
|
|
|
299, -4, 301, -4, // NOLINT
|
|
|
|
303, -4, 305, -928, 307, -4,
|
|
|
|
309, -4, 311, -4, 314, -4,
|
|
|
|
316, -4, 318, -4, // NOLINT
|
|
|
|
320, -4, 322, -4, 324, -4,
|
|
|
|
326, -4, 328, -4, 329, 5,
|
|
|
|
331, -4, 333, -4, // NOLINT
|
|
|
|
335, -4, 337, -4, 339, -4,
|
|
|
|
341, -4, 343, -4, 345, -4,
|
|
|
|
347, -4, 349, -4, // NOLINT
|
|
|
|
351, -4, 353, -4, 355, -4,
|
|
|
|
357, -4, 359, -4, 361, -4,
|
|
|
|
363, -4, 365, -4, // NOLINT
|
|
|
|
367, -4, 369, -4, 371, -4,
|
|
|
|
373, -4, 375, -4, 378, -4,
|
|
|
|
380, -4, 382, -4, // NOLINT
|
|
|
|
383, -1200, 384, 780, 387, -4,
|
|
|
|
389, -4, 392, -4, 396, -4,
|
|
|
|
402, -4, 405, 388, // NOLINT
|
|
|
|
409, -4, 410, 652, 414, 520,
|
|
|
|
417, -4, 419, -4, 421, -4,
|
|
|
|
424, -4, 429, -4, // NOLINT
|
|
|
|
432, -4, 436, -4, 438, -4,
|
|
|
|
441, -4, 445, -4, 447, 224,
|
|
|
|
453, -4, 454, -8, // NOLINT
|
|
|
|
456, -4, 457, -8, 459, -4,
|
|
|
|
460, -8, 462, -4, 464, -4,
|
|
|
|
466, -4, 468, -4, // NOLINT
|
|
|
|
470, -4, 472, -4, 474, -4,
|
|
|
|
476, -4, 477, -316, 479, -4,
|
|
|
|
481, -4, 483, -4, // NOLINT
|
|
|
|
485, -4, 487, -4, 489, -4,
|
|
|
|
491, -4, 493, -4, 495, -4,
|
|
|
|
496, 9, 498, -4, // NOLINT
|
|
|
|
499, -8, 501, -4, 505, -4,
|
|
|
|
507, -4, 509, -4, 511, -4,
|
|
|
|
513, -4, 515, -4, // NOLINT
|
|
|
|
517, -4, 519, -4, 521, -4,
|
|
|
|
523, -4, 525, -4, 527, -4,
|
|
|
|
529, -4, 531, -4, // NOLINT
|
|
|
|
533, -4, 535, -4, 537, -4,
|
|
|
|
539, -4, 541, -4, 543, -4,
|
|
|
|
547, -4, 549, -4, // NOLINT
|
|
|
|
551, -4, 553, -4, 555, -4,
|
|
|
|
557, -4, 559, -4, 561, -4,
|
|
|
|
563, -4, 572, -4, // NOLINT
|
|
|
|
1073742399, 43260, 576, 43260, 578, -4,
|
|
|
|
583, -4, 585, -4, 587, -4,
|
|
|
|
589, -4, 591, -4, // NOLINT
|
|
|
|
592, 43132, 593, 43120, 594, 43128,
|
|
|
|
595, -840, 596, -824, 1073742422, -820,
|
|
|
|
599, -820, 601, -808, // NOLINT
|
|
|
|
603, -812, 604, 169276, 608, -820,
|
|
|
|
609, 169260, 611, -828, 613, 169120,
|
|
|
|
614, 169232, 616, -836, // NOLINT
|
|
|
|
617, -844, 619, 42972, 620, 169220,
|
|
|
|
623, -844, 625, 42996, 626, -852,
|
|
|
|
629, -856, 637, 42908, // NOLINT
|
|
|
|
640, -872, 643, -872, 647, 169128,
|
|
|
|
648, -872, 649, -276, 1073742474, -868,
|
|
|
|
651, -868, 652, -284, // NOLINT
|
|
|
|
658, -876, 670, 169032, 837, 336,
|
|
|
|
881, -4, 883, -4, 887, -4,
|
|
|
|
1073742715, 520, 893, 520, // NOLINT
|
|
|
|
912, 13, 940, -152, 1073742765, -148,
|
|
|
|
943, -148, 944, 17, 1073742769, -128,
|
|
|
|
961, -128, 962, -124, // NOLINT
|
|
|
|
1073742787, -128, 971, -128, 972, -256,
|
|
|
|
1073742797, -252, 974, -252, 976, -248,
|
|
|
|
977, -228, 981, -188, // NOLINT
|
|
|
|
982, -216, 983, -32, 985, -4,
|
|
|
|
987, -4, 989, -4, 991, -4,
|
|
|
|
993, -4, 995, -4, // NOLINT
|
|
|
|
997, -4, 999, -4, 1001, -4,
|
|
|
|
1003, -4, 1005, -4, 1007, -4,
|
|
|
|
1008, -344, 1009, -320, // NOLINT
|
|
|
|
1010, 28, 1011, -464, 1013, -384,
|
|
|
|
1016, -4, 1019, -4, 1073742896, -128,
|
|
|
|
1103, -128, 1073742928, -320, // NOLINT
|
|
|
|
1119, -320, 1121, -4, 1123, -4,
|
|
|
|
1125, -4, 1127, -4, 1129, -4,
|
|
|
|
1131, -4, 1133, -4, // NOLINT
|
|
|
|
1135, -4, 1137, -4, 1139, -4,
|
|
|
|
1141, -4, 1143, -4, 1145, -4,
|
|
|
|
1147, -4, 1149, -4, // NOLINT
|
|
|
|
1151, -4, 1153, -4, 1163, -4,
|
|
|
|
1165, -4, 1167, -4, 1169, -4,
|
|
|
|
1171, -4, 1173, -4, // NOLINT
|
|
|
|
1175, -4, 1177, -4, 1179, -4,
|
|
|
|
1181, -4, 1183, -4, 1185, -4,
|
|
|
|
1187, -4, 1189, -4, // NOLINT
|
|
|
|
1191, -4, 1193, -4, 1195, -4,
|
|
|
|
1197, -4, 1199, -4, 1201, -4,
|
|
|
|
1203, -4, 1205, -4, // NOLINT
|
|
|
|
1207, -4, 1209, -4, 1211, -4,
|
|
|
|
1213, -4, 1215, -4, 1218, -4,
|
|
|
|
1220, -4, 1222, -4, // NOLINT
|
|
|
|
1224, -4, 1226, -4, 1228, -4,
|
|
|
|
1230, -4, 1231, -60, 1233, -4,
|
|
|
|
1235, -4, 1237, -4, // NOLINT
|
|
|
|
1239, -4, 1241, -4, 1243, -4,
|
|
|
|
1245, -4, 1247, -4, 1249, -4,
|
|
|
|
1251, -4, 1253, -4, // NOLINT
|
|
|
|
1255, -4, 1257, -4, 1259, -4,
|
|
|
|
1261, -4, 1263, -4, 1265, -4,
|
|
|
|
1267, -4, 1269, -4, // NOLINT
|
|
|
|
1271, -4, 1273, -4, 1275, -4,
|
|
|
|
1277, -4, 1279, -4, 1281, -4,
|
|
|
|
1283, -4, 1285, -4, // NOLINT
|
|
|
|
1287, -4, 1289, -4, 1291, -4,
|
|
|
|
1293, -4, 1295, -4, 1297, -4,
|
|
|
|
1299, -4, 1301, -4, // NOLINT
|
|
|
|
1303, -4, 1305, -4, 1307, -4,
|
|
|
|
1309, -4, 1311, -4, 1313, -4,
|
|
|
|
1315, -4, 1317, -4, // NOLINT
|
|
|
|
1319, -4, 1321, -4, 1323, -4,
|
|
|
|
1325, -4, 1327, -4, 1073743201, -192,
|
|
|
|
1414, -192, 1415, 21, // NOLINT
|
|
|
|
7545, 141328, 7549, 15256, 7681, -4,
|
|
|
|
7683, -4, 7685, -4, 7687, -4,
|
|
|
|
7689, -4, 7691, -4, // NOLINT
|
|
|
|
7693, -4, 7695, -4, 7697, -4,
|
|
|
|
7699, -4, 7701, -4, 7703, -4,
|
|
|
|
7705, -4, 7707, -4, // NOLINT
|
|
|
|
7709, -4, 7711, -4, 7713, -4,
|
|
|
|
7715, -4, 7717, -4, 7719, -4,
|
|
|
|
7721, -4, 7723, -4, // NOLINT
|
|
|
|
7725, -4, 7727, -4, 7729, -4,
|
|
|
|
7731, -4, 7733, -4, 7735, -4,
|
|
|
|
7737, -4, 7739, -4, // NOLINT
|
|
|
|
7741, -4, 7743, -4, 7745, -4,
|
|
|
|
7747, -4, 7749, -4, 7751, -4,
|
|
|
|
7753, -4, 7755, -4, // NOLINT
|
|
|
|
7757, -4, 7759, -4, 7761, -4,
|
|
|
|
7763, -4, 7765, -4, 7767, -4,
|
|
|
|
7769, -4, 7771, -4, // NOLINT
|
|
|
|
7773, -4, 7775, -4, 7777, -4,
|
|
|
|
7779, -4, 7781, -4, 7783, -4,
|
|
|
|
7785, -4, 7787, -4, // NOLINT
|
|
|
|
7789, -4, 7791, -4, 7793, -4,
|
|
|
|
7795, -4, 7797, -4, 7799, -4,
|
|
|
|
7801, -4, 7803, -4, // NOLINT
|
|
|
|
7805, -4, 7807, -4, 7809, -4,
|
|
|
|
7811, -4, 7813, -4, 7815, -4,
|
|
|
|
7817, -4, 7819, -4, // NOLINT
|
|
|
|
7821, -4, 7823, -4, 7825, -4,
|
|
|
|
7827, -4, 7829, -4, 7830, 25,
|
|
|
|
7831, 29, 7832, 33, // NOLINT
|
|
|
|
7833, 37, 7834, 41, 7835, -236,
|
|
|
|
7841, -4, 7843, -4, 7845, -4,
|
|
|
|
7847, -4, 7849, -4, // NOLINT
|
|
|
|
7851, -4, 7853, -4, 7855, -4,
|
|
|
|
7857, -4, 7859, -4, 7861, -4,
|
|
|
|
7863, -4, 7865, -4, // NOLINT
|
|
|
|
7867, -4, 7869, -4, 7871, -4,
|
|
|
|
7873, -4, 7875, -4, 7877, -4,
|
|
|
|
7879, -4, 7881, -4, // NOLINT
|
|
|
|
7883, -4, 7885, -4, 7887, -4,
|
|
|
|
7889, -4, 7891, -4, 7893, -4,
|
|
|
|
7895, -4, 7897, -4, // NOLINT
|
|
|
|
7899, -4, 7901, -4, 7903, -4,
|
|
|
|
7905, -4, 7907, -4, 7909, -4,
|
|
|
|
7911, -4, 7913, -4, // NOLINT
|
|
|
|
7915, -4, 7917, -4, 7919, -4,
|
|
|
|
7921, -4, 7923, -4, 7925, -4,
|
|
|
|
7927, -4, 7929, -4, // NOLINT
|
|
|
|
7931, -4, 7933, -4, 7935, -4,
|
|
|
|
1073749760, 32, 7943, 32, 1073749776, 32,
|
|
|
|
7957, 32, 1073749792, 32, // NOLINT
|
|
|
|
7975, 32, 1073749808, 32, 7991, 32,
|
|
|
|
1073749824, 32, 8005, 32, 8016, 45,
|
|
|
|
8017, 32, 8018, 49, // NOLINT
|
|
|
|
8019, 32, 8020, 53, 8021, 32,
|
|
|
|
8022, 57, 8023, 32, 1073749856, 32,
|
|
|
|
8039, 32, 1073749872, 296, // NOLINT
|
|
|
|
8049, 296, 1073749874, 344, 8053, 344,
|
|
|
|
1073749878, 400, 8055, 400, 1073749880, 512,
|
|
|
|
8057, 512, 1073749882, 448, // NOLINT
|
|
|
|
8059, 448, 1073749884, 504, 8061, 504,
|
|
|
|
8064, 61, 8065, 65, 8066, 69,
|
|
|
|
8067, 73, 8068, 77, // NOLINT
|
|
|
|
8069, 81, 8070, 85, 8071, 89,
|
|
|
|
8072, 61, 8073, 65, 8074, 69,
|
|
|
|
8075, 73, 8076, 77, // NOLINT
|
|
|
|
8077, 81, 8078, 85, 8079, 89,
|
|
|
|
8080, 93, 8081, 97, 8082, 101,
|
|
|
|
8083, 105, 8084, 109, // NOLINT
|
|
|
|
8085, 113, 8086, 117, 8087, 121,
|
|
|
|
8088, 93, 8089, 97, 8090, 101,
|
|
|
|
8091, 105, 8092, 109, // NOLINT
|
|
|
|
8093, 113, 8094, 117, 8095, 121,
|
|
|
|
8096, 125, 8097, 129, 8098, 133,
|
|
|
|
8099, 137, 8100, 141, // NOLINT
|
|
|
|
8101, 145, 8102, 149, 8103, 153,
|
|
|
|
8104, 125, 8105, 129, 8106, 133,
|
|
|
|
8107, 137, 8108, 141, // NOLINT
|
|
|
|
8109, 145, 8110, 149, 8111, 153,
|
|
|
|
1073749936, 32, 8113, 32, 8114, 157,
|
|
|
|
8115, 161, 8116, 165, // NOLINT
|
|
|
|
8118, 169, 8119, 173, 8124, 161,
|
|
|
|
8126, -28820, 8130, 177, 8131, 181,
|
|
|
|
8132, 185, 8134, 189, // NOLINT
|
|
|
|
8135, 193, 8140, 181, 1073749968, 32,
|
|
|
|
8145, 32, 8146, 197, 8147, 13,
|
|
|
|
8150, 201, 8151, 205, // NOLINT
|
|
|
|
1073749984, 32, 8161, 32, 8162, 209,
|
|
|
|
8163, 17, 8164, 213, 8165, 28,
|
|
|
|
8166, 217, 8167, 221, // NOLINT
|
|
|
|
8178, 225, 8179, 229, 8180, 233,
|
|
|
|
8182, 237, 8183, 241, 8188, 229}; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const uint16_t kToUppercaseMultiStrings0Size = 62; // NOLINT
|
|
|
|
static const MultiCharacterSpecialCase<1> kToUppercaseMultiStrings1[1] = { // NOLINT
|
2011-01-03 10:28:39 +00:00
|
|
|
{{kSentinel}} }; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const uint16_t kToUppercaseTable1Size = 73; // NOLINT
|
|
|
|
static const int32_t kToUppercaseTable1[146] = {
|
2010-07-30 12:59:57 +00:00
|
|
|
334, -112, 1073742192, -64, 383, -64, 388, -4, 1073743056, -104, 1257, -104, 1073744944, -192, 3166, -192, // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
3169, -4, 3173, -43180, 3174, -43168, 3176, -4, 3178, -4, 3180, -4, 3187, -4, 3190, -4, // NOLINT
|
|
|
|
3201, -4, 3203, -4, 3205, -4, 3207, -4, 3209, -4, 3211, -4, 3213, -4, 3215, -4, // NOLINT
|
|
|
|
3217, -4, 3219, -4, 3221, -4, 3223, -4, 3225, -4, 3227, -4, 3229, -4, 3231, -4, // NOLINT
|
|
|
|
3233, -4, 3235, -4, 3237, -4, 3239, -4, 3241, -4, 3243, -4, 3245, -4, 3247, -4, // NOLINT
|
|
|
|
3249, -4, 3251, -4, 3253, -4, 3255, -4, 3257, -4, 3259, -4, 3261, -4, 3263, -4, // NOLINT
|
|
|
|
3265, -4, 3267, -4, 3269, -4, 3271, -4, 3273, -4, 3275, -4, 3277, -4, 3279, -4, // NOLINT
|
|
|
|
3281, -4, 3283, -4, 3285, -4, 3287, -4, 3289, -4, 3291, -4, 3293, -4, 3295, -4, // NOLINT
|
|
|
|
3297, -4, 3299, -4, 3308, -4, 3310, -4, 3315, -4, 1073745152, -29056, 3365, -29056, 3367, -29056, // NOLINT
|
|
|
|
3373, -29056 }; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const uint16_t kToUppercaseMultiStrings1Size = 1; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const MultiCharacterSpecialCase<1> kToUppercaseMultiStrings5[1] = { // NOLINT
|
|
|
|
{{kSentinel}} }; // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kToUppercaseTable5Size = 95; // NOLINT
|
|
|
|
static const int32_t
|
|
|
|
kToUppercaseTable5[190] = {1601, -4, 1603, -4, 1605, -4, 1607, -4, 1609, -4,
|
|
|
|
1611, -4, 1613, -4, 1615, -4, // NOLINT
|
|
|
|
1617, -4, 1619, -4, 1621, -4, 1623, -4, 1625, -4,
|
|
|
|
1627, -4, 1629, -4, 1631, -4, // NOLINT
|
|
|
|
1633, -4, 1635, -4, 1637, -4, 1639, -4, 1641, -4,
|
|
|
|
1643, -4, 1645, -4, 1665, -4, // NOLINT
|
|
|
|
1667, -4, 1669, -4, 1671, -4, 1673, -4, 1675, -4,
|
|
|
|
1677, -4, 1679, -4, 1681, -4, // NOLINT
|
|
|
|
1683, -4, 1685, -4, 1687, -4, 1689, -4, 1691, -4,
|
|
|
|
1827, -4, 1829, -4, 1831, -4, // NOLINT
|
|
|
|
1833, -4, 1835, -4, 1837, -4, 1839, -4, 1843, -4,
|
|
|
|
1845, -4, 1847, -4, 1849, -4, // NOLINT
|
|
|
|
1851, -4, 1853, -4, 1855, -4, 1857, -4, 1859, -4,
|
|
|
|
1861, -4, 1863, -4, 1865, -4, // NOLINT
|
|
|
|
1867, -4, 1869, -4, 1871, -4, 1873, -4, 1875, -4,
|
|
|
|
1877, -4, 1879, -4, 1881, -4, // NOLINT
|
|
|
|
1883, -4, 1885, -4, 1887, -4, 1889, -4, 1891, -4,
|
|
|
|
1893, -4, 1895, -4, 1897, -4, // NOLINT
|
|
|
|
1899, -4, 1901, -4, 1903, -4, 1914, -4, 1916, -4,
|
|
|
|
1919, -4, 1921, -4, 1923, -4, // NOLINT
|
|
|
|
1925, -4, 1927, -4, 1932, -4, 1937, -4, 1939, -4,
|
|
|
|
1943, -4, 1945, -4, 1947, -4, // NOLINT
|
|
|
|
1949, -4, 1951, -4, 1953, -4, 1955, -4, 1957, -4,
|
|
|
|
1959, -4, 1961, -4}; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
static const uint16_t kToUppercaseMultiStrings5Size = 1; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const MultiCharacterSpecialCase<3> kToUppercaseMultiStrings7[12] = { // NOLINT
|
2011-01-03 10:28:39 +00:00
|
|
|
{{70, 70, kSentinel}}, {{70, 73, kSentinel}}, {{70, 76, kSentinel}}, {{70, 70, 73}}, // NOLINT
|
|
|
|
{{70, 70, 76}}, {{83, 84, kSentinel}}, {{1348, 1350, kSentinel}}, {{1348, 1333, kSentinel}}, // NOLINT
|
|
|
|
{{1348, 1339, kSentinel}}, {{1358, 1350, kSentinel}}, {{1348, 1341, kSentinel}}, {{kSentinel}} }; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
static const uint16_t kToUppercaseTable7Size = 14; // NOLINT
|
|
|
|
static const int32_t kToUppercaseTable7[28] = {
|
|
|
|
6912, 1, 6913, 5, 6914, 9, 6915, 13, 6916, 17, 6917, 21, 6918, 21, 6931, 25, // NOLINT
|
|
|
|
6932, 29, 6933, 33, 6934, 37, 6935, 41, 1073749825, -128, 8026, -128 }; // NOLINT
|
|
|
|
static const uint16_t kToUppercaseMultiStrings7Size = 12; // NOLINT
|
2008-07-03 15:10:15 +00:00
|
|
|
// Dispatches |c| to LookupMapping using the kToUppercase table and
// multi-string set that belong to its chunk (c >> 13).
//
// |n|, |result| and |allow_caching_ptr| are passed through to LookupMapping
// untouched. Returns LookupMapping's result, or 0 when the chunk has no
// uppercase table (only chunks 0, 1, 5 and 7 do).
int ToUppercase::Convert(uchar c, uchar n, uchar* result,
                         bool* allow_caching_ptr) {
  switch (static_cast<int>(c >> 13)) {
    case 0:
      return LookupMapping<true>(kToUppercaseTable0, kToUppercaseTable0Size,
                                 kToUppercaseMultiStrings0, c, n, result,
                                 allow_caching_ptr);
    case 1:
      return LookupMapping<true>(kToUppercaseTable1, kToUppercaseTable1Size,
                                 kToUppercaseMultiStrings1, c, n, result,
                                 allow_caching_ptr);
    case 5:
      return LookupMapping<true>(kToUppercaseTable5, kToUppercaseTable5Size,
                                 kToUppercaseMultiStrings5, c, n, result,
                                 allow_caching_ptr);
    case 7:
      return LookupMapping<true>(kToUppercaseTable7, kToUppercaseTable7Size,
                                 kToUppercaseMultiStrings7, c, n, result,
                                 allow_caching_ptr);
    default:
      break;
  }
  // No table covers this chunk.
  return 0;
}
|
2017-06-29 03:01:13 +00:00
|
|
|
#endif // !V8_INTL_SUPPORT
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2010-07-30 12:59:57 +00:00
|
|
|
static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings0[1] = { // NOLINT
|
2011-01-03 10:28:39 +00:00
|
|
|
{{kSentinel}} }; // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
static const uint16_t kEcma262CanonicalizeTable0Size = 498; // NOLINT
|
|
|
|
static const int32_t kEcma262CanonicalizeTable0[996] = {
|
|
|
|
1073741921, -128, 122, -128, 181, 2972,
|
|
|
|
1073742048, -128, 246, -128, 1073742072, -128,
|
|
|
|
254, -128, 255, 484, // NOLINT
|
|
|
|
257, -4, 259, -4, 261, -4,
|
|
|
|
263, -4, 265, -4, 267, -4,
|
|
|
|
269, -4, 271, -4, // NOLINT
|
|
|
|
273, -4, 275, -4, 277, -4,
|
|
|
|
279, -4, 281, -4, 283, -4,
|
|
|
|
285, -4, 287, -4, // NOLINT
|
|
|
|
289, -4, 291, -4, 293, -4,
|
|
|
|
295, -4, 297, -4, 299, -4,
|
|
|
|
301, -4, 303, -4, // NOLINT
|
|
|
|
307, -4, 309, -4, 311, -4,
|
|
|
|
314, -4, 316, -4, 318, -4,
|
|
|
|
320, -4, 322, -4, // NOLINT
|
|
|
|
324, -4, 326, -4, 328, -4,
|
|
|
|
331, -4, 333, -4, 335, -4,
|
|
|
|
337, -4, 339, -4, // NOLINT
|
|
|
|
341, -4, 343, -4, 345, -4,
|
|
|
|
347, -4, 349, -4, 351, -4,
|
|
|
|
353, -4, 355, -4, // NOLINT
|
|
|
|
357, -4, 359, -4, 361, -4,
|
|
|
|
363, -4, 365, -4, 367, -4,
|
|
|
|
369, -4, 371, -4, // NOLINT
|
|
|
|
373, -4, 375, -4, 378, -4,
|
|
|
|
380, -4, 382, -4, 384, 780,
|
|
|
|
387, -4, 389, -4, // NOLINT
|
|
|
|
392, -4, 396, -4, 402, -4,
|
|
|
|
405, 388, 409, -4, 410, 652,
|
|
|
|
414, 520, 417, -4, // NOLINT
|
|
|
|
419, -4, 421, -4, 424, -4,
|
|
|
|
429, -4, 432, -4, 436, -4,
|
|
|
|
438, -4, 441, -4, // NOLINT
|
|
|
|
445, -4, 447, 224, 453, -4,
|
|
|
|
454, -8, 456, -4, 457, -8,
|
|
|
|
459, -4, 460, -8, // NOLINT
|
|
|
|
462, -4, 464, -4, 466, -4,
|
|
|
|
468, -4, 470, -4, 472, -4,
|
|
|
|
474, -4, 476, -4, // NOLINT
|
|
|
|
477, -316, 479, -4, 481, -4,
|
|
|
|
483, -4, 485, -4, 487, -4,
|
|
|
|
489, -4, 491, -4, // NOLINT
|
|
|
|
493, -4, 495, -4, 498, -4,
|
|
|
|
499, -8, 501, -4, 505, -4,
|
|
|
|
507, -4, 509, -4, // NOLINT
|
|
|
|
511, -4, 513, -4, 515, -4,
|
|
|
|
517, -4, 519, -4, 521, -4,
|
|
|
|
523, -4, 525, -4, // NOLINT
|
|
|
|
527, -4, 529, -4, 531, -4,
|
|
|
|
533, -4, 535, -4, 537, -4,
|
|
|
|
539, -4, 541, -4, // NOLINT
|
|
|
|
543, -4, 547, -4, 549, -4,
|
|
|
|
551, -4, 553, -4, 555, -4,
|
|
|
|
557, -4, 559, -4, // NOLINT
|
|
|
|
561, -4, 563, -4, 572, -4,
|
|
|
|
1073742399, 43260, 576, 43260, 578, -4,
|
|
|
|
583, -4, 585, -4, // NOLINT
|
|
|
|
587, -4, 589, -4, 591, -4,
|
|
|
|
592, 43132, 593, 43120, 594, 43128,
|
|
|
|
595, -840, 596, -824, // NOLINT
|
|
|
|
1073742422, -820, 599, -820, 601, -808,
|
|
|
|
603, -812, 604, 169276, 608, -820,
|
|
|
|
609, 169260, 611, -828, // NOLINT
|
|
|
|
613, 169120, 614, 169232, 616, -836,
|
|
|
|
617, -844, 619, 42972, 620, 169220,
|
|
|
|
623, -844, 625, 42996, // NOLINT
|
|
|
|
626, -852, 629, -856, 637, 42908,
|
|
|
|
640, -872, 643, -872, 647, 169128,
|
|
|
|
648, -872, 649, -276, // NOLINT
|
|
|
|
1073742474, -868, 651, -868, 652, -284,
|
|
|
|
658, -876, 670, 169032, 837, 336,
|
|
|
|
881, -4, 883, -4, // NOLINT
|
|
|
|
887, -4, 1073742715, 520, 893, 520,
|
|
|
|
940, -152, 1073742765, -148, 943, -148,
|
|
|
|
1073742769, -128, 961, -128, // NOLINT
|
|
|
|
962, -124, 1073742787, -128, 971, -128,
|
|
|
|
972, -256, 1073742797, -252, 974, -252,
|
|
|
|
976, -248, 977, -228, // NOLINT
|
|
|
|
981, -188, 982, -216, 983, -32,
|
|
|
|
985, -4, 987, -4, 989, -4,
|
|
|
|
991, -4, 993, -4, // NOLINT
|
|
|
|
995, -4, 997, -4, 999, -4,
|
|
|
|
1001, -4, 1003, -4, 1005, -4,
|
|
|
|
1007, -4, 1008, -344, // NOLINT
|
|
|
|
1009, -320, 1010, 28, 1011, -464,
|
|
|
|
1013, -384, 1016, -4, 1019, -4,
|
|
|
|
1073742896, -128, 1103, -128, // NOLINT
|
|
|
|
1073742928, -320, 1119, -320, 1121, -4,
|
|
|
|
1123, -4, 1125, -4, 1127, -4,
|
|
|
|
1129, -4, 1131, -4, // NOLINT
|
|
|
|
1133, -4, 1135, -4, 1137, -4,
|
|
|
|
1139, -4, 1141, -4, 1143, -4,
|
|
|
|
1145, -4, 1147, -4, // NOLINT
|
|
|
|
1149, -4, 1151, -4, 1153, -4,
|
|
|
|
1163, -4, 1165, -4, 1167, -4,
|
|
|
|
1169, -4, 1171, -4, // NOLINT
|
|
|
|
1173, -4, 1175, -4, 1177, -4,
|
|
|
|
1179, -4, 1181, -4, 1183, -4,
|
|
|
|
1185, -4, 1187, -4, // NOLINT
|
|
|
|
1189, -4, 1191, -4, 1193, -4,
|
|
|
|
1195, -4, 1197, -4, 1199, -4,
|
|
|
|
1201, -4, 1203, -4, // NOLINT
|
|
|
|
1205, -4, 1207, -4, 1209, -4,
|
|
|
|
1211, -4, 1213, -4, 1215, -4,
|
|
|
|
1218, -4, 1220, -4, // NOLINT
|
|
|
|
1222, -4, 1224, -4, 1226, -4,
|
|
|
|
1228, -4, 1230, -4, 1231, -60,
|
|
|
|
1233, -4, 1235, -4, // NOLINT
|
|
|
|
1237, -4, 1239, -4, 1241, -4,
|
|
|
|
1243, -4, 1245, -4, 1247, -4,
|
|
|
|
1249, -4, 1251, -4, // NOLINT
|
|
|
|
1253, -4, 1255, -4, 1257, -4,
|
|
|
|
1259, -4, 1261, -4, 1263, -4,
|
|
|
|
1265, -4, 1267, -4, // NOLINT
|
|
|
|
1269, -4, 1271, -4, 1273, -4,
|
|
|
|
1275, -4, 1277, -4, 1279, -4,
|
|
|
|
1281, -4, 1283, -4, // NOLINT
|
|
|
|
1285, -4, 1287, -4, 1289, -4,
|
|
|
|
1291, -4, 1293, -4, 1295, -4,
|
|
|
|
1297, -4, 1299, -4, // NOLINT
|
|
|
|
1301, -4, 1303, -4, 1305, -4,
|
|
|
|
1307, -4, 1309, -4, 1311, -4,
|
|
|
|
1313, -4, 1315, -4, // NOLINT
|
|
|
|
1317, -4, 1319, -4, 1321, -4,
|
|
|
|
1323, -4, 1325, -4, 1327, -4,
|
|
|
|
1073743201, -192, 1414, -192, // NOLINT
|
|
|
|
7545, 141328, 7549, 15256, 7681, -4,
|
|
|
|
7683, -4, 7685, -4, 7687, -4,
|
|
|
|
7689, -4, 7691, -4, // NOLINT
|
|
|
|
7693, -4, 7695, -4, 7697, -4,
|
|
|
|
7699, -4, 7701, -4, 7703, -4,
|
|
|
|
7705, -4, 7707, -4, // NOLINT
|
|
|
|
7709, -4, 7711, -4, 7713, -4,
|
|
|
|
7715, -4, 7717, -4, 7719, -4,
|
|
|
|
7721, -4, 7723, -4, // NOLINT
|
|
|
|
7725, -4, 7727, -4, 7729, -4,
|
|
|
|
7731, -4, 7733, -4, 7735, -4,
|
|
|
|
7737, -4, 7739, -4, // NOLINT
|
|
|
|
7741, -4, 7743, -4, 7745, -4,
|
|
|
|
7747, -4, 7749, -4, 7751, -4,
|
|
|
|
7753, -4, 7755, -4, // NOLINT
|
|
|
|
7757, -4, 7759, -4, 7761, -4,
|
|
|
|
7763, -4, 7765, -4, 7767, -4,
|
|
|
|
7769, -4, 7771, -4, // NOLINT
|
|
|
|
7773, -4, 7775, -4, 7777, -4,
|
|
|
|
7779, -4, 7781, -4, 7783, -4,
|
|
|
|
7785, -4, 7787, -4, // NOLINT
|
|
|
|
7789, -4, 7791, -4, 7793, -4,
|
|
|
|
7795, -4, 7797, -4, 7799, -4,
|
|
|
|
7801, -4, 7803, -4, // NOLINT
|
|
|
|
7805, -4, 7807, -4, 7809, -4,
|
|
|
|
7811, -4, 7813, -4, 7815, -4,
|
|
|
|
7817, -4, 7819, -4, // NOLINT
|
|
|
|
7821, -4, 7823, -4, 7825, -4,
|
|
|
|
7827, -4, 7829, -4, 7835, -236,
|
|
|
|
7841, -4, 7843, -4, // NOLINT
|
|
|
|
7845, -4, 7847, -4, 7849, -4,
|
|
|
|
7851, -4, 7853, -4, 7855, -4,
|
|
|
|
7857, -4, 7859, -4, // NOLINT
|
|
|
|
7861, -4, 7863, -4, 7865, -4,
|
|
|
|
7867, -4, 7869, -4, 7871, -4,
|
|
|
|
7873, -4, 7875, -4, // NOLINT
|
|
|
|
7877, -4, 7879, -4, 7881, -4,
|
|
|
|
7883, -4, 7885, -4, 7887, -4,
|
|
|
|
7889, -4, 7891, -4, // NOLINT
|
|
|
|
7893, -4, 7895, -4, 7897, -4,
|
|
|
|
7899, -4, 7901, -4, 7903, -4,
|
|
|
|
7905, -4, 7907, -4, // NOLINT
|
|
|
|
7909, -4, 7911, -4, 7913, -4,
|
|
|
|
7915, -4, 7917, -4, 7919, -4,
|
|
|
|
7921, -4, 7923, -4, // NOLINT
|
|
|
|
7925, -4, 7927, -4, 7929, -4,
|
|
|
|
7931, -4, 7933, -4, 7935, -4,
|
|
|
|
1073749760, 32, 7943, 32, // NOLINT
|
|
|
|
1073749776, 32, 7957, 32, 1073749792, 32,
|
|
|
|
7975, 32, 1073749808, 32, 7991, 32,
|
|
|
|
1073749824, 32, 8005, 32, // NOLINT
|
|
|
|
8017, 32, 8019, 32, 8021, 32,
|
|
|
|
8023, 32, 1073749856, 32, 8039, 32,
|
|
|
|
1073749872, 296, 8049, 296, // NOLINT
|
|
|
|
1073749874, 344, 8053, 344, 1073749878, 400,
|
|
|
|
8055, 400, 1073749880, 512, 8057, 512,
|
|
|
|
1073749882, 448, 8059, 448, // NOLINT
|
|
|
|
1073749884, 504, 8061, 504, 1073749936, 32,
|
|
|
|
8113, 32, 8126, -28820, 1073749968, 32,
|
|
|
|
8145, 32, 1073749984, 32, // NOLINT
|
|
|
|
8161, 32, 8165, 28}; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Size constant for the chunk-0 multi-character table.
// NOTE(review): presumably the entry count of
// kEcma262CanonicalizeMultiStrings0, which is defined earlier in this file;
// confirm against that definition.
static const uint16_t kEcma262CanonicalizeMultiStrings0Size = 1; // NOLINT
|
|
|
|
// Multi-character special cases passed alongside kEcma262CanonicalizeTable1
// by Ecma262Canonicalize::Convert. The one and only entry contains just
// kSentinel.
static const MultiCharacterSpecialCase<1>
    kEcma262CanonicalizeMultiStrings1[1] = {{{kSentinel}}};  // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
// Canonicalization data handed to LookupMapping by
// Ecma262Canonicalize::Convert when (c >> 13) == 1.
// The array is declared with 2 * kEcma262CanonicalizeTable1Size slots.
// NOTE(review): values appear to be (key, value) pairs, and keys of
// 1 << 30 or more presumably carry the kStartBit flag; confirm against
// LookupMapping.
static const uint16_t kEcma262CanonicalizeTable1Size = 73;  // NOLINT
static const int32_t kEcma262CanonicalizeTable1[146] = {
    334, -112, 1073742192, -64, 383, -64, 388, -4,
    1073743056, -104, 1257, -104, 1073744944, -192, 3166, -192,
    3169, -4, 3173, -43180, 3174, -43168, 3176, -4,
    3178, -4, 3180, -4, 3187, -4, 3190, -4,
    3201, -4, 3203, -4, 3205, -4, 3207, -4,
    3209, -4, 3211, -4, 3213, -4, 3215, -4,
    3217, -4, 3219, -4, 3221, -4, 3223, -4,
    3225, -4, 3227, -4, 3229, -4, 3231, -4,
    3233, -4, 3235, -4, 3237, -4, 3239, -4,
    3241, -4, 3243, -4, 3245, -4, 3247, -4,
    3249, -4, 3251, -4, 3253, -4, 3255, -4,
    3257, -4, 3259, -4, 3261, -4, 3263, -4,
    3265, -4, 3267, -4, 3269, -4, 3271, -4,
    3273, -4, 3275, -4, 3277, -4, 3279, -4,
    3281, -4, 3283, -4, 3285, -4, 3287, -4,
    3289, -4, 3291, -4, 3293, -4, 3295, -4,
    3297, -4, 3299, -4, 3308, -4, 3310, -4,
    3315, -4, 1073745152, -29056, 3365, -29056, 3367, -29056,
    3373, -29056};  // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Size constant matching kEcma262CanonicalizeMultiStrings1, which is declared
// with exactly one element.
static const uint16_t kEcma262CanonicalizeMultiStrings1Size = 1; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
// Multi-character special cases passed alongside kEcma262CanonicalizeTable5
// by Ecma262Canonicalize::Convert. The one and only entry contains just
// kSentinel.
static const MultiCharacterSpecialCase<1>
    kEcma262CanonicalizeMultiStrings5[1] = {{{kSentinel}}};  // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
// Canonicalization data handed to LookupMapping by
// Ecma262Canonicalize::Convert when (c >> 13) == 5.
// The array is declared with 2 * kEcma262CanonicalizeTable5Size slots; every
// odd-indexed slot in this table is -4.
// NOTE(review): values appear to be (key, value) pairs; confirm against
// LookupMapping.
static const uint16_t kEcma262CanonicalizeTable5Size = 95;  // NOLINT
static const int32_t kEcma262CanonicalizeTable5[190] = {
    1601, -4, 1603, -4, 1605, -4, 1607, -4,
    1609, -4, 1611, -4, 1613, -4, 1615, -4,
    1617, -4, 1619, -4, 1621, -4, 1623, -4,
    1625, -4, 1627, -4, 1629, -4, 1631, -4,
    1633, -4, 1635, -4, 1637, -4, 1639, -4,
    1641, -4, 1643, -4, 1645, -4, 1665, -4,
    1667, -4, 1669, -4, 1671, -4, 1673, -4,
    1675, -4, 1677, -4, 1679, -4, 1681, -4,
    1683, -4, 1685, -4, 1687, -4, 1689, -4,
    1691, -4, 1827, -4, 1829, -4, 1831, -4,
    1833, -4, 1835, -4, 1837, -4, 1839, -4,
    1843, -4, 1845, -4, 1847, -4, 1849, -4,
    1851, -4, 1853, -4, 1855, -4, 1857, -4,
    1859, -4, 1861, -4, 1863, -4, 1865, -4,
    1867, -4, 1869, -4, 1871, -4, 1873, -4,
    1875, -4, 1877, -4, 1879, -4, 1881, -4,
    1883, -4, 1885, -4, 1887, -4, 1889, -4,
    1891, -4, 1893, -4, 1895, -4, 1897, -4,
    1899, -4, 1901, -4, 1903, -4, 1914, -4,
    1916, -4, 1919, -4, 1921, -4, 1923, -4,
    1925, -4, 1927, -4, 1932, -4, 1937, -4,
    1939, -4, 1943, -4, 1945, -4, 1947, -4,
    1949, -4, 1951, -4, 1953, -4, 1955, -4,
    1957, -4, 1959, -4, 1961, -4};  // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
// Size constant matching kEcma262CanonicalizeMultiStrings5, which is declared
// with exactly one element.
static const uint16_t kEcma262CanonicalizeMultiStrings5Size = 1; // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Multi-character special cases passed alongside kEcma262CanonicalizeTable7
// by Ecma262Canonicalize::Convert. The one and only entry contains just
// kSentinel.
static const MultiCharacterSpecialCase<1>
    kEcma262CanonicalizeMultiStrings7[1] = {{{kSentinel}}};  // NOLINT
|
2010-07-30 12:59:57 +00:00
|
|
|
// Canonicalization data handed to LookupMapping by
// Ecma262Canonicalize::Convert when (c >> 13) == 7.
// The array is declared with 2 * kEcma262CanonicalizeTable7Size slots.
// NOTE(review): the first key is above 1 << 30 and presumably carries the
// kStartBit flag; confirm against LookupMapping.
static const uint16_t kEcma262CanonicalizeTable7Size = 2;  // NOLINT
static const int32_t kEcma262CanonicalizeTable7[4] = {
    1073749825, -128,
    8026, -128};  // NOLINT
|
|
|
|
// Size constant matching kEcma262CanonicalizeMultiStrings7, which is declared
// with exactly one element.
static const uint16_t kEcma262CanonicalizeMultiStrings7Size = 1; // NOLINT
|
2008-11-25 11:07:48 +00:00
|
|
|
// Runs |c| through the ECMA-262 canonicalization tables.
//
// The data is partitioned into chunks selected by |c| >> 13. Only chunks 0,
// 1, 5 and 7 have a table. For those, the lookup is delegated to
// LookupMapping<true> with |c|, |n|, |result| and |allow_caching_ptr|
// forwarded unchanged, and its return value is passed straight back. For any
// other chunk the function returns 0 and leaves |result| and
// |allow_caching_ptr| untouched.
int Ecma262Canonicalize::Convert(uchar c,
                                 uchar n,
                                 uchar* result,
                                 bool* allow_caching_ptr) {
  const int chunk = c >> 13;
  if (chunk == 0) {
    return LookupMapping<true>(
        kEcma262CanonicalizeTable0, kEcma262CanonicalizeTable0Size,
        kEcma262CanonicalizeMultiStrings0, c, n, result, allow_caching_ptr);
  }
  if (chunk == 1) {
    return LookupMapping<true>(
        kEcma262CanonicalizeTable1, kEcma262CanonicalizeTable1Size,
        kEcma262CanonicalizeMultiStrings1, c, n, result, allow_caching_ptr);
  }
  if (chunk == 5) {
    return LookupMapping<true>(
        kEcma262CanonicalizeTable5, kEcma262CanonicalizeTable5Size,
        kEcma262CanonicalizeMultiStrings5, c, n, result, allow_caching_ptr);
  }
  if (chunk == 7) {
    return LookupMapping<true>(
        kEcma262CanonicalizeTable7, kEcma262CanonicalizeTable7Size,
        kEcma262CanonicalizeMultiStrings7, c, n, result, allow_caching_ptr);
  }
  // No table exists for the remaining chunks.
  return 0;
}
|
|
|
|
|
2014-10-08 14:55:03 +00:00
|
|
|
// Multi-character entries for chunk 0 of the ECMA-262 un-canonicalization
// data: 507 rows of up to four code points each. Rows with fewer than four
// code points are terminated by kSentinel; the final row holds only
// kSentinel.
// NOTE(review): each row presumably lists the characters that canonicalize
// to one another, and kEcma262UnCanonicalizeTable0 appears to refer to rows
// of this array; confirm against LookupMapping.
static const MultiCharacterSpecialCase<4>
    kEcma262UnCanonicalizeMultiStrings0[507] = {  // NOLINT
    {{65, 97, kSentinel}}, {{90, 122, kSentinel}},
    {{181, 924, 956, kSentinel}}, {{192, 224, kSentinel}},
    {{214, 246, kSentinel}}, {{216, 248, kSentinel}},
    {{222, 254, kSentinel}}, {{255, 376, kSentinel}},
    {{256, 257, kSentinel}}, {{258, 259, kSentinel}},
    {{260, 261, kSentinel}}, {{262, 263, kSentinel}},
    {{264, 265, kSentinel}}, {{266, 267, kSentinel}},
    {{268, 269, kSentinel}}, {{270, 271, kSentinel}},
    {{272, 273, kSentinel}}, {{274, 275, kSentinel}},
    {{276, 277, kSentinel}}, {{278, 279, kSentinel}},
    {{280, 281, kSentinel}}, {{282, 283, kSentinel}},
    {{284, 285, kSentinel}}, {{286, 287, kSentinel}},
    {{288, 289, kSentinel}}, {{290, 291, kSentinel}},
    {{292, 293, kSentinel}}, {{294, 295, kSentinel}},
    {{296, 297, kSentinel}}, {{298, 299, kSentinel}},
    {{300, 301, kSentinel}}, {{302, 303, kSentinel}},
    {{306, 307, kSentinel}}, {{308, 309, kSentinel}},
    {{310, 311, kSentinel}}, {{313, 314, kSentinel}},
    {{315, 316, kSentinel}}, {{317, 318, kSentinel}},
    {{319, 320, kSentinel}}, {{321, 322, kSentinel}},
    {{323, 324, kSentinel}}, {{325, 326, kSentinel}},
    {{327, 328, kSentinel}}, {{330, 331, kSentinel}},
    {{332, 333, kSentinel}}, {{334, 335, kSentinel}},
    {{336, 337, kSentinel}}, {{338, 339, kSentinel}},
    {{340, 341, kSentinel}}, {{342, 343, kSentinel}},
    {{344, 345, kSentinel}}, {{346, 347, kSentinel}},
    {{348, 349, kSentinel}}, {{350, 351, kSentinel}},
    {{352, 353, kSentinel}}, {{354, 355, kSentinel}},
    {{356, 357, kSentinel}}, {{358, 359, kSentinel}},
    {{360, 361, kSentinel}}, {{362, 363, kSentinel}},
    {{364, 365, kSentinel}}, {{366, 367, kSentinel}},
    {{368, 369, kSentinel}}, {{370, 371, kSentinel}},
    {{372, 373, kSentinel}}, {{374, 375, kSentinel}},
    {{377, 378, kSentinel}}, {{379, 380, kSentinel}},
    {{381, 382, kSentinel}}, {{384, 579, kSentinel}},
    {{385, 595, kSentinel}}, {{386, 387, kSentinel}},
    {{388, 389, kSentinel}}, {{390, 596, kSentinel}},
    {{391, 392, kSentinel}}, {{393, 598, kSentinel}},
    {{394, 599, kSentinel}}, {{395, 396, kSentinel}},
    {{398, 477, kSentinel}}, {{399, 601, kSentinel}},
    {{400, 603, kSentinel}}, {{401, 402, kSentinel}},
    {{403, 608, kSentinel}}, {{404, 611, kSentinel}},
    {{405, 502, kSentinel}}, {{406, 617, kSentinel}},
    {{407, 616, kSentinel}}, {{408, 409, kSentinel}},
    {{410, 573, kSentinel}}, {{412, 623, kSentinel}},
    {{413, 626, kSentinel}}, {{414, 544, kSentinel}},
    {{415, 629, kSentinel}}, {{416, 417, kSentinel}},
    {{418, 419, kSentinel}}, {{420, 421, kSentinel}},
    {{422, 640, kSentinel}}, {{423, 424, kSentinel}},
    {{425, 643, kSentinel}}, {{428, 429, kSentinel}},
    {{430, 648, kSentinel}}, {{431, 432, kSentinel}},
    {{433, 650, kSentinel}}, {{434, 651, kSentinel}},
    {{435, 436, kSentinel}}, {{437, 438, kSentinel}},
    {{439, 658, kSentinel}}, {{440, 441, kSentinel}},
    {{444, 445, kSentinel}}, {{447, 503, kSentinel}},
    {{452, 453, 454, kSentinel}}, {{455, 456, 457, kSentinel}},
    {{458, 459, 460, kSentinel}}, {{461, 462, kSentinel}},
    {{463, 464, kSentinel}}, {{465, 466, kSentinel}},
    {{467, 468, kSentinel}}, {{469, 470, kSentinel}},
    {{471, 472, kSentinel}}, {{473, 474, kSentinel}},
    {{475, 476, kSentinel}}, {{478, 479, kSentinel}},
    {{480, 481, kSentinel}}, {{482, 483, kSentinel}},
    {{484, 485, kSentinel}}, {{486, 487, kSentinel}},
    {{488, 489, kSentinel}}, {{490, 491, kSentinel}},
    {{492, 493, kSentinel}}, {{494, 495, kSentinel}},
    {{497, 498, 499, kSentinel}}, {{500, 501, kSentinel}},
    {{504, 505, kSentinel}}, {{506, 507, kSentinel}},
    {{508, 509, kSentinel}}, {{510, 511, kSentinel}},
    {{512, 513, kSentinel}}, {{514, 515, kSentinel}},
    {{516, 517, kSentinel}}, {{518, 519, kSentinel}},
    {{520, 521, kSentinel}}, {{522, 523, kSentinel}},
    {{524, 525, kSentinel}}, {{526, 527, kSentinel}},
    {{528, 529, kSentinel}}, {{530, 531, kSentinel}},
    {{532, 533, kSentinel}}, {{534, 535, kSentinel}},
    {{536, 537, kSentinel}}, {{538, 539, kSentinel}},
    {{540, 541, kSentinel}}, {{542, 543, kSentinel}},
    {{546, 547, kSentinel}}, {{548, 549, kSentinel}},
    {{550, 551, kSentinel}}, {{552, 553, kSentinel}},
    {{554, 555, kSentinel}}, {{556, 557, kSentinel}},
    {{558, 559, kSentinel}}, {{560, 561, kSentinel}},
    {{562, 563, kSentinel}}, {{570, 11365, kSentinel}},
    {{571, 572, kSentinel}}, {{574, 11366, kSentinel}},
    {{575, 11390, kSentinel}}, {{576, 11391, kSentinel}},
    {{577, 578, kSentinel}}, {{580, 649, kSentinel}},
    {{581, 652, kSentinel}}, {{582, 583, kSentinel}},
    {{584, 585, kSentinel}}, {{586, 587, kSentinel}},
    {{588, 589, kSentinel}}, {{590, 591, kSentinel}},
    {{592, 11375, kSentinel}}, {{593, 11373, kSentinel}},
    {{594, 11376, kSentinel}}, {{604, 42923, kSentinel}},
    {{609, 42924, kSentinel}}, {{613, 42893, kSentinel}},
    {{614, 42922, kSentinel}}, {{619, 11362, kSentinel}},
    {{620, 42925, kSentinel}}, {{625, 11374, kSentinel}},
    {{637, 11364, kSentinel}}, {{647, 42929, kSentinel}},
    {{670, 42928, kSentinel}}, {{837, 921, 953, 8126}},
    {{880, 881, kSentinel}}, {{882, 883, kSentinel}},
    {{886, 887, kSentinel}}, {{891, 1021, kSentinel}},
    {{893, 1023, kSentinel}}, {{895, 1011, kSentinel}},
    {{902, 940, kSentinel}}, {{904, 941, kSentinel}},
    {{906, 943, kSentinel}}, {{908, 972, kSentinel}},
    {{910, 973, kSentinel}}, {{911, 974, kSentinel}},
    {{913, 945, kSentinel}}, {{914, 946, 976, kSentinel}},
    {{915, 947, kSentinel}}, {{916, 948, kSentinel}},
    {{917, 949, 1013, kSentinel}}, {{918, 950, kSentinel}},
    {{919, 951, kSentinel}}, {{920, 952, 977, kSentinel}},
    {{922, 954, 1008, kSentinel}}, {{923, 955, kSentinel}},
    {{925, 957, kSentinel}}, {{927, 959, kSentinel}},
    {{928, 960, 982, kSentinel}}, {{929, 961, 1009, kSentinel}},
    {{931, 962, 963, kSentinel}}, {{932, 964, kSentinel}},
    {{933, 965, kSentinel}}, {{934, 966, 981, kSentinel}},
    {{935, 967, kSentinel}}, {{939, 971, kSentinel}},
    {{975, 983, kSentinel}}, {{984, 985, kSentinel}},
    {{986, 987, kSentinel}}, {{988, 989, kSentinel}},
    {{990, 991, kSentinel}}, {{992, 993, kSentinel}},
    {{994, 995, kSentinel}}, {{996, 997, kSentinel}},
    {{998, 999, kSentinel}}, {{1000, 1001, kSentinel}},
    {{1002, 1003, kSentinel}}, {{1004, 1005, kSentinel}},
    {{1006, 1007, kSentinel}}, {{1010, 1017, kSentinel}},
    {{1015, 1016, kSentinel}}, {{1018, 1019, kSentinel}},
    {{1024, 1104, kSentinel}}, {{1039, 1119, kSentinel}},
    {{1040, 1072, kSentinel}}, {{1071, 1103, kSentinel}},
    {{1120, 1121, kSentinel}}, {{1122, 1123, kSentinel}},
    {{1124, 1125, kSentinel}}, {{1126, 1127, kSentinel}},
    {{1128, 1129, kSentinel}}, {{1130, 1131, kSentinel}},
    {{1132, 1133, kSentinel}}, {{1134, 1135, kSentinel}},
    {{1136, 1137, kSentinel}}, {{1138, 1139, kSentinel}},
    {{1140, 1141, kSentinel}}, {{1142, 1143, kSentinel}},
    {{1144, 1145, kSentinel}}, {{1146, 1147, kSentinel}},
    {{1148, 1149, kSentinel}}, {{1150, 1151, kSentinel}},
    {{1152, 1153, kSentinel}}, {{1162, 1163, kSentinel}},
    {{1164, 1165, kSentinel}}, {{1166, 1167, kSentinel}},
    {{1168, 1169, kSentinel}}, {{1170, 1171, kSentinel}},
    {{1172, 1173, kSentinel}}, {{1174, 1175, kSentinel}},
    {{1176, 1177, kSentinel}}, {{1178, 1179, kSentinel}},
    {{1180, 1181, kSentinel}}, {{1182, 1183, kSentinel}},
    {{1184, 1185, kSentinel}}, {{1186, 1187, kSentinel}},
    {{1188, 1189, kSentinel}}, {{1190, 1191, kSentinel}},
    {{1192, 1193, kSentinel}}, {{1194, 1195, kSentinel}},
    {{1196, 1197, kSentinel}}, {{1198, 1199, kSentinel}},
    {{1200, 1201, kSentinel}}, {{1202, 1203, kSentinel}},
    {{1204, 1205, kSentinel}}, {{1206, 1207, kSentinel}},
    {{1208, 1209, kSentinel}}, {{1210, 1211, kSentinel}},
    {{1212, 1213, kSentinel}}, {{1214, 1215, kSentinel}},
    {{1216, 1231, kSentinel}}, {{1217, 1218, kSentinel}},
    {{1219, 1220, kSentinel}}, {{1221, 1222, kSentinel}},
    {{1223, 1224, kSentinel}}, {{1225, 1226, kSentinel}},
    {{1227, 1228, kSentinel}}, {{1229, 1230, kSentinel}},
    {{1232, 1233, kSentinel}}, {{1234, 1235, kSentinel}},
    {{1236, 1237, kSentinel}}, {{1238, 1239, kSentinel}},
    {{1240, 1241, kSentinel}}, {{1242, 1243, kSentinel}},
    {{1244, 1245, kSentinel}}, {{1246, 1247, kSentinel}},
    {{1248, 1249, kSentinel}}, {{1250, 1251, kSentinel}},
    {{1252, 1253, kSentinel}}, {{1254, 1255, kSentinel}},
    {{1256, 1257, kSentinel}}, {{1258, 1259, kSentinel}},
    {{1260, 1261, kSentinel}}, {{1262, 1263, kSentinel}},
    {{1264, 1265, kSentinel}}, {{1266, 1267, kSentinel}},
    {{1268, 1269, kSentinel}}, {{1270, 1271, kSentinel}},
    {{1272, 1273, kSentinel}}, {{1274, 1275, kSentinel}},
    {{1276, 1277, kSentinel}}, {{1278, 1279, kSentinel}},
    {{1280, 1281, kSentinel}}, {{1282, 1283, kSentinel}},
    {{1284, 1285, kSentinel}}, {{1286, 1287, kSentinel}},
    {{1288, 1289, kSentinel}}, {{1290, 1291, kSentinel}},
    {{1292, 1293, kSentinel}}, {{1294, 1295, kSentinel}},
    {{1296, 1297, kSentinel}}, {{1298, 1299, kSentinel}},
    {{1300, 1301, kSentinel}}, {{1302, 1303, kSentinel}},
    {{1304, 1305, kSentinel}}, {{1306, 1307, kSentinel}},
    {{1308, 1309, kSentinel}}, {{1310, 1311, kSentinel}},
    {{1312, 1313, kSentinel}}, {{1314, 1315, kSentinel}},
    {{1316, 1317, kSentinel}}, {{1318, 1319, kSentinel}},
    {{1320, 1321, kSentinel}}, {{1322, 1323, kSentinel}},
    {{1324, 1325, kSentinel}}, {{1326, 1327, kSentinel}},
    {{1329, 1377, kSentinel}}, {{1366, 1414, kSentinel}},
    {{4256, 11520, kSentinel}}, {{4293, 11557, kSentinel}},
    {{4295, 11559, kSentinel}}, {{4301, 11565, kSentinel}},
    {{7545, 42877, kSentinel}}, {{7549, 11363, kSentinel}},
    {{7680, 7681, kSentinel}}, {{7682, 7683, kSentinel}},
    {{7684, 7685, kSentinel}}, {{7686, 7687, kSentinel}},
    {{7688, 7689, kSentinel}}, {{7690, 7691, kSentinel}},
    {{7692, 7693, kSentinel}}, {{7694, 7695, kSentinel}},
    {{7696, 7697, kSentinel}}, {{7698, 7699, kSentinel}},
    {{7700, 7701, kSentinel}}, {{7702, 7703, kSentinel}},
    {{7704, 7705, kSentinel}}, {{7706, 7707, kSentinel}},
    {{7708, 7709, kSentinel}}, {{7710, 7711, kSentinel}},
    {{7712, 7713, kSentinel}}, {{7714, 7715, kSentinel}},
    {{7716, 7717, kSentinel}}, {{7718, 7719, kSentinel}},
    {{7720, 7721, kSentinel}}, {{7722, 7723, kSentinel}},
    {{7724, 7725, kSentinel}}, {{7726, 7727, kSentinel}},
    {{7728, 7729, kSentinel}}, {{7730, 7731, kSentinel}},
    {{7732, 7733, kSentinel}}, {{7734, 7735, kSentinel}},
    {{7736, 7737, kSentinel}}, {{7738, 7739, kSentinel}},
    {{7740, 7741, kSentinel}}, {{7742, 7743, kSentinel}},
    {{7744, 7745, kSentinel}}, {{7746, 7747, kSentinel}},
    {{7748, 7749, kSentinel}}, {{7750, 7751, kSentinel}},
    {{7752, 7753, kSentinel}}, {{7754, 7755, kSentinel}},
    {{7756, 7757, kSentinel}}, {{7758, 7759, kSentinel}},
    {{7760, 7761, kSentinel}}, {{7762, 7763, kSentinel}},
    {{7764, 7765, kSentinel}}, {{7766, 7767, kSentinel}},
    {{7768, 7769, kSentinel}}, {{7770, 7771, kSentinel}},
    {{7772, 7773, kSentinel}}, {{7774, 7775, kSentinel}},
    {{7776, 7777, 7835, kSentinel}}, {{7778, 7779, kSentinel}},
    {{7780, 7781, kSentinel}}, {{7782, 7783, kSentinel}},
    {{7784, 7785, kSentinel}}, {{7786, 7787, kSentinel}},
    {{7788, 7789, kSentinel}}, {{7790, 7791, kSentinel}},
    {{7792, 7793, kSentinel}}, {{7794, 7795, kSentinel}},
    {{7796, 7797, kSentinel}}, {{7798, 7799, kSentinel}},
    {{7800, 7801, kSentinel}}, {{7802, 7803, kSentinel}},
    {{7804, 7805, kSentinel}}, {{7806, 7807, kSentinel}},
    {{7808, 7809, kSentinel}}, {{7810, 7811, kSentinel}},
    {{7812, 7813, kSentinel}}, {{7814, 7815, kSentinel}},
    {{7816, 7817, kSentinel}}, {{7818, 7819, kSentinel}},
    {{7820, 7821, kSentinel}}, {{7822, 7823, kSentinel}},
    {{7824, 7825, kSentinel}}, {{7826, 7827, kSentinel}},
    {{7828, 7829, kSentinel}}, {{7840, 7841, kSentinel}},
    {{7842, 7843, kSentinel}}, {{7844, 7845, kSentinel}},
    {{7846, 7847, kSentinel}}, {{7848, 7849, kSentinel}},
    {{7850, 7851, kSentinel}}, {{7852, 7853, kSentinel}},
    {{7854, 7855, kSentinel}}, {{7856, 7857, kSentinel}},
    {{7858, 7859, kSentinel}}, {{7860, 7861, kSentinel}},
    {{7862, 7863, kSentinel}}, {{7864, 7865, kSentinel}},
    {{7866, 7867, kSentinel}}, {{7868, 7869, kSentinel}},
    {{7870, 7871, kSentinel}}, {{7872, 7873, kSentinel}},
    {{7874, 7875, kSentinel}}, {{7876, 7877, kSentinel}},
    {{7878, 7879, kSentinel}}, {{7880, 7881, kSentinel}},
    {{7882, 7883, kSentinel}}, {{7884, 7885, kSentinel}},
    {{7886, 7887, kSentinel}}, {{7888, 7889, kSentinel}},
    {{7890, 7891, kSentinel}}, {{7892, 7893, kSentinel}},
    {{7894, 7895, kSentinel}}, {{7896, 7897, kSentinel}},
    {{7898, 7899, kSentinel}}, {{7900, 7901, kSentinel}},
    {{7902, 7903, kSentinel}}, {{7904, 7905, kSentinel}},
    {{7906, 7907, kSentinel}}, {{7908, 7909, kSentinel}},
    {{7910, 7911, kSentinel}}, {{7912, 7913, kSentinel}},
    {{7914, 7915, kSentinel}}, {{7916, 7917, kSentinel}},
    {{7918, 7919, kSentinel}}, {{7920, 7921, kSentinel}},
    {{7922, 7923, kSentinel}}, {{7924, 7925, kSentinel}},
    {{7926, 7927, kSentinel}}, {{7928, 7929, kSentinel}},
    {{7930, 7931, kSentinel}}, {{7932, 7933, kSentinel}},
    {{7934, 7935, kSentinel}}, {{7936, 7944, kSentinel}},
    {{7943, 7951, kSentinel}}, {{7952, 7960, kSentinel}},
    {{7957, 7965, kSentinel}}, {{7968, 7976, kSentinel}},
    {{7975, 7983, kSentinel}}, {{7984, 7992, kSentinel}},
    {{7991, 7999, kSentinel}}, {{8000, 8008, kSentinel}},
    {{8005, 8013, kSentinel}}, {{8017, 8025, kSentinel}},
    {{8019, 8027, kSentinel}}, {{8021, 8029, kSentinel}},
    {{8023, 8031, kSentinel}}, {{8032, 8040, kSentinel}},
    {{8039, 8047, kSentinel}}, {{8048, 8122, kSentinel}},
    {{8049, 8123, kSentinel}}, {{8050, 8136, kSentinel}},
    {{8053, 8139, kSentinel}}, {{8054, 8154, kSentinel}},
    {{8055, 8155, kSentinel}}, {{8056, 8184, kSentinel}},
    {{8057, 8185, kSentinel}}, {{8058, 8170, kSentinel}},
    {{8059, 8171, kSentinel}}, {{8060, 8186, kSentinel}},
    {{8061, 8187, kSentinel}}, {{8112, 8120, kSentinel}},
    {{8113, 8121, kSentinel}}, {{8144, 8152, kSentinel}},
    {{8145, 8153, kSentinel}}, {{8160, 8168, kSentinel}},
    {{8161, 8169, kSentinel}}, {{8165, 8172, kSentinel}},
    {{kSentinel}}};  // NOLINT
|
|
|
|
// Size constant for kEcma262UnCanonicalizeTable0. That array is declared with
// 2010 int32_t slots, i.e. exactly twice this value.
static const uint16_t kEcma262UnCanonicalizeTable0Size = 1005; // NOLINT
|
|
|
|
static const int32_t kEcma262UnCanonicalizeTable0[2010] = {
|
|
|
|
1073741889, 1, 90, 5, 1073741921, 1,
|
|
|
|
122, 5, 181, 9, 1073742016, 13,
|
|
|
|
214, 17, 1073742040, 21, // NOLINT
|
|
|
|
222, 25, 1073742048, 13, 246, 17,
|
|
|
|
1073742072, 21, 254, 25, 255, 29,
|
|
|
|
256, 33, 257, 33, // NOLINT
|
|
|
|
258, 37, 259, 37, 260, 41,
|
|
|
|
261, 41, 262, 45, 263, 45,
|
|
|
|
264, 49, 265, 49, // NOLINT
|
|
|
|
266, 53, 267, 53, 268, 57,
|
|
|
|
269, 57, 270, 61, 271, 61,
|
|
|
|
272, 65, 273, 65, // NOLINT
|
|
|
|
274, 69, 275, 69, 276, 73,
|
|
|
|
277, 73, 278, 77, 279, 77,
|
|
|
|
280, 81, 281, 81, // NOLINT
|
|
|
|
282, 85, 283, 85, 284, 89,
|
|
|
|
285, 89, 286, 93, 287, 93,
|
|
|
|
288, 97, 289, 97, // NOLINT
|
|
|
|
290, 101, 291, 101, 292, 105,
|
|
|
|
293, 105, 294, 109, 295, 109,
|
|
|
|
296, 113, 297, 113, // NOLINT
|
|
|
|
298, 117, 299, 117, 300, 121,
|
|
|
|
301, 121, 302, 125, 303, 125,
|
|
|
|
306, 129, 307, 129, // NOLINT
|
|
|
|
308, 133, 309, 133, 310, 137,
|
|
|
|
311, 137, 313, 141, 314, 141,
|
|
|
|
315, 145, 316, 145, // NOLINT
|
|
|
|
317, 149, 318, 149, 319, 153,
|
|
|
|
320, 153, 321, 157, 322, 157,
|
|
|
|
323, 161, 324, 161, // NOLINT
|
|
|
|
325, 165, 326, 165, 327, 169,
|
|
|
|
328, 169, 330, 173, 331, 173,
|
|
|
|
332, 177, 333, 177, // NOLINT
|
|
|
|
334, 181, 335, 181, 336, 185,
|
|
|
|
337, 185, 338, 189, 339, 189,
|
|
|
|
340, 193, 341, 193, // NOLINT
|
|
|
|
342, 197, 343, 197, 344, 201,
|
|
|
|
345, 201, 346, 205, 347, 205,
|
|
|
|
348, 209, 349, 209, // NOLINT
|
|
|
|
350, 213, 351, 213, 352, 217,
|
|
|
|
353, 217, 354, 221, 355, 221,
|
|
|
|
356, 225, 357, 225, // NOLINT
|
|
|
|
358, 229, 359, 229, 360, 233,
|
|
|
|
361, 233, 362, 237, 363, 237,
|
|
|
|
364, 241, 365, 241, // NOLINT
|
|
|
|
366, 245, 367, 245, 368, 249,
|
|
|
|
369, 249, 370, 253, 371, 253,
|
|
|
|
372, 257, 373, 257, // NOLINT
|
|
|
|
374, 261, 375, 261, 376, 29,
|
|
|
|
377, 265, 378, 265, 379, 269,
|
|
|
|
380, 269, 381, 273, // NOLINT
|
|
|
|
382, 273, 384, 277, 385, 281,
|
|
|
|
386, 285, 387, 285, 388, 289,
|
|
|
|
389, 289, 390, 293, // NOLINT
|
|
|
|
391, 297, 392, 297, 1073742217, 301,
|
|
|
|
394, 305, 395, 309, 396, 309,
|
|
|
|
398, 313, 399, 317, // NOLINT
|
|
|
|
400, 321, 401, 325, 402, 325,
|
|
|
|
403, 329, 404, 333, 405, 337,
|
|
|
|
406, 341, 407, 345, // NOLINT
|
|
|
|
408, 349, 409, 349, 410, 353,
|
|
|
|
412, 357, 413, 361, 414, 365,
|
|
|
|
415, 369, 416, 373, // NOLINT
|
|
|
|
417, 373, 418, 377, 419, 377,
|
|
|
|
420, 381, 421, 381, 422, 385,
|
|
|
|
423, 389, 424, 389, // NOLINT
|
|
|
|
425, 393, 428, 397, 429, 397,
|
|
|
|
430, 401, 431, 405, 432, 405,
|
|
|
|
1073742257, 409, 434, 413, // NOLINT
|
|
|
|
435, 417, 436, 417, 437, 421,
|
|
|
|
438, 421, 439, 425, 440, 429,
|
|
|
|
441, 429, 444, 433, // NOLINT
|
|
|
|
445, 433, 447, 437, 452, 441,
|
|
|
|
453, 441, 454, 441, 455, 445,
|
|
|
|
456, 445, 457, 445, // NOLINT
|
|
|
|
458, 449, 459, 449, 460, 449,
|
|
|
|
461, 453, 462, 453, 463, 457,
|
|
|
|
464, 457, 465, 461, // NOLINT
|
|
|
|
466, 461, 467, 465, 468, 465,
|
|
|
|
469, 469, 470, 469, 471, 473,
|
|
|
|
472, 473, 473, 477, // NOLINT
|
|
|
|
474, 477, 475, 481, 476, 481,
|
|
|
|
477, 313, 478, 485, 479, 485,
|
|
|
|
480, 489, 481, 489, // NOLINT
|
|
|
|
482, 493, 483, 493, 484, 497,
|
|
|
|
485, 497, 486, 501, 487, 501,
|
|
|
|
488, 505, 489, 505, // NOLINT
|
|
|
|
490, 509, 491, 509, 492, 513,
|
|
|
|
493, 513, 494, 517, 495, 517,
|
|
|
|
497, 521, 498, 521, // NOLINT
|
|
|
|
499, 521, 500, 525, 501, 525,
|
|
|
|
502, 337, 503, 437, 504, 529,
|
|
|
|
505, 529, 506, 533, // NOLINT
|
|
|
|
507, 533, 508, 537, 509, 537,
|
|
|
|
510, 541, 511, 541, 512, 545,
|
|
|
|
513, 545, 514, 549, // NOLINT
|
|
|
|
515, 549, 516, 553, 517, 553,
|
|
|
|
518, 557, 519, 557, 520, 561,
|
|
|
|
521, 561, 522, 565, // NOLINT
|
|
|
|
523, 565, 524, 569, 525, 569,
|
|
|
|
526, 573, 527, 573, 528, 577,
|
|
|
|
529, 577, 530, 581, // NOLINT
|
|
|
|
531, 581, 532, 585, 533, 585,
|
|
|
|
534, 589, 535, 589, 536, 593,
|
|
|
|
537, 593, 538, 597, // NOLINT
|
|
|
|
539, 597, 540, 601, 541, 601,
|
|
|
|
542, 605, 543, 605, 544, 365,
|
|
|
|
546, 609, 547, 609, // NOLINT
|
|
|
|
548, 613, 549, 613, 550, 617,
|
|
|
|
551, 617, 552, 621, 553, 621,
|
|
|
|
554, 625, 555, 625, // NOLINT
|
|
|
|
556, 629, 557, 629, 558, 633,
|
|
|
|
559, 633, 560, 637, 561, 637,
|
|
|
|
562, 641, 563, 641, // NOLINT
|
|
|
|
570, 645, 571, 649, 572, 649,
|
|
|
|
573, 353, 574, 653, 1073742399, 657,
|
|
|
|
576, 661, 577, 665, // NOLINT
|
|
|
|
578, 665, 579, 277, 580, 669,
|
|
|
|
581, 673, 582, 677, 583, 677,
|
|
|
|
584, 681, 585, 681, // NOLINT
|
|
|
|
586, 685, 587, 685, 588, 689,
|
|
|
|
589, 689, 590, 693, 591, 693,
|
|
|
|
592, 697, 593, 701, // NOLINT
|
|
|
|
594, 705, 595, 281, 596, 293,
|
|
|
|
1073742422, 301, 599, 305, 601, 317,
|
|
|
|
603, 321, 604, 709, // NOLINT
|
|
|
|
608, 329, 609, 713, 611, 333,
|
|
|
|
613, 717, 614, 721, 616, 345,
|
|
|
|
617, 341, 619, 725, // NOLINT
|
|
|
|
620, 729, 623, 357, 625, 733,
|
|
|
|
626, 361, 629, 369, 637, 737,
|
|
|
|
640, 385, 643, 393, // NOLINT
|
|
|
|
647, 741, 648, 401, 649, 669,
|
|
|
|
1073742474, 409, 651, 413, 652, 673,
|
|
|
|
658, 425, 670, 745, // NOLINT
|
|
|
|
837, 749, 880, 753, 881, 753,
|
|
|
|
882, 757, 883, 757, 886, 761,
|
|
|
|
887, 761, 1073742715, 765, // NOLINT
|
|
|
|
893, 769, 895, 773, 902, 777,
|
|
|
|
1073742728, 781, 906, 785, 908, 789,
|
|
|
|
1073742734, 793, 911, 797, // NOLINT
|
|
|
|
913, 801, 914, 805, 1073742739, 809,
|
|
|
|
916, 813, 917, 817, 1073742742, 821,
|
|
|
|
919, 825, 920, 829, // NOLINT
|
|
|
|
921, 749, 922, 833, 923, 837,
|
|
|
|
924, 9, 1073742749, 841, 927, 845,
|
|
|
|
928, 849, 929, 853, // NOLINT
|
|
|
|
931, 857, 1073742756, 861, 933, 865,
|
|
|
|
934, 869, 1073742759, 873, 939, 877,
|
|
|
|
940, 777, 1073742765, 781, // NOLINT
|
|
|
|
943, 785, 945, 801, 946, 805,
|
|
|
|
1073742771, 809, 948, 813, 949, 817,
|
|
|
|
1073742774, 821, 951, 825, // NOLINT
|
|
|
|
952, 829, 953, 749, 954, 833,
|
|
|
|
955, 837, 956, 9, 1073742781, 841,
|
|
|
|
959, 845, 960, 849, // NOLINT
|
|
|
|
961, 853, 962, 857, 963, 857,
|
|
|
|
1073742788, 861, 965, 865, 966, 869,
|
|
|
|
1073742791, 873, 971, 877, // NOLINT
|
|
|
|
972, 789, 1073742797, 793, 974, 797,
|
|
|
|
975, 881, 976, 805, 977, 829,
|
|
|
|
981, 869, 982, 849, // NOLINT
|
|
|
|
983, 881, 984, 885, 985, 885,
|
|
|
|
986, 889, 987, 889, 988, 893,
|
|
|
|
989, 893, 990, 897, // NOLINT
|
|
|
|
991, 897, 992, 901, 993, 901,
|
|
|
|
994, 905, 995, 905, 996, 909,
|
|
|
|
997, 909, 998, 913, // NOLINT
|
|
|
|
999, 913, 1000, 917, 1001, 917,
|
|
|
|
1002, 921, 1003, 921, 1004, 925,
|
|
|
|
1005, 925, 1006, 929, // NOLINT
|
|
|
|
1007, 929, 1008, 833, 1009, 853,
|
|
|
|
1010, 933, 1011, 773, 1013, 817,
|
|
|
|
1015, 937, 1016, 937, // NOLINT
|
|
|
|
1017, 933, 1018, 941, 1019, 941,
|
|
|
|
1073742845, 765, 1023, 769, 1073742848, 945,
|
|
|
|
1039, 949, 1073742864, 953, // NOLINT
|
|
|
|
1071, 957, 1073742896, 953, 1103, 957,
|
|
|
|
1073742928, 945, 1119, 949, 1120, 961,
|
|
|
|
1121, 961, 1122, 965, // NOLINT
|
|
|
|
1123, 965, 1124, 969, 1125, 969,
|
|
|
|
1126, 973, 1127, 973, 1128, 977,
|
|
|
|
1129, 977, 1130, 981, // NOLINT
|
|
|
|
1131, 981, 1132, 985, 1133, 985,
|
|
|
|
1134, 989, 1135, 989, 1136, 993,
|
|
|
|
1137, 993, 1138, 997, // NOLINT
|
|
|
|
1139, 997, 1140, 1001, 1141, 1001,
|
|
|
|
1142, 1005, 1143, 1005, 1144, 1009,
|
|
|
|
1145, 1009, 1146, 1013, // NOLINT
|
|
|
|
1147, 1013, 1148, 1017, 1149, 1017,
|
|
|
|
1150, 1021, 1151, 1021, 1152, 1025,
|
|
|
|
1153, 1025, 1162, 1029, // NOLINT
|
|
|
|
1163, 1029, 1164, 1033, 1165, 1033,
|
|
|
|
1166, 1037, 1167, 1037, 1168, 1041,
|
|
|
|
1169, 1041, 1170, 1045, // NOLINT
|
|
|
|
1171, 1045, 1172, 1049, 1173, 1049,
|
|
|
|
1174, 1053, 1175, 1053, 1176, 1057,
|
|
|
|
1177, 1057, 1178, 1061, // NOLINT
|
|
|
|
1179, 1061, 1180, 1065, 1181, 1065,
|
|
|
|
1182, 1069, 1183, 1069, 1184, 1073,
|
|
|
|
1185, 1073, 1186, 1077, // NOLINT
|
|
|
|
1187, 1077, 1188, 1081, 1189, 1081,
|
|
|
|
1190, 1085, 1191, 1085, 1192, 1089,
|
|
|
|
1193, 1089, 1194, 1093, // NOLINT
|
|
|
|
1195, 1093, 1196, 1097, 1197, 1097,
|
|
|
|
1198, 1101, 1199, 1101, 1200, 1105,
|
|
|
|
1201, 1105, 1202, 1109, // NOLINT
|
|
|
|
1203, 1109, 1204, 1113, 1205, 1113,
|
|
|
|
1206, 1117, 1207, 1117, 1208, 1121,
|
|
|
|
1209, 1121, 1210, 1125, // NOLINT
|
|
|
|
1211, 1125, 1212, 1129, 1213, 1129,
|
|
|
|
1214, 1133, 1215, 1133, 1216, 1137,
|
|
|
|
1217, 1141, 1218, 1141, // NOLINT
|
|
|
|
1219, 1145, 1220, 1145, 1221, 1149,
|
|
|
|
1222, 1149, 1223, 1153, 1224, 1153,
|
|
|
|
1225, 1157, 1226, 1157, // NOLINT
|
|
|
|
1227, 1161, 1228, 1161, 1229, 1165,
|
|
|
|
1230, 1165, 1231, 1137, 1232, 1169,
|
|
|
|
1233, 1169, 1234, 1173, // NOLINT
|
|
|
|
1235, 1173, 1236, 1177, 1237, 1177,
|
|
|
|
1238, 1181, 1239, 1181, 1240, 1185,
|
|
|
|
1241, 1185, 1242, 1189, // NOLINT
|
|
|
|
1243, 1189, 1244, 1193, 1245, 1193,
|
|
|
|
1246, 1197, 1247, 1197, 1248, 1201,
|
|
|
|
1249, 1201, 1250, 1205, // NOLINT
|
|
|
|
1251, 1205, 1252, 1209, 1253, 1209,
|
|
|
|
1254, 1213, 1255, 1213, 1256, 1217,
|
|
|
|
1257, 1217, 1258, 1221, // NOLINT
|
|
|
|
1259, 1221, 1260, 1225, 1261, 1225,
|
|
|
|
1262, 1229, 1263, 1229, 1264, 1233,
|
|
|
|
1265, 1233, 1266, 1237, // NOLINT
|
|
|
|
1267, 1237, 1268, 1241, 1269, 1241,
|
|
|
|
1270, 1245, 1271, 1245, 1272, 1249,
|
|
|
|
1273, 1249, 1274, 1253, // NOLINT
|
|
|
|
1275, 1253, 1276, 1257, 1277, 1257,
|
|
|
|
1278, 1261, 1279, 1261, 1280, 1265,
|
|
|
|
1281, 1265, 1282, 1269, // NOLINT
|
|
|
|
1283, 1269, 1284, 1273, 1285, 1273,
|
|
|
|
1286, 1277, 1287, 1277, 1288, 1281,
|
|
|
|
1289, 1281, 1290, 1285, // NOLINT
|
|
|
|
1291, 1285, 1292, 1289, 1293, 1289,
|
|
|
|
1294, 1293, 1295, 1293, 1296, 1297,
|
|
|
|
1297, 1297, 1298, 1301, // NOLINT
|
|
|
|
1299, 1301, 1300, 1305, 1301, 1305,
|
|
|
|
1302, 1309, 1303, 1309, 1304, 1313,
|
|
|
|
1305, 1313, 1306, 1317, // NOLINT
|
|
|
|
1307, 1317, 1308, 1321, 1309, 1321,
|
|
|
|
1310, 1325, 1311, 1325, 1312, 1329,
|
|
|
|
1313, 1329, 1314, 1333, // NOLINT
|
|
|
|
1315, 1333, 1316, 1337, 1317, 1337,
|
|
|
|
1318, 1341, 1319, 1341, 1320, 1345,
|
|
|
|
1321, 1345, 1322, 1349, // NOLINT
|
|
|
|
1323, 1349, 1324, 1353, 1325, 1353,
|
|
|
|
1326, 1357, 1327, 1357, 1073743153, 1361,
|
|
|
|
1366, 1365, 1073743201, 1361, // NOLINT
|
|
|
|
1414, 1365, 1073746080, 1369, 4293, 1373,
|
|
|
|
4295, 1377, 4301, 1381, 7545, 1385,
|
|
|
|
7549, 1389, 7680, 1393, // NOLINT
|
|
|
|
7681, 1393, 7682, 1397, 7683, 1397,
|
|
|
|
7684, 1401, 7685, 1401, 7686, 1405,
|
|
|
|
7687, 1405, 7688, 1409, // NOLINT
|
|
|
|
7689, 1409, 7690, 1413, 7691, 1413,
|
|
|
|
7692, 1417, 7693, 1417, 7694, 1421,
|
|
|
|
7695, 1421, 7696, 1425, // NOLINT
|
|
|
|
7697, 1425, 7698, 1429, 7699, 1429,
|
|
|
|
7700, 1433, 7701, 1433, 7702, 1437,
|
|
|
|
7703, 1437, 7704, 1441, // NOLINT
|
|
|
|
7705, 1441, 7706, 1445, 7707, 1445,
|
|
|
|
7708, 1449, 7709, 1449, 7710, 1453,
|
|
|
|
7711, 1453, 7712, 1457, // NOLINT
|
|
|
|
7713, 1457, 7714, 1461, 7715, 1461,
|
|
|
|
7716, 1465, 7717, 1465, 7718, 1469,
|
|
|
|
7719, 1469, 7720, 1473, // NOLINT
|
|
|
|
7721, 1473, 7722, 1477, 7723, 1477,
|
|
|
|
7724, 1481, 7725, 1481, 7726, 1485,
|
|
|
|
7727, 1485, 7728, 1489, // NOLINT
|
|
|
|
7729, 1489, 7730, 1493, 7731, 1493,
|
|
|
|
7732, 1497, 7733, 1497, 7734, 1501,
|
|
|
|
7735, 1501, 7736, 1505, // NOLINT
|
|
|
|
7737, 1505, 7738, 1509, 7739, 1509,
|
|
|
|
7740, 1513, 7741, 1513, 7742, 1517,
|
|
|
|
7743, 1517, 7744, 1521, // NOLINT
|
|
|
|
7745, 1521, 7746, 1525, 7747, 1525,
|
|
|
|
7748, 1529, 7749, 1529, 7750, 1533,
|
|
|
|
7751, 1533, 7752, 1537, // NOLINT
|
|
|
|
7753, 1537, 7754, 1541, 7755, 1541,
|
|
|
|
7756, 1545, 7757, 1545, 7758, 1549,
|
|
|
|
7759, 1549, 7760, 1553, // NOLINT
|
|
|
|
7761, 1553, 7762, 1557, 7763, 1557,
|
|
|
|
7764, 1561, 7765, 1561, 7766, 1565,
|
|
|
|
7767, 1565, 7768, 1569, // NOLINT
|
|
|
|
7769, 1569, 7770, 1573, 7771, 1573,
|
|
|
|
7772, 1577, 7773, 1577, 7774, 1581,
|
|
|
|
7775, 1581, 7776, 1585, // NOLINT
|
|
|
|
7777, 1585, 7778, 1589, 7779, 1589,
|
|
|
|
7780, 1593, 7781, 1593, 7782, 1597,
|
|
|
|
7783, 1597, 7784, 1601, // NOLINT
|
|
|
|
7785, 1601, 7786, 1605, 7787, 1605,
|
|
|
|
7788, 1609, 7789, 1609, 7790, 1613,
|
|
|
|
7791, 1613, 7792, 1617, // NOLINT
|
|
|
|
7793, 1617, 7794, 1621, 7795, 1621,
|
|
|
|
7796, 1625, 7797, 1625, 7798, 1629,
|
|
|
|
7799, 1629, 7800, 1633, // NOLINT
|
|
|
|
7801, 1633, 7802, 1637, 7803, 1637,
|
|
|
|
7804, 1641, 7805, 1641, 7806, 1645,
|
|
|
|
7807, 1645, 7808, 1649, // NOLINT
|
|
|
|
7809, 1649, 7810, 1653, 7811, 1653,
|
|
|
|
7812, 1657, 7813, 1657, 7814, 1661,
|
|
|
|
7815, 1661, 7816, 1665, // NOLINT
|
|
|
|
7817, 1665, 7818, 1669, 7819, 1669,
|
|
|
|
7820, 1673, 7821, 1673, 7822, 1677,
|
|
|
|
7823, 1677, 7824, 1681, // NOLINT
|
|
|
|
7825, 1681, 7826, 1685, 7827, 1685,
|
|
|
|
7828, 1689, 7829, 1689, 7835, 1585,
|
|
|
|
7840, 1693, 7841, 1693, // NOLINT
|
|
|
|
7842, 1697, 7843, 1697, 7844, 1701,
|
|
|
|
7845, 1701, 7846, 1705, 7847, 1705,
|
|
|
|
7848, 1709, 7849, 1709, // NOLINT
|
|
|
|
7850, 1713, 7851, 1713, 7852, 1717,
|
|
|
|
7853, 1717, 7854, 1721, 7855, 1721,
|
|
|
|
7856, 1725, 7857, 1725, // NOLINT
|
|
|
|
7858, 1729, 7859, 1729, 7860, 1733,
|
|
|
|
7861, 1733, 7862, 1737, 7863, 1737,
|
|
|
|
7864, 1741, 7865, 1741, // NOLINT
|
|
|
|
7866, 1745, 7867, 1745, 7868, 1749,
|
|
|
|
7869, 1749, 7870, 1753, 7871, 1753,
|
|
|
|
7872, 1757, 7873, 1757, // NOLINT
|
|
|
|
7874, 1761, 7875, 1761, 7876, 1765,
|
|
|
|
7877, 1765, 7878, 1769, 7879, 1769,
|
|
|
|
7880, 1773, 7881, 1773, // NOLINT
|
|
|
|
7882, 1777, 7883, 1777, 7884, 1781,
|
|
|
|
7885, 1781, 7886, 1785, 7887, 1785,
|
|
|
|
7888, 1789, 7889, 1789, // NOLINT
|
|
|
|
7890, 1793, 7891, 1793, 7892, 1797,
|
|
|
|
7893, 1797, 7894, 1801, 7895, 1801,
|
|
|
|
7896, 1805, 7897, 1805, // NOLINT
|
|
|
|
7898, 1809, 7899, 1809, 7900, 1813,
|
|
|
|
7901, 1813, 7902, 1817, 7903, 1817,
|
|
|
|
7904, 1821, 7905, 1821, // NOLINT
|
|
|
|
7906, 1825, 7907, 1825, 7908, 1829,
|
|
|
|
7909, 1829, 7910, 1833, 7911, 1833,
|
|
|
|
7912, 1837, 7913, 1837, // NOLINT
|
|
|
|
7914, 1841, 7915, 1841, 7916, 1845,
|
|
|
|
7917, 1845, 7918, 1849, 7919, 1849,
|
|
|
|
7920, 1853, 7921, 1853, // NOLINT
|
|
|
|
7922, 1857, 7923, 1857, 7924, 1861,
|
|
|
|
7925, 1861, 7926, 1865, 7927, 1865,
|
|
|
|
7928, 1869, 7929, 1869, // NOLINT
|
|
|
|
7930, 1873, 7931, 1873, 7932, 1877,
|
|
|
|
7933, 1877, 7934, 1881, 7935, 1881,
|
|
|
|
1073749760, 1885, 7943, 1889, // NOLINT
|
|
|
|
1073749768, 1885, 7951, 1889, 1073749776, 1893,
|
|
|
|
7957, 1897, 1073749784, 1893, 7965, 1897,
|
|
|
|
1073749792, 1901, 7975, 1905, // NOLINT
|
|
|
|
1073749800, 1901, 7983, 1905, 1073749808, 1909,
|
|
|
|
7991, 1913, 1073749816, 1909, 7999, 1913,
|
|
|
|
1073749824, 1917, 8005, 1921, // NOLINT
|
|
|
|
1073749832, 1917, 8013, 1921, 8017, 1925,
|
|
|
|
8019, 1929, 8021, 1933, 8023, 1937,
|
|
|
|
8025, 1925, 8027, 1929, // NOLINT
|
|
|
|
8029, 1933, 8031, 1937, 1073749856, 1941,
|
|
|
|
8039, 1945, 1073749864, 1941, 8047, 1945,
|
|
|
|
1073749872, 1949, 8049, 1953, // NOLINT
|
|
|
|
1073749874, 1957, 8053, 1961, 1073749878, 1965,
|
|
|
|
8055, 1969, 1073749880, 1973, 8057, 1977,
|
|
|
|
1073749882, 1981, 8059, 1985, // NOLINT
|
|
|
|
1073749884, 1989, 8061, 1993, 1073749936, 1997,
|
|
|
|
8113, 2001, 1073749944, 1997, 8121, 2001,
|
|
|
|
1073749946, 1949, 8123, 1953, // NOLINT
|
|
|
|
8126, 749, 1073749960, 1957, 8139, 1961,
|
|
|
|
1073749968, 2005, 8145, 2009, 1073749976, 2005,
|
|
|
|
8153, 2009, 1073749978, 1965, // NOLINT
|
|
|
|
8155, 1969, 1073749984, 2013, 8161, 2017,
|
|
|
|
8165, 2021, 1073749992, 2013, 8169, 2017,
|
|
|
|
1073749994, 1981, 8171, 1985, // NOLINT
|
|
|
|
8172, 2021, 1073750008, 1973, 8185, 1977,
|
|
|
|
1073750010, 1989, 8187, 1993}; // NOLINT
|
|
|
|
// Size constant for kEcma262UnCanonicalizeMultiStrings0.
// UnicodeData::GetByteCount() multiplies it by
// sizeof(MultiCharacterSpecialCase<4>), so it is used as a record count.
static const uint16_t kEcma262UnCanonicalizeMultiStrings0Size = 507; // NOLINT
|
2012-03-06 09:43:12 +00:00
|
|
|
// Two-code-point records handed to LookupMapping<true> by
// Ecma262UnCanonicalize::Convert() for chunk 1 (c >> 13 == 1).
// The list holds 82 data records followed by a {{kSentinel}} terminator.
// NOTE(review): each record looks like a pair of case variants of one
// character (e.g. 8544/8560) -- confirm against the table generator.
static const MultiCharacterSpecialCase<2>
    kEcma262UnCanonicalizeMultiStrings1[83] = {
        {{8498, 8526}},   {{8544, 8560}},   {{8559, 8575}},   {{8579, 8580}},
        {{9398, 9424}},   {{9423, 9449}},   {{11264, 11312}}, {{11310, 11358}},
        {{11360, 11361}}, {{619, 11362}},   {{7549, 11363}},  {{637, 11364}},
        {{570, 11365}},   {{574, 11366}},   {{11367, 11368}}, {{11369, 11370}},
        {{11371, 11372}}, {{593, 11373}},   {{625, 11374}},   {{592, 11375}},
        {{594, 11376}},   {{11378, 11379}}, {{11381, 11382}}, {{575, 11390}},
        {{576, 11391}},   {{11392, 11393}}, {{11394, 11395}}, {{11396, 11397}},
        {{11398, 11399}}, {{11400, 11401}}, {{11402, 11403}}, {{11404, 11405}},
        {{11406, 11407}}, {{11408, 11409}}, {{11410, 11411}}, {{11412, 11413}},
        {{11414, 11415}}, {{11416, 11417}}, {{11418, 11419}}, {{11420, 11421}},
        {{11422, 11423}}, {{11424, 11425}}, {{11426, 11427}}, {{11428, 11429}},
        {{11430, 11431}}, {{11432, 11433}}, {{11434, 11435}}, {{11436, 11437}},
        {{11438, 11439}}, {{11440, 11441}}, {{11442, 11443}}, {{11444, 11445}},
        {{11446, 11447}}, {{11448, 11449}}, {{11450, 11451}}, {{11452, 11453}},
        {{11454, 11455}}, {{11456, 11457}}, {{11458, 11459}}, {{11460, 11461}},
        {{11462, 11463}}, {{11464, 11465}}, {{11466, 11467}}, {{11468, 11469}},
        {{11470, 11471}}, {{11472, 11473}}, {{11474, 11475}}, {{11476, 11477}},
        {{11478, 11479}}, {{11480, 11481}}, {{11482, 11483}}, {{11484, 11485}},
        {{11486, 11487}}, {{11488, 11489}}, {{11490, 11491}}, {{11499, 11500}},
        {{11501, 11502}}, {{11506, 11507}}, {{4256, 11520}},  {{4293, 11557}},
        {{4295, 11559}},  {{4301, 11565}},  {{kSentinel}}};
|
|
|
|
// Number of (key, value) pairs stored in kEcma262UnCanonicalizeTable1.
static const uint16_t kEcma262UnCanonicalizeTable1Size = 149;

// Lookup table for chunk 1, passed to LookupMapping<true> by
// Ecma262UnCanonicalize::Convert() when c >> 13 == 1. It is a flat list of
// 149 int32_t pairs (298 values), four pairs per row below.
// Keys such as 1073742176 have bit 30 (kStartBit) set.
// NOTE(review): presumably that bit opens a range closed by the next key,
// and the second value of a pair is an encoded reference into
// kEcma262UnCanonicalizeMultiStrings1 -- confirm against LookupMapping.
static const int32_t kEcma262UnCanonicalizeTable1[298] = {
    306, 1, 334, 1, 1073742176, 5, 367, 9,
    1073742192, 5, 383, 9, 387, 13, 388, 13,
    1073743030, 17, 1231, 21, 1073743056, 17, 1257, 21,
    1073744896, 25, 3118, 29, 1073744944, 25, 3166, 29,
    3168, 33, 3169, 33, 3170, 37, 3171, 41,
    3172, 45, 3173, 49, 3174, 53, 3175, 57,
    3176, 57, 3177, 61, 3178, 61, 3179, 65,
    3180, 65, 3181, 69, 3182, 73, 3183, 77,
    3184, 81, 3186, 85, 3187, 85, 3189, 89,
    3190, 89, 1073745022, 93, 3199, 97, 3200, 101,
    3201, 101, 3202, 105, 3203, 105, 3204, 109,
    3205, 109, 3206, 113, 3207, 113, 3208, 117,
    3209, 117, 3210, 121, 3211, 121, 3212, 125,
    3213, 125, 3214, 129, 3215, 129, 3216, 133,
    3217, 133, 3218, 137, 3219, 137, 3220, 141,
    3221, 141, 3222, 145, 3223, 145, 3224, 149,
    3225, 149, 3226, 153, 3227, 153, 3228, 157,
    3229, 157, 3230, 161, 3231, 161, 3232, 165,
    3233, 165, 3234, 169, 3235, 169, 3236, 173,
    3237, 173, 3238, 177, 3239, 177, 3240, 181,
    3241, 181, 3242, 185, 3243, 185, 3244, 189,
    3245, 189, 3246, 193, 3247, 193, 3248, 197,
    3249, 197, 3250, 201, 3251, 201, 3252, 205,
    3253, 205, 3254, 209, 3255, 209, 3256, 213,
    3257, 213, 3258, 217, 3259, 217, 3260, 221,
    3261, 221, 3262, 225, 3263, 225, 3264, 229,
    3265, 229, 3266, 233, 3267, 233, 3268, 237,
    3269, 237, 3270, 241, 3271, 241, 3272, 245,
    3273, 245, 3274, 249, 3275, 249, 3276, 253,
    3277, 253, 3278, 257, 3279, 257, 3280, 261,
    3281, 261, 3282, 265, 3283, 265, 3284, 269,
    3285, 269, 3286, 273, 3287, 273, 3288, 277,
    3289, 277, 3290, 281, 3291, 281, 3292, 285,
    3293, 285, 3294, 289, 3295, 289, 3296, 293,
    3297, 293, 3298, 297, 3299, 297, 3307, 301,
    3308, 301, 3309, 305, 3310, 305, 3314, 309,
    3315, 309, 1073745152, 313, 3365, 317, 3367, 321,
    3373, 325};

// Record count of kEcma262UnCanonicalizeMultiStrings1, sentinel included.
static const uint16_t kEcma262UnCanonicalizeMultiStrings1Size = 83;
|
2014-10-08 14:55:03 +00:00
|
|
|
// Two-code-point records handed to LookupMapping<true> by
// Ecma262UnCanonicalize::Convert() for chunk 5 (c >> 13 == 5).
// The list holds 103 data records followed by a {{kSentinel}} terminator.
// NOTE(review): each record looks like a pair of case variants of one
// character -- confirm against the table generator.
static const MultiCharacterSpecialCase<2>
    kEcma262UnCanonicalizeMultiStrings5[104] = {
        {{42560, 42561}}, {{42562, 42563}}, {{42564, 42565}}, {{42566, 42567}},
        {{42568, 42569}}, {{42570, 42571}}, {{42572, 42573}}, {{42574, 42575}},
        {{42576, 42577}}, {{42578, 42579}}, {{42580, 42581}}, {{42582, 42583}},
        {{42584, 42585}}, {{42586, 42587}}, {{42588, 42589}}, {{42590, 42591}},
        {{42592, 42593}}, {{42594, 42595}}, {{42596, 42597}}, {{42598, 42599}},
        {{42600, 42601}}, {{42602, 42603}}, {{42604, 42605}}, {{42624, 42625}},
        {{42626, 42627}}, {{42628, 42629}}, {{42630, 42631}}, {{42632, 42633}},
        {{42634, 42635}}, {{42636, 42637}}, {{42638, 42639}}, {{42640, 42641}},
        {{42642, 42643}}, {{42644, 42645}}, {{42646, 42647}}, {{42648, 42649}},
        {{42650, 42651}}, {{42786, 42787}}, {{42788, 42789}}, {{42790, 42791}},
        {{42792, 42793}}, {{42794, 42795}}, {{42796, 42797}}, {{42798, 42799}},
        {{42802, 42803}}, {{42804, 42805}}, {{42806, 42807}}, {{42808, 42809}},
        {{42810, 42811}}, {{42812, 42813}}, {{42814, 42815}}, {{42816, 42817}},
        {{42818, 42819}}, {{42820, 42821}}, {{42822, 42823}}, {{42824, 42825}},
        {{42826, 42827}}, {{42828, 42829}}, {{42830, 42831}}, {{42832, 42833}},
        {{42834, 42835}}, {{42836, 42837}}, {{42838, 42839}}, {{42840, 42841}},
        {{42842, 42843}}, {{42844, 42845}}, {{42846, 42847}}, {{42848, 42849}},
        {{42850, 42851}}, {{42852, 42853}}, {{42854, 42855}}, {{42856, 42857}},
        {{42858, 42859}}, {{42860, 42861}}, {{42862, 42863}}, {{42873, 42874}},
        {{42875, 42876}}, {{7545, 42877}},  {{42878, 42879}}, {{42880, 42881}},
        {{42882, 42883}}, {{42884, 42885}}, {{42886, 42887}}, {{42891, 42892}},
        {{613, 42893}},   {{42896, 42897}}, {{42898, 42899}}, {{42902, 42903}},
        {{42904, 42905}}, {{42906, 42907}}, {{42908, 42909}}, {{42910, 42911}},
        {{42912, 42913}}, {{42914, 42915}}, {{42916, 42917}}, {{42918, 42919}},
        {{42920, 42921}}, {{614, 42922}},   {{604, 42923}},   {{609, 42924}},
        {{620, 42925}},   {{670, 42928}},   {{647, 42929}},   {{kSentinel}}};
|
|
|
|
// Number of (key, value) pairs stored in kEcma262UnCanonicalizeTable5.
static const uint16_t kEcma262UnCanonicalizeTable5Size = 198;

// Lookup table for chunk 5, passed to LookupMapping<true> by
// Ecma262UnCanonicalize::Convert() when c >> 13 == 5. It is a flat list of
// 198 int32_t pairs (396 values), four pairs per row below.
// NOTE(review): the second value of each pair is presumably an encoded
// reference into kEcma262UnCanonicalizeMultiStrings5 -- confirm against
// LookupMapping.
static const int32_t kEcma262UnCanonicalizeTable5[396] = {
    1600, 1, 1601, 1, 1602, 5, 1603, 5,
    1604, 9, 1605, 9, 1606, 13, 1607, 13,
    1608, 17, 1609, 17, 1610, 21, 1611, 21,
    1612, 25, 1613, 25, 1614, 29, 1615, 29,
    1616, 33, 1617, 33, 1618, 37, 1619, 37,
    1620, 41, 1621, 41, 1622, 45, 1623, 45,
    1624, 49, 1625, 49, 1626, 53, 1627, 53,
    1628, 57, 1629, 57, 1630, 61, 1631, 61,
    1632, 65, 1633, 65, 1634, 69, 1635, 69,
    1636, 73, 1637, 73, 1638, 77, 1639, 77,
    1640, 81, 1641, 81, 1642, 85, 1643, 85,
    1644, 89, 1645, 89, 1664, 93, 1665, 93,
    1666, 97, 1667, 97, 1668, 101, 1669, 101,
    1670, 105, 1671, 105, 1672, 109, 1673, 109,
    1674, 113, 1675, 113, 1676, 117, 1677, 117,
    1678, 121, 1679, 121, 1680, 125, 1681, 125,
    1682, 129, 1683, 129, 1684, 133, 1685, 133,
    1686, 137, 1687, 137, 1688, 141, 1689, 141,
    1690, 145, 1691, 145, 1826, 149, 1827, 149,
    1828, 153, 1829, 153, 1830, 157, 1831, 157,
    1832, 161, 1833, 161, 1834, 165, 1835, 165,
    1836, 169, 1837, 169, 1838, 173, 1839, 173,
    1842, 177, 1843, 177, 1844, 181, 1845, 181,
    1846, 185, 1847, 185, 1848, 189, 1849, 189,
    1850, 193, 1851, 193, 1852, 197, 1853, 197,
    1854, 201, 1855, 201, 1856, 205, 1857, 205,
    1858, 209, 1859, 209, 1860, 213, 1861, 213,
    1862, 217, 1863, 217, 1864, 221, 1865, 221,
    1866, 225, 1867, 225, 1868, 229, 1869, 229,
    1870, 233, 1871, 233, 1872, 237, 1873, 237,
    1874, 241, 1875, 241, 1876, 245, 1877, 245,
    1878, 249, 1879, 249, 1880, 253, 1881, 253,
    1882, 257, 1883, 257, 1884, 261, 1885, 261,
    1886, 265, 1887, 265, 1888, 269, 1889, 269,
    1890, 273, 1891, 273, 1892, 277, 1893, 277,
    1894, 281, 1895, 281, 1896, 285, 1897, 285,
    1898, 289, 1899, 289, 1900, 293, 1901, 293,
    1902, 297, 1903, 297, 1913, 301, 1914, 301,
    1915, 305, 1916, 305, 1917, 309, 1918, 313,
    1919, 313, 1920, 317, 1921, 317, 1922, 321,
    1923, 321, 1924, 325, 1925, 325, 1926, 329,
    1927, 329, 1931, 333, 1932, 333, 1933, 337,
    1936, 341, 1937, 341, 1938, 345, 1939, 345,
    1942, 349, 1943, 349, 1944, 353, 1945, 353,
    1946, 357, 1947, 357, 1948, 361, 1949, 361,
    1950, 365, 1951, 365, 1952, 369, 1953, 369,
    1954, 373, 1955, 373, 1956, 377, 1957, 377,
    1958, 381, 1959, 381, 1960, 385, 1961, 385,
    1962, 389, 1963, 393, 1964, 397, 1965, 401,
    1968, 405, 1969, 409};

// Record count of kEcma262UnCanonicalizeMultiStrings5, sentinel included.
static const uint16_t kEcma262UnCanonicalizeMultiStrings5Size = 104;
|
2010-07-30 12:59:57 +00:00
|
|
|
// Two-code-point records handed to LookupMapping<true> by
// Ecma262UnCanonicalize::Convert() for chunk 7 (c >> 13 == 7): two data
// records followed by a {{kSentinel}} terminator.
static const MultiCharacterSpecialCase<2>
    kEcma262UnCanonicalizeMultiStrings7[3] = {
        {{65313, 65345}},
        {{65338, 65370}},
        {{kSentinel}}};
|
2010-07-30 12:59:57 +00:00
|
|
|
// Number of (key, value) pairs stored in kEcma262UnCanonicalizeTable7.
static const uint16_t kEcma262UnCanonicalizeTable7Size = 4;

// Lookup table for chunk 7, passed to LookupMapping<true> by
// Ecma262UnCanonicalize::Convert() when c >> 13 == 7: four int32_t pairs,
// one pair per row. Two of the keys have bit 30 (kStartBit) set.
// NOTE(review): presumably such a key opens a range that the following key
// closes -- confirm against LookupMapping.
static const int32_t kEcma262UnCanonicalizeTable7[8] = {
    1073749793, 1,
    7994, 5,
    1073749825, 1,
    8026, 5};

// Record count of kEcma262UnCanonicalizeMultiStrings7, sentinel included.
static const uint16_t kEcma262UnCanonicalizeMultiStrings7Size = 3;
|
2008-11-25 11:07:48 +00:00
|
|
|
// Picks the Ecma262UnCanonicalize table set that covers |c| and delegates
// the lookup to LookupMapping<true>.
//
// The tables are partitioned by c >> 13, i.e. one chunk per 8192 code
// points; only chunks 0, 1, 5 and 7 have tables. |n|, |result| and
// |allow_caching_ptr| are forwarded to LookupMapping untouched, and its
// return value is returned as-is. For any other chunk the function returns
// 0 without touching |result| or |allow_caching_ptr|.
int Ecma262UnCanonicalize::Convert(uchar c, uchar n, uchar* result,
                                   bool* allow_caching_ptr) {
  const int chunk = c >> 13;
  if (chunk == 0) {
    return LookupMapping<true>(
        kEcma262UnCanonicalizeTable0, kEcma262UnCanonicalizeTable0Size,
        kEcma262UnCanonicalizeMultiStrings0, c, n, result, allow_caching_ptr);
  }
  if (chunk == 1) {
    return LookupMapping<true>(
        kEcma262UnCanonicalizeTable1, kEcma262UnCanonicalizeTable1Size,
        kEcma262UnCanonicalizeMultiStrings1, c, n, result, allow_caching_ptr);
  }
  if (chunk == 5) {
    return LookupMapping<true>(
        kEcma262UnCanonicalizeTable5, kEcma262UnCanonicalizeTable5Size,
        kEcma262UnCanonicalizeMultiStrings5, c, n, result, allow_caching_ptr);
  }
  if (chunk == 7) {
    return LookupMapping<true>(
        kEcma262UnCanonicalizeTable7, kEcma262UnCanonicalizeTable7Size,
        kEcma262UnCanonicalizeMultiStrings7, c, n, result, allow_caching_ptr);
  }
  // No table exists for this chunk.
  return 0;
}
|
|
|
|
|
2010-07-30 12:59:57 +00:00
|
|
|
// Multi-string list passed to LookupMapping<false> by
// CanonicalizationRange::Convert() for chunk 0. It holds only the
// {{kSentinel}} terminator record.
static const MultiCharacterSpecialCase<1>
    kCanonicalizationRangeMultiStrings0[1] = {{{kSentinel}}};
|
2010-07-30 12:59:57 +00:00
|
|
|
// Number of (key, value) pairs stored in kCanonicalizationRangeTable0.
static const uint16_t kCanonicalizationRangeTable0Size = 70;

// Lookup table for chunk 0, passed to LookupMapping<false> by
// CanonicalizationRange::Convert() when c >> 13 == 0. It is a flat list of
// 70 int32_t pairs (140 values). In every group of two pairs below, the
// first key has bit 30 (kStartBit) set and the second pair's value is 0.
// NOTE(review): presumably each group describes one range (start key with
// its value, then the end key) -- confirm against LookupMapping.
static const int32_t kCanonicalizationRangeTable0[140] = {
    1073741889, 100, 90, 0, 1073741921, 100, 122, 0,
    1073742016, 88, 214, 0, 1073742040, 24, 222, 0,
    1073742048, 88, 246, 0, 1073742072, 24, 254, 0,
    1073742715, 8, 893, 0, 1073742728, 8, 906, 0,
    1073742749, 8, 927, 0, 1073742759, 16, 939, 0,
    1073742765, 8, 943, 0, 1073742781, 8, 959, 0,
    1073742791, 16, 971, 0, 1073742845, 8, 1023, 0,
    1073742848, 60, 1039, 0, 1073742864, 124, 1071, 0,
    1073742896, 124, 1103, 0, 1073742928, 60, 1119, 0,
    1073743153, 148, 1366, 0, 1073743201, 148, 1414, 0,
    1073746080, 148, 4293, 0, 1073749760, 28, 7943, 0,
    1073749768, 28, 7951, 0, 1073749776, 20, 7957, 0,
    1073749784, 20, 7965, 0, 1073749792, 28, 7975, 0,
    1073749800, 28, 7983, 0, 1073749808, 28, 7991, 0,
    1073749816, 28, 7999, 0, 1073749824, 20, 8005, 0,
    1073749832, 20, 8013, 0, 1073749856, 28, 8039, 0,
    1073749864, 28, 8047, 0, 1073749874, 12, 8053, 0,
    1073749960, 12, 8139, 0};

// Record count of kCanonicalizationRangeMultiStrings0 (just the sentinel).
static const uint16_t kCanonicalizationRangeMultiStrings0Size = 1;
|
|
|
|
// Multi-string list passed to LookupMapping<false> by
// CanonicalizationRange::Convert() for chunk 1. It holds only the
// {{kSentinel}} terminator record.
static const MultiCharacterSpecialCase<1>
    kCanonicalizationRangeMultiStrings1[1] = {{{kSentinel}}};
|
2010-07-30 12:59:57 +00:00
|
|
|
// Number of (key, value) pairs stored in kCanonicalizationRangeTable1.
static const uint16_t kCanonicalizationRangeTable1Size = 14;

// Lookup table for chunk 1, passed to LookupMapping<false> by
// CanonicalizationRange::Convert() when c >> 13 == 1. It is a flat list of
// 14 int32_t pairs (28 values), one group of two pairs per row below. In
// every row the first key has bit 30 (kStartBit) set and the second pair's
// value is 0.
// NOTE(review): presumably each row describes one range -- confirm against
// LookupMapping.
static const int32_t kCanonicalizationRangeTable1[28] = {
    1073742176, 60, 367, 0,
    1073742192, 60, 383, 0,
    1073743030, 100, 1231, 0,
    1073743056, 100, 1257, 0,
    1073744896, 184, 3118, 0,
    1073744944, 184, 3166, 0,
    1073745152, 148, 3365, 0};

// Record count of kCanonicalizationRangeMultiStrings1 (just the sentinel).
static const uint16_t kCanonicalizationRangeMultiStrings1Size = 1;
|
|
|
|
// Multi-string list passed to LookupMapping<false> by
// CanonicalizationRange::Convert() for chunk 7. It holds only the
// {{kSentinel}} terminator record.
static const MultiCharacterSpecialCase<1>
    kCanonicalizationRangeMultiStrings7[1] = {{{kSentinel}}};
|
2010-07-30 12:59:57 +00:00
|
|
|
// Number of (key, value) pairs stored in kCanonicalizationRangeTable7.
static const uint16_t kCanonicalizationRangeTable7Size = 4;

// Lookup table for chunk 7, passed to LookupMapping<false> by
// CanonicalizationRange::Convert() when c >> 13 == 7: four int32_t pairs,
// one group of two pairs per row. In each row the first key has bit 30
// (kStartBit) set and the second pair's value is 0.
// NOTE(review): presumably each row describes one range -- confirm against
// LookupMapping.
static const int32_t kCanonicalizationRangeTable7[8] = {
    1073749793, 100, 7994, 0,
    1073749825, 100, 8026, 0};

// Record count of kCanonicalizationRangeMultiStrings7 (just the sentinel).
static const uint16_t kCanonicalizationRangeMultiStrings7Size = 1;
|
2008-11-25 11:07:48 +00:00
|
|
|
// Picks the CanonicalizationRange table set that covers |c| and delegates
// the lookup to LookupMapping<false>.
//
// The tables are partitioned by c >> 13, i.e. one chunk per 8192 code
// points; only chunks 0, 1 and 7 have tables. |n|, |result| and
// |allow_caching_ptr| are forwarded to LookupMapping untouched, and its
// return value is returned as-is. For any other chunk the function returns
// 0 without touching |result| or |allow_caching_ptr|.
int CanonicalizationRange::Convert(uchar c, uchar n, uchar* result,
                                   bool* allow_caching_ptr) {
  const int chunk = c >> 13;
  if (chunk == 0) {
    return LookupMapping<false>(
        kCanonicalizationRangeTable0, kCanonicalizationRangeTable0Size,
        kCanonicalizationRangeMultiStrings0, c, n, result, allow_caching_ptr);
  }
  if (chunk == 1) {
    return LookupMapping<false>(
        kCanonicalizationRangeTable1, kCanonicalizationRangeTable1Size,
        kCanonicalizationRangeMultiStrings1, c, n, result, allow_caching_ptr);
  }
  if (chunk == 7) {
    return LookupMapping<false>(
        kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size,
        kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr);
  }
  // No table exists for this chunk.
  return 0;
}
|
|
|
|
|
2008-07-03 15:10:15 +00:00
|
|
|
|
2017-12-01 03:57:07 +00:00
|
|
|
// Out-of-line definition of the UnicodeData::kMaxCodePoint constant (0xFFFD).
// NOTE(review): by its name this is the highest code point the tables are
// meant to cover -- confirm against the declaration in unicode.h.
const uchar UnicodeData::kMaxCodePoint = 0xFFFD;
|
2008-07-03 15:10:15 +00:00
|
|
|
|
|
|
|
int UnicodeData::GetByteCount() {
|
2017-06-14 20:32:49 +00:00
|
|
|
#ifndef V8_INTL_SUPPORT // NOLINT
|
2014-10-08 14:55:03 +00:00
|
|
|
return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kUppercaseTable1Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kUppercaseTable5Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kUppercaseTable7Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable0Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable1Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable2Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable3Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable4Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable5Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable6Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kLetterTable7Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable0Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable1Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable2Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable3Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable4Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable5Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable6Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_StartTable7Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
|
|
|
|
+ kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
|
|
|
|
+
|
|
|
|
kToLowercaseMultiStrings0Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
|
|
|
+
|
|
|
|
kToLowercaseMultiStrings1Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kToLowercaseMultiStrings5Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kToLowercaseMultiStrings7Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kToUppercaseMultiStrings0Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
|
|
|
|
+
|
|
|
|
kToUppercaseMultiStrings1Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kToUppercaseMultiStrings5Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kToUppercaseMultiStrings7Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<3>) // NOLINT
|
2017-06-29 03:01:13 +00:00
|
|
|
#else
|
|
|
|
return
|
|
|
|
#endif // !V8_INTL_SUPPORT
|
2014-10-08 14:55:03 +00:00
|
|
|
+
|
|
|
|
kEcma262CanonicalizeMultiStrings0Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262CanonicalizeMultiStrings1Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262CanonicalizeMultiStrings5Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262CanonicalizeMultiStrings7Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262UnCanonicalizeMultiStrings0Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<4>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262UnCanonicalizeMultiStrings1Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262UnCanonicalizeMultiStrings5Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
|
|
|
+
|
|
|
|
kEcma262UnCanonicalizeMultiStrings7Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<2>) // NOLINT
|
|
|
|
+
|
|
|
|
kCanonicalizationRangeMultiStrings0Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kCanonicalizationRangeMultiStrings1Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>) // NOLINT
|
|
|
|
+
|
|
|
|
kCanonicalizationRangeMultiStrings7Size *
|
|
|
|
sizeof(MultiCharacterSpecialCase<1>); // NOLINT
|
2008-07-03 15:10:15 +00:00
|
|
|
}
|
|
|
|
|
2014-10-08 14:55:03 +00:00
|
|
|
} // namespace unibrow
|