Revert "[regexp] fix Latin1 ignore-case bug."
This reverts commit 8e9eba3695.
Reason for revert: failure on the "V8 Linux - noi18n - debug" builder:
https://build.chromium.org/p/client.v8/builders/V8%20Linux%20-%20noi18n%20-%20debug/builds/18887
Original change's description:
> [regexp] fix Latin1 ignore-case bug.
>
> R=jgruber@chromium.org
>
> Bug: v8:6703
> Change-Id: I6b251191cc00d9ac8db41b91b528c130d1b23fc6
> Reviewed-on: https://chromium-review.googlesource.com/897523
> Commit-Queue: Yang Guo <yangguo@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#51088}
TBR=yangguo@chromium.org,jgruber@chromium.org
Change-Id: I09138cedb2631640709ec717045247362f653cb2
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:6703
Reviewed-on: https://chromium-review.googlesource.com/901184
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51090}
This commit is contained in:
parent
bb253d28f4
commit
190a896879
@ -2768,13 +2768,16 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
|
||||
Vector<const uc16> quarks = elm.atom()->data();
|
||||
for (int j = 0; j < quarks.length(); j++) {
|
||||
uint16_t c = quarks[j];
|
||||
if (elm.atom()->ignore_case()) {
|
||||
c = unibrow::Latin1::TryConvertToLatin1(c);
|
||||
}
|
||||
if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
|
||||
// Replace quark in case we converted to Latin-1.
|
||||
uint16_t* writable_quarks = const_cast<uint16_t*>(quarks.start());
|
||||
writable_quarks[j] = c;
|
||||
if (c <= String::kMaxOneByteCharCode) continue;
|
||||
if (!IgnoreCase(elm.atom()->flags())) return set_replacement(nullptr);
|
||||
// Here, we need to check for characters whose upper and lower cases
|
||||
// are outside the Latin-1 range.
|
||||
uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
|
||||
// Character is outside Latin-1 completely
|
||||
if (converted == 0) return set_replacement(nullptr);
|
||||
// Convert quark to Latin-1 in place.
|
||||
uint16_t* copy = const_cast<uint16_t*>(quarks.start());
|
||||
copy[j] = converted;
|
||||
}
|
||||
} else {
|
||||
DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
|
||||
@ -3206,17 +3209,10 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
|
||||
if (first_element_checked && i == 0 && j == 0) continue;
|
||||
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
|
||||
EmitCharacterFunction* emit_function = nullptr;
|
||||
uc16 quark = quarks[j];
|
||||
if (elm.atom()->ignore_case()) {
|
||||
// Everywhere else we assume that a non-Latin-1 character cannot match
|
||||
// a Latin-1 character. Avoid the cases where this assumption is
|
||||
// invalid by using the Latin1 equivalent instead.
|
||||
quark = unibrow::Latin1::TryConvertToLatin1(quark);
|
||||
}
|
||||
switch (pass) {
|
||||
case NON_LATIN1_MATCH:
|
||||
DCHECK(one_byte);
|
||||
if (quark > String::kMaxOneByteCharCode) {
|
||||
if (quarks[j] > String::kMaxOneByteCharCode) {
|
||||
assembler->GoTo(backtrack);
|
||||
return;
|
||||
}
|
||||
@ -3236,8 +3232,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
|
||||
if (emit_function != nullptr) {
|
||||
bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
|
||||
bool bound_checked =
|
||||
emit_function(isolate, compiler, quark, backtrack, cp_offset + j,
|
||||
bounds_check, preloaded);
|
||||
emit_function(isolate, compiler, quarks[j], backtrack,
|
||||
cp_offset + j, bounds_check, preloaded);
|
||||
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
|
||||
}
|
||||
}
|
||||
|
@ -93,11 +93,15 @@ size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
|
||||
class Latin1 {
|
||||
public:
|
||||
static const unsigned kMaxChar = 0xff;
|
||||
// Convert the character to Latin-1 case equivalent if possible.
|
||||
static inline uint16_t TryConvertToLatin1(uint16_t);
|
||||
// Returns 0 if character does not convert to single latin-1 character
|
||||
// or if the character does not convert back to latin-1 via inverse
|
||||
// operation (upper to lower, etc).
|
||||
static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
|
||||
};
|
||||
|
||||
uint16_t Latin1::TryConvertToLatin1(uint16_t c) {
|
||||
|
||||
uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
|
||||
DCHECK_GT(c, Latin1::kMaxChar);
|
||||
switch (c) {
|
||||
// These are equivalent characters in Unicode.
|
||||
case 0x39c:
|
||||
@ -108,7 +112,7 @@ uint16_t Latin1::TryConvertToLatin1(uint16_t c) {
|
||||
case 0x178:
|
||||
return 0xff;
|
||||
}
|
||||
return c;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,23 +0,0 @@
|
||||
// Copyright 2018 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
assertTrue(/(\u039C)/i.test("\xB5"));
|
||||
assertTrue(/(\u039C)+/i.test("\xB5"));
|
||||
assertTrue(/(\u039C)/ui.test("\xB5"));
|
||||
assertTrue(/(\u039C)+/ui.test("\xB5"));
|
||||
|
||||
assertTrue(/(\u03BC)/i.test("\xB5"));
|
||||
assertTrue(/(\u03BC)+/i.test("\xB5"));
|
||||
assertTrue(/(\u03BC)/ui.test("\xB5"));
|
||||
assertTrue(/(\u03BC)+/ui.test("\xB5"));
|
||||
|
||||
assertTrue(/(\u03BC)/i.test("\u039C"));
|
||||
assertTrue(/(\u03BC)+/i.test("\u039C"));
|
||||
assertTrue(/(\u03BC)/ui.test("\u039C"));
|
||||
assertTrue(/(\u03BC)+/ui.test("\u039C"));
|
||||
|
||||
assertTrue(/(\u0178)/i.test("\xFF"));
|
||||
assertTrue(/(\u0178)+/i.test("\xFF"));
|
||||
assertTrue(/(\u0178)/ui.test("\xFF"));
|
||||
assertTrue(/(\u0178)+/ui.test("\xFF"));
|
Loading…
Reference in New Issue
Block a user