[regexp] Restrict unicode property value expressions
The unicode property escape syntax restricts unicode property names and unicode property values to consist only of characters taken from the [a-zA-Z0-9_] character class. See the spec at: https://tc39.github.io/proposal-regexp-unicode-property-escapes/ In most cases, we do not actually need to validate that this is the case, since subsequent property lookup in ICU will fail (and throw a SyntaxError) if the given property does not exist. However, there is one special case. The ICU lookup takes the property name as a null-terminated string, so it will accept carefully malformed property names (e.g. '\p{Number\0[}'). This can end up confusing the regexp parser. With this CL, we explicitly restrict potential property names / values to the character set as specified. Bug: v8:4743, chromium:793793 Change-Id: Ic97deea8602571ec6793b79c4bb858e1c7597405 Reviewed-on: https://chromium-review.googlesource.com/824272 Reviewed-by: Mathias Bynens <mathias@chromium.org> Reviewed-by: Sathya Gunasekaran <gsathya@chromium.org> Reviewed-by: Yang Guo <yangguo@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#50130}
This commit is contained in:
parent
649ab060c0
commit
0da56e74cf
@ -1336,6 +1336,19 @@ bool IsSupportedBinaryProperty(UProperty property) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true iff |c| is allowed inside a unicode property name or value of
// a \p{...} / \P{...} escape, i.e. iff it is in the character class
// [a-zA-Z0-9_]. See:
// https://tc39.github.io/proposal-regexp-unicode-property-escapes/
//
// The parameter is intentionally wider than char. Callers pass current()
// directly (presumably a full code point, since it is explicitly cast to char
// before being stored). Narrowing to char before the range checks would let a
// non-ASCII code point whose low byte is an ASCII letter, digit or '_'
// (e.g. U+0161, low byte 'a') be accepted as valid.
//
// Note that using this to validate each parsed char is quite conservative.
// A possible alternative solution would be to only ensure the parsed
// property name/value candidate string does not contain '\0' characters and
// let ICU lookups trigger the final failure.
bool IsUnicodePropertyValueCharacter(int c) {
  if ('a' <= c && c <= 'z') return true;
  if ('A' <= c && c <= 'Z') return true;
  if ('0' <= c && c <= '9') return true;
  return (c == '_');
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
|
||||
@ -1353,11 +1366,13 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
|
||||
if (current() == '{') {
|
||||
// Parse \p{[PropertyName=]PropertyNameValue}
|
||||
for (Advance(); current() != '}' && current() != '='; Advance()) {
|
||||
if (!IsUnicodePropertyValueCharacter(current())) return false;
|
||||
if (!has_next()) return false;
|
||||
first_part.push_back(static_cast<char>(current()));
|
||||
}
|
||||
if (current() == '=') {
|
||||
for (Advance(); current() != '}'; Advance()) {
|
||||
if (!IsUnicodePropertyValueCharacter(current())) return false;
|
||||
if (!has_next()) return false;
|
||||
second_part.push_back(static_cast<char>(current()));
|
||||
}
|
||||
@ -1369,6 +1384,10 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
|
||||
Advance();
|
||||
first_part.push_back(0); // null-terminate string.
|
||||
|
||||
DCHECK(first_part.size() - 1 == std::strlen(first_part.data()));
|
||||
DCHECK(second_part.empty() ||
|
||||
second_part.size() - 1 == std::strlen(second_part.data()));
|
||||
|
||||
if (second_part.empty()) {
|
||||
// First attempt to interpret as general category property value name.
|
||||
const char* name = first_part.data();
|
||||
|
7
test/mjsunit/regress/regress-793793.js
Normal file
7
test/mjsunit/regress/regress-793793.js
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-regexp-property
|
||||
|
||||
// A property name containing an embedded NUL character must be rejected with
// a SyntaxError rather than being handed on as a truncated name.
const malformedPattern = "\\1(\\P{P\0[}()/";
assertThrows(() => new RegExp(malformedPattern, "u"), SyntaxError);
|
Loading…
Reference in New Issue
Block a user