diff --git a/src/objects/intl-objects.cc b/src/objects/intl-objects.cc index 4623dc19c3..b0c82e49d6 100644 --- a/src/objects/intl-objects.cc +++ b/src/objects/intl-objects.cc @@ -230,7 +230,6 @@ icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle string, if (!flat.IsOneByte()) return icu::StringPiece(); int32_t length = string->length(); - DCHECK_LT(offset, length); const char* char_buffer = reinterpret_cast(flat.ToOneByteVector().begin()); if (!String::IsAscii(char_buffer, length)) { @@ -1418,10 +1417,8 @@ int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator, return UCollationResult::UCOL_EQUAL; } - // Early return for empty strings. - if (string1->length() == 0 || string2->length() == 0) { - return ToUCollationResult(string1->length() - string2->length()); - } + // We cannot return early for 0-length strings because of Unicode + // ignorable characters. See also crbug.com/1347690. string1 = String::Flatten(isolate, string1); string2 = String::Flatten(isolate, string2); diff --git a/test/intl/regress-1347690.js b/test/intl/regress-1347690.js new file mode 100644 index 0000000000..11878a5a87 --- /dev/null +++ b/test/intl/regress-1347690.js @@ -0,0 +1,19 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Comparison to empty string could return zero for certain Unicode character. + +// In all locales, some Unicode characters are ignorable. +// Unicode in C0 +assertEquals(0, (new Intl.Collator('en')).compare("","\u0001")); +// SOFT HYPHEN +assertEquals(0, (new Intl.Collator('en')).compare("","\u00AD")); +// ARABIC SIGN SAMVAT +assertEquals(0, (new Intl.Collator('en')).compare("","\u0604")); + +assertEquals(0, (new Intl.Collator('en')).compare("","\u0001\u0002\u00AD\u0604")); + +// Default Thai collation ignores punctuation. +assertEquals(0, (new Intl.Collator('th')).compare(""," ")); +assertEquals(0, (new Intl.Collator('th')).compare("","*"));