From ce9b1b2ab098d595ad40d546aa268465ba9d8d31 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Thu, 18 Aug 2022 11:20:26 -0700 Subject: [PATCH] [intl] Remove incorrect optimization for 0 length string In collator and localeCompare, we have an incorrect optimization for zero length string that compare the length and ignore the fact some non zero length string could be considered as equal to a zero length string because the content are all ignoreable. Took out this incorrect optimization with test cases. The regression is introduced in https://source.chromium.org/chromium/_/chromium/v8/v8.git/+/6fbb8bc806da7231ceb81e492d09abe3f43e320e which first appeared in 97.0.4665.0 Bug: chromium:1347690 Change-Id: Ie70feb9598b1842f8a8744c38f33b3397865abfd Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3832526 Reviewed-by: Shu-yu Guo Reviewed-by: Jakob Linke Commit-Queue: Frank Tang Cr-Commit-Position: refs/heads/main@{#82632} --- src/objects/intl-objects.cc | 7 ++----- test/intl/regress-1347690.js | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) create mode 100644 test/intl/regress-1347690.js diff --git a/src/objects/intl-objects.cc b/src/objects/intl-objects.cc index 4623dc19c3..b0c82e49d6 100644 --- a/src/objects/intl-objects.cc +++ b/src/objects/intl-objects.cc @@ -230,7 +230,6 @@ icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle string, if (!flat.IsOneByte()) return icu::StringPiece(); int32_t length = string->length(); - DCHECK_LT(offset, length); const char* char_buffer = reinterpret_cast(flat.ToOneByteVector().begin()); if (!String::IsAscii(char_buffer, length)) { @@ -1418,10 +1417,8 @@ int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator, return UCollationResult::UCOL_EQUAL; } - // Early return for empty strings. - if (string1->length() == 0 || string2->length() == 0) { - return ToUCollationResult(string1->length() - string2->length()); - } + // We cannot return early for 0-length strings because of Unicode + // ignorable characters. See also crbug.com/1347690. string1 = String::Flatten(isolate, string1); string2 = String::Flatten(isolate, string2); diff --git a/test/intl/regress-1347690.js b/test/intl/regress-1347690.js new file mode 100644 index 0000000000..11878a5a87 --- /dev/null +++ b/test/intl/regress-1347690.js @@ -0,0 +1,19 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Comparison to empty string could return zero for certain Unicode character. + +// In all locales, some Unicode characters are ignorable. +// Unicode in C0 +assertEquals(0, (new Intl.Collator('en')).compare("","\u0001")); +// SOFT HYPHEN +assertEquals(0, (new Intl.Collator('en')).compare("","\u00AD")); +// ARABIC SIGN SAMVAT +assertEquals(0, (new Intl.Collator('en')).compare("","\u0604")); + +assertEquals(0, (new Intl.Collator('en')).compare("","\u0001\u0002\u00AD\u0604")); + +// Default Thai collation ignores punctuation. +assertEquals(0, (new Intl.Collator('th')).compare(""," ")); +assertEquals(0, (new Intl.Collator('th')).compare("","*"));