[d8] Add unicode filename support on windows

This is a follow up CL for https://crrev.com/c/3538284. Tests credited to https://crrev.com/c/3468352. - Add unicode filename path test. - Convert ansi encoded filename argv to utf8 encoded on windows. Because the ansi encoded filename argv may lose some information for unicode filename, and we need to use GetCommandLineW to get the actual unicode filename argument. And we convert it to utf8 encoded to be consistent with subsequent processing. - Use REPLACEMENT CHARACTER to replace the characters which cannot be encoded with sys.stdout.encoding in progress.py. This CL should be **reverted** if new unicode filenames cause problems. Bug: v8:12541 Change-Id: Ic5c5ae342b3a5b11c3119452af03c9165d429ed7 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3568926 Reviewed-by: Clemens Backes <clemensb@chromium.org> Reviewed-by: Michael Achenbach <machenbach@chromium.org> Commit-Queue: 王澳 <wangao.james@bytedance.com> Cr-Commit-Position: refs/heads/main@{#79850}
2022-04-07 16:46:24 +08:00 · 2022-04-07 16:46:24 +08:00 · 8306599e9b
commit 8306599e9b
parent 3509f2da24
4 changed files with 75 additions and 17 deletions
--- a/src/d8/d8.cc
+++ b/src/d8/d8.cc
@ -4339,6 +4339,41 @@ void Worker::PostMessageOut(const v8::FunctionCallbackInfo<v8::Value>& args) {
  }
 }

+#if V8_TARGET_OS_WIN
+// Enable support for unicode filename path on windows.
+// The ansi encoded argv[i] may have lost information for unicode
+// filenames, so we fetch the utf16 encoded command line, convert the
+// matching argument to utf8 encoded and set argv[i] to the utf8
+// encoded arg. We allocate memory for each utf8 encoded arg. Because
+// Execute may be called multiple times, the allocated args are not
+// freed right after use; they are saved and freed on exit.
+
+// Save the allocated utf8 filenames, and we will free them when exit.
+std::vector<char*> utf8_filenames;
+#include <shellapi.h>
+// Convert a NUL-terminated utf-16 encoded string to utf-8 encoded.
+// The returned buffer is allocated with new[] and recorded in
+// utf8_filenames, so the caller must not free it itself. The result is
+// always NUL-terminated; if the conversion fails it is an empty string.
+char* ConvertUtf16StringToUtf8(const wchar_t* str) {
+  // On Windows wchar_t must be a 16-bit value.
+  static_assert(sizeof(wchar_t) == 2, "wrong wchar_t size");
+  // With a source length of -1 the reported size includes the terminating
+  // NUL. For CP_UTF8 the last two arguments have to be null pointers.
+  int len =
+      WideCharToMultiByte(CP_UTF8, 0, str, -1, nullptr, 0, nullptr, nullptr);
+  DCHECK_LT(0, len);
+  // NOTE(review): DCHECK_LT is presumably inactive in release builds, so a
+  // failed size query (len == 0) must still yield a valid, terminated buffer.
+  if (len <= 0) len = 1;
+  char* utf8_str = new char[len];
+  utf8_filenames.push_back(utf8_str);
+  int written =
+      WideCharToMultiByte(CP_UTF8, 0, str, -1, utf8_str, len, nullptr, nullptr);
+  // On failure the buffer contents are unspecified; hand back an empty
+  // string instead of unterminated data.
+  if (written <= 0) utf8_str[0] = '\0';
+  return utf8_str;
+}
+
+// Convert ansi encoded argv[i] to utf8 encoded.
+// The process command line is re-parsed as utf-16 and argv[i] is replaced
+// by the utf-8 conversion of the wide argument at the same index. If the
+// command line cannot be parsed, or it has no argument at index i, argv[i]
+// is left untouched.
+void PreProcessUnicodeFilenameArg(char* argv[], int i) {
+  int argc = 0;
+  wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &argc);
+  // CommandLineToArgvW returns null on failure; keep the ansi argument then.
+  if (wargv == nullptr) return;
+  // Guard against an index outside of the re-parsed argument list.
+  if (i >= 0 && i < argc) {
+    argv[i] = ConvertUtf16StringToUtf8(wargv[i]);
+  }
+  // The argument array is one allocation that is released via LocalFree.
+  LocalFree(wargv);
+}
+
+#endif
+
 bool Shell::SetOptions(int argc, char* argv[]) {
  bool logfile_per_isolate = false;
  bool no_always_opt = false;
@ -4565,6 +4600,10 @@ bool Shell::SetOptions(int argc, char* argv[]) {
    } else if (strcmp(argv[i], "--expose-fast-api") == 0) {
      options.expose_fast_api = true;
      argv[i] = nullptr;
+    } else {
+#ifdef V8_TARGET_OS_WIN
+      PreProcessUnicodeFilenameArg(argv, i);
+#endif
    }
  }

@ -5549,6 +5588,16 @@ int Shell::Main(int argc, char* argv[]) {
    tracing_controller->StopTracing();
  }
  g_platform.reset();
+
+#ifdef V8_TARGET_OS_WIN
+  // We need to free the allocated utf8 filenames in
+  // PreProcessUnicodeFilenameArg.
+  for (char* utf8_str : utf8_filenames) {
+    delete[] utf8_str;
+  }
+  utf8_filenames.clear();
+#endif
+
  return result;
 }

--- a/test/message/unicode-filename-🎅🎄.js
+++ b/test/message/unicode-filename-🎅🎄.js
@ -0,0 +1,5 @@
+// Copyright 2022 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+print("Merry Christmas!")
--- a/test/message/unicode-filename-🎅🎄.out
+++ b/test/message/unicode-filename-🎅🎄.out
@ -0,0 +1 @@
+Merry Christmas!
--- a/tools/testrunner/testproc/progress.py
+++ b/tools/testrunner/testproc/progress.py
@ -19,7 +19,9 @@ def print_failure_header(test, is_flaky=False):
    text.append('[negative]')
  if is_flaky:
    text.append('(flaky)')
-  print('=== %s ===' % ' '.join(text))
+  output = '=== %s ===' % ' '.join(text)
+  encoding = sys.stdout.encoding or 'utf-8'
+  print(output.encode(encoding, errors='replace').decode(encoding))


 class ResultsTracker(base.TestProcObserver):
@ -147,7 +149,8 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
    self._last_printed_time = time.time()

  def _print(self, text):
-    print(text)
+    encoding = sys.stdout.encoding or 'utf-8'
+    print(text.encode(encoding, errors='replace').decode(encoding))
    sys.stdout.flush()
    self._last_printed_time = time.time()