[d8] Add unicode filename support on windows

This is a follow up CL for https://crrev.com/c/3538284.
Tests credited to https://crrev.com/c/3468352.

- Add unicode filename path test.
- Convert ansi encoded filename argv to utf8 encoded on windows. The ansi
encoded filename argv may lose some information for unicode filenames, so
we use GetCommandLineW to get the actual unicode filename argument, and
then convert it to utf8 encoded to be consistent with subsequent
processing.
- Use REPLACEMENT CHARACTER to replace the characters which cannot be
encoded with sys.stdout.encoding in progress.py.

This CL should be **reverted** if new unicode filenames cause problems.

Bug: v8:12541
Change-Id: Ic5c5ae342b3a5b11c3119452af03c9165d429ed7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3568926
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Commit-Queue: 王澳 <wangao.james@bytedance.com>
Cr-Commit-Position: refs/heads/main@{#79850}
This commit is contained in:
jameslahm 2022-04-07 16:46:24 +08:00 committed by V8 LUCI CQ
parent 3509f2da24
commit 8306599e9b
4 changed files with 75 additions and 17 deletions

View File

@ -4339,6 +4339,41 @@ void Worker::PostMessageOut(const v8::FunctionCallbackInfo<v8::Value>& args) {
}
}
#if V8_TARGET_OS_WIN
// Enable support for unicode filename paths on Windows.
// The ansi encoded argv[i] may not represent a unicode filename faithfully,
// so we fetch the utf16 encoded argument from the wide command line, convert
// it to utf8 encoded, and set argv[i] to the utf8 encoded arg. We allocate
// memory for the utf8 encoded arg, and argv[i] is reset to nullptr after the
// filename path arg has been used. Because Execute may be called multiple
// times, the allocated utf8 filenames must be freed on exit.
// Save the allocated utf8 filenames, and we will free them on exit.
std::vector<char*> utf8_filenames;
#include <shellapi.h>
// Convert a NUL-terminated utf-16 encoded string to utf-8 encoded.
//
// Returns a newly allocated, NUL-terminated buffer. The buffer is recorded in
// utf8_filenames, so the caller must not delete it directly. If the
// conversion fails, an empty string is returned instead of an unterminated
// buffer.
char* ConvertUtf16StringToUtf8(const wchar_t* str) {
  // On Windows wchar_t must be a 16-bit value.
  static_assert(sizeof(wchar_t) == 2, "wrong wchar_t size");
  // First pass: query the required buffer size in bytes. Because the input
  // length is -1, the result includes the terminating NUL. For CP_UTF8 both
  // lpDefaultChar and lpUsedDefaultChar have to be null pointers.
  int len =
      WideCharToMultiByte(CP_UTF8, 0, str, -1, nullptr, 0, nullptr, nullptr);
  DCHECK_LT(0, len);
  // DCHECKs are compiled out in release builds; make sure there is always
  // room for at least the terminator so the result is a valid C string.
  if (len <= 0) len = 1;
  char* utf8_str = new char[len];
  utf8_filenames.push_back(utf8_str);
  // Second pass: perform the conversion. On success the call reports exactly
  // the number of bytes announced by the first pass.
  int written = WideCharToMultiByte(CP_UTF8, 0, str, -1, utf8_str, len,
                                    nullptr, nullptr);
  if (written != len) {
    // Conversion failed; hand back an empty, terminated string.
    utf8_str[0] = '\0';
  }
  return utf8_str;
}
// Replace the ansi encoded argv[i] with its utf8 encoded counterpart.
//
// The wide (utf-16) argument vector is re-parsed from the process command
// line, and entry i is converted to utf-8 and stored into argv[i]. If the
// wide command line cannot be parsed, or it has no entry at index i, argv[i]
// is left untouched.
void PreProcessUnicodeFilenameArg(char* argv[], int i) {
  int wide_argc = 0;
  wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &wide_argc);
  // CommandLineToArgvW returns nullptr on failure; keep the original arg.
  if (wargv == nullptr) return;
  // Guard against the wide argument vector being shorter than argv.
  if (i >= 0 && i < wide_argc) {
    argv[i] = ConvertUtf16StringToUtf8(wargv[i]);
  }
  // The argument vector returned by CommandLineToArgvW is a single block
  // that has to be released with LocalFree.
  LocalFree(wargv);
}
#endif
bool Shell::SetOptions(int argc, char* argv[]) {
bool logfile_per_isolate = false;
bool no_always_opt = false;
@ -4565,6 +4600,10 @@ bool Shell::SetOptions(int argc, char* argv[]) {
} else if (strcmp(argv[i], "--expose-fast-api") == 0) {
options.expose_fast_api = true;
argv[i] = nullptr;
} else {
#ifdef V8_TARGET_OS_WIN
PreProcessUnicodeFilenameArg(argv, i);
#endif
}
}
@ -5549,6 +5588,16 @@ int Shell::Main(int argc, char* argv[]) {
tracing_controller->StopTracing();
}
g_platform.reset();
#ifdef V8_TARGET_OS_WIN
// We need to free the allocated utf8 filenames in
// PreProcessUnicodeFilenameArg.
for (char* utf8_str : utf8_filenames) {
delete[] utf8_str;
}
utf8_filenames.clear();
#endif
return result;
}

View File

@ -0,0 +1,5 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Emit a single fixed greeting line via print().
print("Merry Christmas!")

View File

@ -0,0 +1 @@
Merry Christmas!

View File

@ -19,7 +19,9 @@ def print_failure_header(test, is_flaky=False):
text.append('[negative]')
if is_flaky:
text.append('(flaky)')
print('=== %s ===' % ' '.join(text))
output = '=== %s ===' % ' '.join(text)
encoding = sys.stdout.encoding or 'utf-8'
print(output.encode(encoding, errors='replace').decode(encoding))
class ResultsTracker(base.TestProcObserver):
@ -147,7 +149,8 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
self._last_printed_time = time.time()
def _print(self, text):
print(text)
encoding = sys.stdout.encoding or 'utf-8'
print(text.encode(encoding, errors='replace').decode(encoding))
sys.stdout.flush()
self._last_printed_time = time.time()