[msvc] fix build with neon intrinsics
This compilation error was found by NodeJS when updating V8: https://github.com/nodejs/node-v8/issues/240 MSVC reports a "too many initializers" error for type uint32x4_t. --- Under gcc/clang, this is a typedef to a builtin type. For MSVC, it is a typedef to this union: typedef union __n128 { unsigned __int64 n128_u64[2]; unsigned __int32 n128_u32[4]; ... } __n128; C++ mandates that only the first member of a union can be initialized at declaration. Thus, it can only be initialized with {uint64_t, uint64_t}. The VS team proposed using a designated initializer instead: var = {.n128_u32={1, 2, 3, 8}} https://developercommunity.visualstudio.com/t/error-c2078-too-many-initializers-when-using-arm-n/402911 However, this requires /std:c++20, which V8 does not use. --- Thus, the only solution is to implement a hack specifically for MSVC, where two uint64 values are built from four uint32 values. --------------------------------------- Once that was solved, another error was reported: the templated function extract_first_nonzero_index is specialized twice. This is because, with MSVC, uint32x4_t and uint64x2_t are typedefs of the same __n128 union. The fix is to drop the templates and use explicit function names instead. Bug: v8:13312 Change-Id: I231d8cf01c05af01af319d56d5666c415f8b989b Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3913035 Reviewed-by: Igor Sheludko <ishell@chromium.org> Reviewed-by: Jakob Kummerow <jkummerow@chromium.org> Commit-Queue: Jakob Kummerow <jkummerow@chromium.org> Cr-Commit-Position: refs/heads/main@{#83404}
This commit is contained in:
parent
17359d84c8
commit
1b3a4f0c34
@ -89,21 +89,29 @@ inline uintptr_t slow_search(T* array, uintptr_t array_len, uintptr_t index,
|
||||
// max(v & mask) = 4
|
||||
// index of the first match = 4-max = 4-4 = 0
|
||||
//
|
||||
// Primary template of extract_first_nonzero_index. It has no generic
// implementation: the body is just UNREACHABLE(), so only explicit
// specializations for concrete vector types are expected to be called.
// NOTE(review): this is the removed side of the diff; per the commit message,
// the template is dropped in favour of explicitly named functions because
// MSVC typedefs uint32x4_t and uint64x2_t to the same union.
template <typename T>
|
||||
inline int extract_first_nonzero_index(T v) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <>
|
||||
V8_ALLOW_UNUSED inline int extract_first_nonzero_index(uint32x4_t v) {
|
||||
uint32x4_t mask = {4, 3, 2, 1};
|
||||
// With MSVC, uint32x4_t and uint64x2_t are typedefs of the same union, whose
// first member is uint64_t[2] rather than uint32_t[4].
// Without designated initializers (C++20), brace-initializing a union can only
// target its first member, so on MSVC the four 32-bit lanes have to be written
// as two 64-bit values.
//
// PACK32x4_LANE_PAIR(lo, hi) builds one such 64-bit value: |lo| occupies bits
// 0-31 and |hi| occupies bits 32-63. Both arguments are truncated to 32 bits
// first, so a negative or oversized lane value cannot sign-extend or carry
// into the neighbouring lane.
#define PACK32x4_LANE_PAIR(lo, hi)                    \
  (static_cast<uint64_t>(static_cast<uint32_t>(lo)) | \
   (static_cast<uint64_t>(static_cast<uint32_t>(hi)) << 32))

// PACK32x4(w, x, y, z) expands to a braced initializer for a 4 x 32-bit vector
// whose lanes are w, x, y, z (in that order).
#if defined(_MSC_VER)
#define PACK32x4(w, x, y, z) \
  { PACK32x4_LANE_PAIR(w, x), PACK32x4_LANE_PAIR(y, z) }
#else
#define PACK32x4(w, x, y, z) \
  { (w), (x), (y), (z) }
#endif  // MSVC workaround
|
||||
|
||||
// Maps a 4 x 32-bit vector to the index of its first matching lane.
// Lanes 0..3 are ANDed with the weights 4, 3, 2, 1, so the earliest lane that
// survives the AND carries the largest value; subtracting the horizontal
// maximum from 4 turns that weight back into a lane index.
// If no lane survives the AND, the maximum is 0 and the result is 4.
// NOTE(review): presumably |v| is a comparison result whose lanes are either
// all-ones or zero -- confirm against the callers.
V8_ALLOW_UNUSED inline int extract_first_nonzero_index_uint32x4_t(
    uint32x4_t v) {
  const uint32x4_t lane_weights = PACK32x4(4, 3, 2, 1);
  const uint32x4_t weighted_matches = vandq_u32(lane_weights, v);
  return 4 - vmaxvq_u32(weighted_matches);
}
|
||||
|
||||
template <>
|
||||
inline int extract_first_nonzero_index(uint64x2_t v) {
|
||||
uint32x4_t mask = {2, 0, 1, 0}; // Could also be {2,2,1,1} or {0,2,0,1}
|
||||
// Maps a 2 x 64-bit vector to the index of its first matching lane.
// |v| is reinterpreted as four 32-bit lanes and ANDed with the weights
// 2, 0, 1, 0, i.e. only the first 32-bit half of each 64-bit lane is weighted
// (2 for lane 0, 1 for lane 1). Subtracting the horizontal maximum from 2
// yields the lane index; if nothing survives the AND, the result is 2.
// NOTE(review): presumably |v| is a comparison result whose lanes are either
// all-ones or zero -- confirm against the callers.
inline int extract_first_nonzero_index_uint64x2_t(uint64x2_t v) {
  // Could also be {2,2,1,1} or {0,2,0,1}
  const uint32x4_t lane_weights = PACK32x4(2, 0, 1, 0);
  const uint32x4_t halves = vreinterpretq_u32_u64(v);
  const uint32x4_t weighted_matches = vandq_u32(lane_weights, halves);
  return 2 - vmaxvq_u32(weighted_matches);
}
|
||||
@ -122,7 +130,7 @@ inline int32_t reinterpret_vmaxvq_u64(uint64x2_t v) {
|
||||
type_load vector = *reinterpret_cast<type_load*>(&array[index]); \
|
||||
type_eq eq = cmp(vector, search_element_vec); \
|
||||
if (movemask(eq)) { \
|
||||
return index + extract_first_nonzero_index(eq); \
|
||||
return index + extract_first_nonzero_index_##type_eq(eq); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user