[ia32] Fix partial regression in Cvtsi2ss/Cvtsi2sd
In https://crrev.com/c/3131374 we switched some instructions to use macro-assembler functions which can handle both AVX and SSE. However, for Cvtsi2ss and Cvtsi2sd, the behavior subtly changed. The old behavior directly called cvtsi2ss/cvtsi2sd in the code-generator. The new behavior used the macro-assembler functions, which also xor the dst operand. This led to more instructions and larger code size in some benchmarks. The xor is supposed to help reduce dependence chain length (see comments on Cvtsi2ss), but doesn't seem to have helped in these benchmarks. So, partially revert the changes, and rename all affected IA32 opcodes back to SSE. Bug: chromium:1248509 Change-Id: Ie700e2980fe9ed083c1160bda3a28f64e1e43041 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3154349 Commit-Queue: Zhi An Ng <zhin@chromium.org> Reviewed-by: Adam Klein <adamk@chromium.org> Cr-Commit-Position: refs/heads/main@{#76775}
This commit is contained in:
parent
d7c9b31a77
commit
5f622d21a5
@ -1439,15 +1439,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
__ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
|
||||
i.TempSimd128Register(0));
|
||||
break;
|
||||
case kIA32Int32ToFloat32:
|
||||
__ Cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
case kSSEInt32ToFloat32:
|
||||
// Calling Cvtsi2ss (which does a xor) regresses some benchmarks.
|
||||
__ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
break;
|
||||
case kIA32Uint32ToFloat32:
|
||||
__ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
|
||||
i.TempRegister(0));
|
||||
break;
|
||||
case kIA32Int32ToFloat64:
|
||||
__ Cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
case kSSEInt32ToFloat64:
|
||||
// Calling Cvtsi2sd (which does a xor) regresses some benchmarks.
|
||||
__ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
|
||||
break;
|
||||
case kIA32Uint32ToFloat64:
|
||||
__ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
|
||||
|
@ -64,9 +64,9 @@ namespace compiler {
|
||||
V(IA32Float32ToUint32) \
|
||||
V(IA32Float64ToInt32) \
|
||||
V(IA32Float64ToUint32) \
|
||||
V(IA32Int32ToFloat32) \
|
||||
V(SSEInt32ToFloat32) \
|
||||
V(IA32Uint32ToFloat32) \
|
||||
V(IA32Int32ToFloat64) \
|
||||
V(SSEInt32ToFloat64) \
|
||||
V(IA32Uint32ToFloat64) \
|
||||
V(IA32Float64ExtractLowWord32) \
|
||||
V(IA32Float64ExtractHighWord32) \
|
||||
|
@ -65,9 +65,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
|
||||
case kIA32Float32ToUint32:
|
||||
case kIA32Float64ToInt32:
|
||||
case kIA32Float64ToUint32:
|
||||
case kIA32Int32ToFloat32:
|
||||
case kSSEInt32ToFloat32:
|
||||
case kIA32Uint32ToFloat32:
|
||||
case kIA32Int32ToFloat64:
|
||||
case kSSEInt32ToFloat64:
|
||||
case kIA32Uint32ToFloat64:
|
||||
case kIA32Float64ExtractLowWord32:
|
||||
case kIA32Float64ExtractHighWord32:
|
||||
|
@ -1128,8 +1128,8 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
|
||||
V(Word32Ctz, kIA32Tzcnt) \
|
||||
V(Word32Popcnt, kIA32Popcnt) \
|
||||
V(ChangeFloat32ToFloat64, kIA32Float32ToFloat64) \
|
||||
V(RoundInt32ToFloat32, kIA32Int32ToFloat32) \
|
||||
V(ChangeInt32ToFloat64, kIA32Int32ToFloat64) \
|
||||
V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \
|
||||
V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \
|
||||
V(TruncateFloat32ToInt32, kIA32Float32ToInt32) \
|
||||
V(ChangeFloat64ToInt32, kIA32Float64ToInt32) \
|
||||
V(TruncateFloat64ToFloat32, kIA32Float64ToFloat32) \
|
||||
|
Loading…
Reference in New Issue
Block a user