From 71f7cf7dc4d09b09162aee3470f6ec0964420379 Mon Sep 17 00:00:00 2001 From: Yu Yin Date: Fri, 27 Sep 2019 12:38:03 +0800 Subject: [PATCH] [mips][builtin] Small optimization. port dcf3b66 https://crrev.com/c/1825222 port 7675b95 https://crrev.com/c/1826724 [mips][wasm-simd] Implement f32x4.sqrt. port 36f2ec1 https://crrev.com/c/1808045 Change-Id: Ib714d56f2b0062d5013364eeea30294cf0b92cd4 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1826588 Reviewed-by: Santiago Aboy Solanes Reviewed-by: Zhi An Ng Commit-Queue: Yu Yin Cr-Commit-Position: refs/heads/master@{#64030} --- src/builtins/mips/builtins-mips.cc | 18 ++++++++++-------- src/builtins/mips64/builtins-mips64.cc | 18 ++++++++++-------- .../backend/mips/code-generator-mips.cc | 5 +++++ .../backend/mips/instruction-codes-mips.h | 1 + .../backend/mips/instruction-scheduler-mips.cc | 1 + .../backend/mips/instruction-selector-mips.cc | 1 + .../backend/mips64/code-generator-mips64.cc | 5 +++++ .../backend/mips64/instruction-codes-mips64.h | 1 + .../mips64/instruction-scheduler-mips64.cc | 1 + .../mips64/instruction-selector-mips64.cc | 1 + 10 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/builtins/mips/builtins-mips.cc b/src/builtins/mips/builtins-mips.cc index d3237a1c38..44de9cc698 100644 --- a/src/builtins/mips/builtins-mips.cc +++ b/src/builtins/mips/builtins-mips.cc @@ -1085,18 +1085,16 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { __ Push(kInterpreterBytecodeArrayRegister, t0); // Allocate the local and temporary register file on the stack. + Label stack_overflow; { // Load frame size from the BytecodeArray object. __ lw(t0, FieldMemOperand(kInterpreterBytecodeArrayRegister, BytecodeArray::kFrameSizeOffset)); // Do a stack check to ensure we don't go over the limit. - Label ok; __ Subu(t1, sp, Operand(t0)); LoadRealStackLimit(masm, a2); - __ Branch(&ok, hs, t1, Operand(a2)); - __ CallRuntime(Runtime::kThrowStackOverflow); - __ bind(&ok); + __ Branch(&stack_overflow, lo, t1, Operand(a2)); // If ok, push undefined as the initial value for all register file entries. Label loop_header; @@ -1169,6 +1167,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy); // Unreachable code. __ break_(0xCC); + + __ bind(&stack_overflow); + __ CallRuntime(Runtime::kThrowStackOverflow); + // Unreachable code. + __ break_(0xCC); } static void Generate_InterpreterPushArgs(MacroAssembler* masm, @@ -2131,7 +2134,7 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) { // -- a1 : the target to call (can be any Object). // ----------------------------------- - Label non_callable, non_function, non_smi; + Label non_callable, non_smi; __ JumpIfSmi(a1, &non_callable); __ bind(&non_smi); __ GetObjectType(a1, t1, t2); @@ -2146,12 +2149,11 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) { __ Branch(&non_callable, eq, t1, Operand(zero_reg)); // Check if target is a proxy and call CallProxy external builtin - __ Branch(&non_function, ne, t2, Operand(JS_PROXY_TYPE)); - __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), RelocInfo::CODE_TARGET); + __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), + RelocInfo::CODE_TARGET, eq, t2, Operand(JS_PROXY_TYPE)); // 2. Call to something else, which might have a [[Call]] internal method (if // not we raise an exception). - __ bind(&non_function); // Overwrite the original receiver with the (original) target. __ Lsa(kScratchReg, sp, a0, kPointerSizeLog2); __ sw(a1, MemOperand(kScratchReg)); diff --git a/src/builtins/mips64/builtins-mips64.cc b/src/builtins/mips64/builtins-mips64.cc index 7cb66470a3..a1f4ee5182 100644 --- a/src/builtins/mips64/builtins-mips64.cc +++ b/src/builtins/mips64/builtins-mips64.cc @@ -1103,18 +1103,16 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { __ Push(kInterpreterBytecodeArrayRegister, a4); // Allocate the local and temporary register file on the stack. + Label stack_overflow; { // Load frame size (word) from the BytecodeArray object. __ Lw(a4, FieldMemOperand(kInterpreterBytecodeArrayRegister, BytecodeArray::kFrameSizeOffset)); // Do a stack check to ensure we don't go over the limit. - Label ok; __ Dsubu(a5, sp, Operand(a4)); LoadRealStackLimit(masm, a2); - __ Branch(&ok, hs, a5, Operand(a2)); - __ CallRuntime(Runtime::kThrowStackOverflow); - __ bind(&ok); + __ Branch(&stack_overflow, lo, a5, Operand(a2)); // If ok, push undefined as the initial value for all register file entries. Label loop_header; @@ -1188,6 +1186,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) { GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy); // Unreachable code. __ break_(0xCC); + + __ bind(&stack_overflow); + __ CallRuntime(Runtime::kThrowStackOverflow); + // Unreachable code. + __ break_(0xCC); } static void Generate_InterpreterPushArgs(MacroAssembler* masm, @@ -2170,7 +2173,7 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) { // -- a1 : the target to call (can be any Object). // ----------------------------------- - Label non_callable, non_function, non_smi; + Label non_callable, non_smi; __ JumpIfSmi(a1, &non_callable); __ bind(&non_smi); __ GetObjectType(a1, t1, t2); @@ -2184,12 +2187,11 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) { __ And(t1, t1, Operand(Map::IsCallableBit::kMask)); __ Branch(&non_callable, eq, t1, Operand(zero_reg)); - __ Branch(&non_function, ne, t2, Operand(JS_PROXY_TYPE)); - __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), RelocInfo::CODE_TARGET); + __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), + RelocInfo::CODE_TARGET, eq, t2, Operand(JS_PROXY_TYPE)); // 2. Call to something else, which might have a [[Call]] internal method (if // not we raise an exception). - __ bind(&non_function); // Overwrite the original receiver with the (original) target. __ Dlsa(kScratchReg, sp, a0, kPointerSizeLog2); __ Sd(a1, MemOperand(kScratchReg)); diff --git a/src/compiler/backend/mips/code-generator-mips.cc b/src/compiler/backend/mips/code-generator-mips.cc index 7b922e7bc6..ee23402e69 100644 --- a/src/compiler/backend/mips/code-generator-mips.cc +++ b/src/compiler/backend/mips/code-generator-mips.cc @@ -2053,6 +2053,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31); break; } + case kMipsF32x4Sqrt: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kMipsF32x4RecipApprox: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); diff --git a/src/compiler/backend/mips/instruction-codes-mips.h b/src/compiler/backend/mips/instruction-codes-mips.h index e8020d9e89..af0774f468 100644 --- a/src/compiler/backend/mips/instruction-codes-mips.h +++ b/src/compiler/backend/mips/instruction-codes-mips.h @@ -159,6 +159,7 @@ namespace compiler { V(MipsI32x4MinU) \ V(MipsF32x4Abs) \ V(MipsF32x4Neg) \ + V(MipsF32x4Sqrt) \ V(MipsF32x4RecipApprox) \ V(MipsF32x4RecipSqrtApprox) \ V(MipsF32x4Add) \ diff --git a/src/compiler/backend/mips/instruction-scheduler-mips.cc b/src/compiler/backend/mips/instruction-scheduler-mips.cc index 4e6aef52f4..ba17ad2581 100644 --- a/src/compiler/backend/mips/instruction-scheduler-mips.cc +++ b/src/compiler/backend/mips/instruction-scheduler-mips.cc @@ -54,6 +54,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMipsF32x4Div: case kMipsF32x4Ne: case kMipsF32x4Neg: + case kMipsF32x4Sqrt: case kMipsF32x4RecipApprox: case kMipsF32x4RecipSqrtApprox: case kMipsF32x4ReplaceLane: diff --git a/src/compiler/backend/mips/instruction-selector-mips.cc b/src/compiler/backend/mips/instruction-selector-mips.cc index 49e7dfffde..7ee5c7c2c7 100644 --- a/src/compiler/backend/mips/instruction-selector-mips.cc +++ b/src/compiler/backend/mips/instruction-selector-mips.cc @@ -2018,6 +2018,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \ V(F32x4Abs, kMipsF32x4Abs) \ V(F32x4Neg, kMipsF32x4Neg) \ + V(F32x4Sqrt, kMipsF32x4Sqrt) \ V(F32x4RecipApprox, kMipsF32x4RecipApprox) \ V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \ V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \ diff --git a/src/compiler/backend/mips64/code-generator-mips64.cc b/src/compiler/backend/mips64/code-generator-mips64.cc index 170b79390f..9cec463e87 100644 --- a/src/compiler/backend/mips64/code-generator-mips64.cc +++ b/src/compiler/backend/mips64/code-generator-mips64.cc @@ -2248,6 +2248,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( __ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); break; } + case kMips64F32x4Sqrt: { + CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); + __ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); + break; + } case kMips64I32x4Neg: { CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); diff --git a/src/compiler/backend/mips64/instruction-codes-mips64.h b/src/compiler/backend/mips64/instruction-codes-mips64.h index edc8924757..bcf3532b57 100644 --- a/src/compiler/backend/mips64/instruction-codes-mips64.h +++ b/src/compiler/backend/mips64/instruction-codes-mips64.h @@ -189,6 +189,7 @@ namespace compiler { V(Mips64I32x4MinU) \ V(Mips64F32x4Abs) \ V(Mips64F32x4Neg) \ + V(Mips64F32x4Sqrt) \ V(Mips64F32x4RecipApprox) \ V(Mips64F32x4RecipSqrtApprox) \ V(Mips64F32x4Add) \ diff --git a/src/compiler/backend/mips64/instruction-scheduler-mips64.cc b/src/compiler/backend/mips64/instruction-scheduler-mips64.cc index 880b424c41..fe2d33d1db 100644 --- a/src/compiler/backend/mips64/instruction-scheduler-mips64.cc +++ b/src/compiler/backend/mips64/instruction-scheduler-mips64.cc @@ -82,6 +82,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kMips64F32x4Div: case kMips64F32x4Ne: case kMips64F32x4Neg: + case kMips64F32x4Sqrt: case kMips64F32x4RecipApprox: case kMips64F32x4RecipSqrtApprox: case kMips64F32x4ReplaceLane: diff --git a/src/compiler/backend/mips64/instruction-selector-mips64.cc b/src/compiler/backend/mips64/instruction-selector-mips64.cc index beb463ed15..dfc0ff5bad 100644 --- a/src/compiler/backend/mips64/instruction-selector-mips64.cc +++ b/src/compiler/backend/mips64/instruction-selector-mips64.cc @@ -2681,6 +2681,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { V(F32x4UConvertI32x4, kMips64F32x4UConvertI32x4) \ V(F32x4Abs, kMips64F32x4Abs) \ V(F32x4Neg, kMips64F32x4Neg) \ + V(F32x4Sqrt, kMips64F32x4Sqrt) \ V(F32x4RecipApprox, kMips64F32x4RecipApprox) \ V(F32x4RecipSqrtApprox, kMips64F32x4RecipSqrtApprox) \ V(I32x4SConvertF32x4, kMips64I32x4SConvertF32x4) \