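[mips][builtin] Small optimization: move the stack-overflow throw out of line in InterpreterEntryTrampoline and jump conditionally to CallProxy in Generate_Call.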

port dcf3b66 https://crrev.com/c/1825222
port 7675b95 https://crrev.com/c/1826724

[mips][wasm-simd] Implement f32x4.sqrt.

port 36f2ec1 https://crrev.com/c/1808045

Change-Id: Ib714d56f2b0062d5013364eeea30294cf0b92cd4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1826588
Reviewed-by: Santiago Aboy Solanes <solanes@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Yu Yin <xwafish@gmail.com>
Cr-Commit-Position: refs/heads/master@{#64030}
This commit is contained in:
Yu Yin 2019-09-27 12:38:03 +08:00 committed by Commit Bot
parent cf1925b0f8
commit 71f7cf7dc4
10 changed files with 36 additions and 16 deletions

View File

@ -1085,18 +1085,16 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
__ Push(kInterpreterBytecodeArrayRegister, t0); __ Push(kInterpreterBytecodeArrayRegister, t0);
// Allocate the local and temporary register file on the stack. // Allocate the local and temporary register file on the stack.
Label stack_overflow;
{ {
// Load frame size from the BytecodeArray object. // Load frame size from the BytecodeArray object.
__ lw(t0, FieldMemOperand(kInterpreterBytecodeArrayRegister, __ lw(t0, FieldMemOperand(kInterpreterBytecodeArrayRegister,
BytecodeArray::kFrameSizeOffset)); BytecodeArray::kFrameSizeOffset));
// Do a stack check to ensure we don't go over the limit. // Do a stack check to ensure we don't go over the limit.
Label ok;
__ Subu(t1, sp, Operand(t0)); __ Subu(t1, sp, Operand(t0));
LoadRealStackLimit(masm, a2); LoadRealStackLimit(masm, a2);
__ Branch(&ok, hs, t1, Operand(a2)); __ Branch(&stack_overflow, lo, t1, Operand(a2));
__ CallRuntime(Runtime::kThrowStackOverflow);
__ bind(&ok);
// If ok, push undefined as the initial value for all register file entries. // If ok, push undefined as the initial value for all register file entries.
Label loop_header; Label loop_header;
@ -1169,6 +1167,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy); GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy);
// Unreachable code. // Unreachable code.
__ break_(0xCC); __ break_(0xCC);
__ bind(&stack_overflow);
__ CallRuntime(Runtime::kThrowStackOverflow);
// Unreachable code.
__ break_(0xCC);
} }
static void Generate_InterpreterPushArgs(MacroAssembler* masm, static void Generate_InterpreterPushArgs(MacroAssembler* masm,
@ -2131,7 +2134,7 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) {
// -- a1 : the target to call (can be any Object). // -- a1 : the target to call (can be any Object).
// ----------------------------------- // -----------------------------------
Label non_callable, non_function, non_smi; Label non_callable, non_smi;
__ JumpIfSmi(a1, &non_callable); __ JumpIfSmi(a1, &non_callable);
__ bind(&non_smi); __ bind(&non_smi);
__ GetObjectType(a1, t1, t2); __ GetObjectType(a1, t1, t2);
@ -2146,12 +2149,11 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) {
__ Branch(&non_callable, eq, t1, Operand(zero_reg)); __ Branch(&non_callable, eq, t1, Operand(zero_reg));
// Check if target is a proxy and call CallProxy external builtin // Check if target is a proxy and call CallProxy external builtin
__ Branch(&non_function, ne, t2, Operand(JS_PROXY_TYPE)); __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy),
__ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), RelocInfo::CODE_TARGET); RelocInfo::CODE_TARGET, eq, t2, Operand(JS_PROXY_TYPE));
// 2. Call to something else, which might have a [[Call]] internal method (if // 2. Call to something else, which might have a [[Call]] internal method (if
// not we raise an exception). // not we raise an exception).
__ bind(&non_function);
// Overwrite the original receiver with the (original) target. // Overwrite the original receiver with the (original) target.
__ Lsa(kScratchReg, sp, a0, kPointerSizeLog2); __ Lsa(kScratchReg, sp, a0, kPointerSizeLog2);
__ sw(a1, MemOperand(kScratchReg)); __ sw(a1, MemOperand(kScratchReg));

View File

@ -1103,18 +1103,16 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
__ Push(kInterpreterBytecodeArrayRegister, a4); __ Push(kInterpreterBytecodeArrayRegister, a4);
// Allocate the local and temporary register file on the stack. // Allocate the local and temporary register file on the stack.
Label stack_overflow;
{ {
// Load frame size (word) from the BytecodeArray object. // Load frame size (word) from the BytecodeArray object.
__ Lw(a4, FieldMemOperand(kInterpreterBytecodeArrayRegister, __ Lw(a4, FieldMemOperand(kInterpreterBytecodeArrayRegister,
BytecodeArray::kFrameSizeOffset)); BytecodeArray::kFrameSizeOffset));
// Do a stack check to ensure we don't go over the limit. // Do a stack check to ensure we don't go over the limit.
Label ok;
__ Dsubu(a5, sp, Operand(a4)); __ Dsubu(a5, sp, Operand(a4));
LoadRealStackLimit(masm, a2); LoadRealStackLimit(masm, a2);
__ Branch(&ok, hs, a5, Operand(a2)); __ Branch(&stack_overflow, lo, a5, Operand(a2));
__ CallRuntime(Runtime::kThrowStackOverflow);
__ bind(&ok);
// If ok, push undefined as the initial value for all register file entries. // If ok, push undefined as the initial value for all register file entries.
Label loop_header; Label loop_header;
@ -1188,6 +1186,11 @@ void Builtins::Generate_InterpreterEntryTrampoline(MacroAssembler* masm) {
GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy); GenerateTailCallToReturnedCode(masm, Runtime::kCompileLazy);
// Unreachable code. // Unreachable code.
__ break_(0xCC); __ break_(0xCC);
__ bind(&stack_overflow);
__ CallRuntime(Runtime::kThrowStackOverflow);
// Unreachable code.
__ break_(0xCC);
} }
static void Generate_InterpreterPushArgs(MacroAssembler* masm, static void Generate_InterpreterPushArgs(MacroAssembler* masm,
@ -2170,7 +2173,7 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) {
// -- a1 : the target to call (can be any Object). // -- a1 : the target to call (can be any Object).
// ----------------------------------- // -----------------------------------
Label non_callable, non_function, non_smi; Label non_callable, non_smi;
__ JumpIfSmi(a1, &non_callable); __ JumpIfSmi(a1, &non_callable);
__ bind(&non_smi); __ bind(&non_smi);
__ GetObjectType(a1, t1, t2); __ GetObjectType(a1, t1, t2);
@ -2184,12 +2187,11 @@ void Builtins::Generate_Call(MacroAssembler* masm, ConvertReceiverMode mode) {
__ And(t1, t1, Operand(Map::IsCallableBit::kMask)); __ And(t1, t1, Operand(Map::IsCallableBit::kMask));
__ Branch(&non_callable, eq, t1, Operand(zero_reg)); __ Branch(&non_callable, eq, t1, Operand(zero_reg));
__ Branch(&non_function, ne, t2, Operand(JS_PROXY_TYPE)); __ Jump(BUILTIN_CODE(masm->isolate(), CallProxy),
__ Jump(BUILTIN_CODE(masm->isolate(), CallProxy), RelocInfo::CODE_TARGET); RelocInfo::CODE_TARGET, eq, t2, Operand(JS_PROXY_TYPE));
// 2. Call to something else, which might have a [[Call]] internal method (if // 2. Call to something else, which might have a [[Call]] internal method (if
// not we raise an exception). // not we raise an exception).
__ bind(&non_function);
// Overwrite the original receiver with the (original) target. // Overwrite the original receiver with the (original) target.
__ Dlsa(kScratchReg, sp, a0, kPointerSizeLog2); __ Dlsa(kScratchReg, sp, a0, kPointerSizeLog2);
__ Sd(a1, MemOperand(kScratchReg)); __ Sd(a1, MemOperand(kScratchReg));

View File

@ -2053,6 +2053,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31); __ bnegi_w(i.OutputSimd128Register(), i.InputSimd128Register(0), 31);
break; break;
} }
case kMipsF32x4Sqrt: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kMipsF32x4RecipApprox: { case kMipsF32x4RecipApprox: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ frcp_w(i.OutputSimd128Register(), i.InputSimd128Register(0));

View File

@ -159,6 +159,7 @@ namespace compiler {
V(MipsI32x4MinU) \ V(MipsI32x4MinU) \
V(MipsF32x4Abs) \ V(MipsF32x4Abs) \
V(MipsF32x4Neg) \ V(MipsF32x4Neg) \
V(MipsF32x4Sqrt) \
V(MipsF32x4RecipApprox) \ V(MipsF32x4RecipApprox) \
V(MipsF32x4RecipSqrtApprox) \ V(MipsF32x4RecipSqrtApprox) \
V(MipsF32x4Add) \ V(MipsF32x4Add) \

View File

@ -54,6 +54,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsF32x4Div: case kMipsF32x4Div:
case kMipsF32x4Ne: case kMipsF32x4Ne:
case kMipsF32x4Neg: case kMipsF32x4Neg:
case kMipsF32x4Sqrt:
case kMipsF32x4RecipApprox: case kMipsF32x4RecipApprox:
case kMipsF32x4RecipSqrtApprox: case kMipsF32x4RecipSqrtApprox:
case kMipsF32x4ReplaceLane: case kMipsF32x4ReplaceLane:

View File

@ -2018,6 +2018,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \ V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \
V(F32x4Abs, kMipsF32x4Abs) \ V(F32x4Abs, kMipsF32x4Abs) \
V(F32x4Neg, kMipsF32x4Neg) \ V(F32x4Neg, kMipsF32x4Neg) \
V(F32x4Sqrt, kMipsF32x4Sqrt) \
V(F32x4RecipApprox, kMipsF32x4RecipApprox) \ V(F32x4RecipApprox, kMipsF32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \ V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \
V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \ V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \

View File

@ -2248,6 +2248,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ ftrunc_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
break; break;
} }
case kMips64F32x4Sqrt: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fsqrt_w(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kMips64I32x4Neg: { case kMips64I32x4Neg: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero); __ xor_v(kSimd128RegZero, kSimd128RegZero, kSimd128RegZero);

View File

@ -189,6 +189,7 @@ namespace compiler {
V(Mips64I32x4MinU) \ V(Mips64I32x4MinU) \
V(Mips64F32x4Abs) \ V(Mips64F32x4Abs) \
V(Mips64F32x4Neg) \ V(Mips64F32x4Neg) \
V(Mips64F32x4Sqrt) \
V(Mips64F32x4RecipApprox) \ V(Mips64F32x4RecipApprox) \
V(Mips64F32x4RecipSqrtApprox) \ V(Mips64F32x4RecipSqrtApprox) \
V(Mips64F32x4Add) \ V(Mips64F32x4Add) \

View File

@ -82,6 +82,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64F32x4Div: case kMips64F32x4Div:
case kMips64F32x4Ne: case kMips64F32x4Ne:
case kMips64F32x4Neg: case kMips64F32x4Neg:
case kMips64F32x4Sqrt:
case kMips64F32x4RecipApprox: case kMips64F32x4RecipApprox:
case kMips64F32x4RecipSqrtApprox: case kMips64F32x4RecipSqrtApprox:
case kMips64F32x4ReplaceLane: case kMips64F32x4ReplaceLane:

View File

@ -2681,6 +2681,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(F32x4UConvertI32x4, kMips64F32x4UConvertI32x4) \ V(F32x4UConvertI32x4, kMips64F32x4UConvertI32x4) \
V(F32x4Abs, kMips64F32x4Abs) \ V(F32x4Abs, kMips64F32x4Abs) \
V(F32x4Neg, kMips64F32x4Neg) \ V(F32x4Neg, kMips64F32x4Neg) \
V(F32x4Sqrt, kMips64F32x4Sqrt) \
V(F32x4RecipApprox, kMips64F32x4RecipApprox) \ V(F32x4RecipApprox, kMips64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kMips64F32x4RecipSqrtApprox) \ V(F32x4RecipSqrtApprox, kMips64F32x4RecipSqrtApprox) \
V(I32x4SConvertF32x4, kMips64I32x4SConvertF32x4) \ V(I32x4SConvertF32x4, kMips64I32x4SConvertF32x4) \