[cleanup] Use brace initialization instead of a static cast

We change a number of static_cast expressions to the cleaner and shorter brace
initialization. I did not change every use of static_cast in these files,
only those related to SIMD, so as not to cause churn in the
diff/blame.

Change-Id: I7e90c1b81f09a1e7a3ae7c9825db4fdbd21db919
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2159737
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67373}
Author: Ng Zhi An, 2020-04-22 13:52:23 -07:00 (committed by Commit Bot)
parent 390ed4b934
commit 9f856f6375
2 changed files with 98 additions and 101 deletions
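
For background, the practical difference between the two spellings: brace initialization is ill-formed when it would narrow a value the compiler cannot prove fits, while static_cast truncates silently. A minimal standalone sketch, not part of this CL (byte here stands in for V8's unsigned 8-bit alias):

    #include <cstdint>
    using byte = uint8_t;  // assumption: models V8's byte alias

    int main() {
      byte a = static_cast<byte>(31);  // fine, but would also silently truncate 0x1ff
      byte b{31};                      // fine: a constant expression that provably fits in 8 bits
      int runtime_value = 300;
      byte c = static_cast<byte>(runtime_value);  // compiles; value silently truncated to 44
      // byte d{runtime_value};  // ill-formed: narrowing conversion from 'int'
      return a + b + c;
    }

This is also why casts like static_cast<uint8_t>(0xff << shift) in the hunks below stay as casts: the shifted value is not a constant expression, so brace initialization would not compile there.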

File 1 of 2

@@ -493,21 +493,21 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
 __ cmov(zero, dst, tmp); \
 } while (false)
-#define ASSEMBLE_SIMD_SHIFT(opcode, width) \
-do { \
-XMMRegister dst = i.OutputSimd128Register(); \
-DCHECK_EQ(dst, i.InputSimd128Register(0)); \
-if (HasImmediateInput(instr, 1)) { \
-__ opcode(dst, dst, static_cast<byte>(i.InputInt##width(1))); \
-} else { \
-XMMRegister tmp = i.TempSimd128Register(0); \
-Register tmp_shift = i.TempRegister(1); \
-constexpr int mask = (1 << width) - 1; \
-__ mov(tmp_shift, i.InputRegister(1)); \
-__ and_(tmp_shift, Immediate(mask)); \
-__ Movd(tmp, tmp_shift); \
-__ opcode(dst, dst, tmp); \
-} \
+#define ASSEMBLE_SIMD_SHIFT(opcode, width) \
+do { \
+XMMRegister dst = i.OutputSimd128Register(); \
+DCHECK_EQ(dst, i.InputSimd128Register(0)); \
+if (HasImmediateInput(instr, 1)) { \
+__ opcode(dst, dst, byte{i.InputInt##width(1)}); \
+} else { \
+XMMRegister tmp = i.TempSimd128Register(0); \
+Register tmp_shift = i.TempRegister(1); \
+constexpr int mask = (1 << width) - 1; \
+__ mov(tmp_shift, i.InputRegister(1)); \
+__ and_(tmp_shift, Immediate(mask)); \
+__ Movd(tmp, tmp_shift); \
+__ opcode(dst, dst, tmp); \
+} \
 } while (false)
 void CodeGenerator::AssembleDeconstructFrame() {
@@ -3168,7 +3168,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 if (HasImmediateInput(instr, 1)) {
 // Perform 16-bit shift, then mask away low bits.
 uint8_t shift = i.InputInt3(1);
-__ Psllw(dst, dst, static_cast<byte>(shift));
+__ Psllw(dst, dst, byte{shift});
 uint8_t bmask = static_cast<uint8_t>(0xff << shift);
 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
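
A scalar model of the trick in this hunk may help (my own sketch, not V8 code; the shift is at most 7, as it comes from InputInt3): x86 has no packed 8-bit shift, so each pair of byte lanes is shifted as one 16-bit unit and the bits that leaked across the byte boundary are masked away.

    #include <cstdint>

    // Shift two packed 8-bit lanes left by `shift` (0..7) using only a 16-bit shift.
    uint16_t shl_i8x2(uint16_t lanes, uint8_t shift) {
      uint16_t shifted = static_cast<uint16_t>(lanes << shift);   // low byte leaks into high byte
      uint8_t bmask = static_cast<uint8_t>(0xff << shift);        // bits that legitimately stay in a lane
      uint16_t mask = static_cast<uint16_t>(bmask << 8 | bmask);  // replicate to every byte lane
      return shifted & mask;                                      // clear the leaked bits
    }

The SIMD version does the same thing with Psllw plus a Pand against the byte-replicated mask built from bmask above.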
@@ -3469,7 +3469,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 if (HasImmediateInput(instr, 1)) {
 // Perform 16-bit shift, then mask away high bits.
 uint8_t shift = i.InputInt3(1);
-__ Psrlw(dst, dst, static_cast<byte>(shift));
+__ Psrlw(dst, dst, byte{shift});
 uint8_t bmask = 0xff >> shift;
 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
@@ -3715,7 +3715,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 case kIA32S16x8LoadSplat: {
 __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
 __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
-static_cast<uint8_t>(0));
+uint8_t{0});
 __ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
 break;
 }

File 2 of 2

@@ -600,21 +600,21 @@ void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
 // This macro will directly emit the opcode if the shift is an immediate - the
 // shift value will be taken modulo 2^width. Otherwise, it will emit code to
 // perform the modulus operation.
-#define ASSEMBLE_SIMD_SHIFT(opcode, width) \
-do { \
-XMMRegister dst = i.OutputSimd128Register(); \
-DCHECK_EQ(dst, i.InputSimd128Register(0)); \
-if (HasImmediateInput(instr, 1)) { \
-__ opcode(dst, static_cast<byte>(i.InputInt##width(1))); \
-} else { \
-XMMRegister tmp = i.TempSimd128Register(0); \
-Register tmp_shift = i.TempRegister(1); \
-constexpr int mask = (1 << width) - 1; \
-__ movq(tmp_shift, i.InputRegister(1)); \
-__ andq(tmp_shift, Immediate(mask)); \
-__ Movq(tmp, tmp_shift); \
-__ opcode(dst, tmp); \
-} \
+#define ASSEMBLE_SIMD_SHIFT(opcode, width) \
+do { \
+XMMRegister dst = i.OutputSimd128Register(); \
+DCHECK_EQ(dst, i.InputSimd128Register(0)); \
+if (HasImmediateInput(instr, 1)) { \
+__ opcode(dst, byte{i.InputInt##width(1)}); \
+} else { \
+XMMRegister tmp = i.TempSimd128Register(0); \
+Register tmp_shift = i.TempRegister(1); \
+constexpr int mask = (1 << width) - 1; \
+__ movq(tmp_shift, i.InputRegister(1)); \
+__ andq(tmp_shift, Immediate(mask)); \
+__ Movq(tmp, tmp_shift); \
+__ opcode(dst, tmp); \
+} \
 } while (false)
 void CodeGenerator::AssembleDeconstructFrame() {
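
As the comment above the macro says, register shift counts are taken modulo 2^width. A quick standalone illustration (assumption: width is the base-2 log of the lane size, matching the InputInt##width accessors): masking with (1 << width) - 1 is exactly that modulus, with no division needed.

    #include <cstdint>

    constexpr uint32_t wrap_shift(uint32_t count, int width) {
      return count & ((1u << width) - 1);  // same value as count % (1u << width)
    }
    static_assert(wrap_shift(35, 5) == 3, "32-bit lane shift counts wrap at 32");
    static_assert(wrap_shift(17, 4) == 1, "16-bit lane shift counts wrap at 16");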
@@ -672,9 +672,9 @@ void AdjustStackPointerForTailCall(TurboAssembler* assembler,
 void SetupShuffleMaskInTempRegister(TurboAssembler* assembler, uint32_t* mask,
 XMMRegister tmp) {
-uint64_t shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
+uint64_t shuffle_mask = (mask[0]) | (uint64_t{mask[1]} << 32);
 assembler->Move(tmp, shuffle_mask);
-shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
+shuffle_mask = (mask[2]) | (uint64_t{mask[3]} << 32);
 assembler->movq(kScratchRegister, shuffle_mask);
 assembler->Pinsrq(tmp, kScratchRegister, int8_t{1});
 }
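
One note on the SetupShuffleMaskInTempRegister hunk: the brace initialization wraps the operand rather than the whole expression because the widening must happen before the shift; shifting a 32-bit value by 32 is undefined behavior. A standalone sketch (illustrative only; pack_mask is a hypothetical name):

    #include <cstdint>

    uint64_t pack_mask(const uint32_t* mask) {
      // uint64_t{mask[1]} is a widening (never narrowing) brace initialization,
      // so it always compiles, and the shift then acts on a 64-bit operand.
      return mask[0] | (uint64_t{mask[1]} << 32);
      // By contrast, (mask[1] << 32) would shift a 32-bit operand by its full
      // width, which is undefined behavior.
    }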
@@ -2331,7 +2331,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // propagate -0's and NaNs, which may be non-canonical.
 __ Orpd(kScratchDoubleReg, dst);
 // Canonicalize NaNs by quieting and clearing the payload.
-__ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
+__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
 __ Orpd(kScratchDoubleReg, dst);
 __ Psrlq(dst, 13);
 __ Andnpd(dst, kScratchDoubleReg);
@@ -2353,7 +2353,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // Propagate sign discrepancy and (subtle) quiet NaNs.
 __ Subpd(kScratchDoubleReg, dst);
 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
-__ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
+__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
 __ Psrlq(dst, 13);
 __ Andnpd(dst, kScratchDoubleReg);
 break;
@@ -2412,7 +2412,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 } else {
 __ Movss(dst, i.InputOperand(0));
 }
-__ Shufps(dst, dst, static_cast<byte>(0x0));
+__ Shufps(dst, dst, byte{0x0});
 break;
 }
 case kX64F32x4ExtractLane: {
@@ -2441,12 +2441,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
 XMMRegister dst = i.OutputSimd128Register();
 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
-__ Pblendw(kScratchDoubleReg, dst,
-static_cast<uint8_t>(0x55)); // get lo 16 bits
+__ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55}); // get lo 16 bits
 __ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
-__ Psrld(dst,
-static_cast<byte>(1)); // divide by 2 to get in unsigned range
+__ Psrld(dst, byte{1}); // divide by 2 to get in unsigned range
 __ Cvtdq2ps(dst, dst); // convert hi exactly
 __ Addps(dst, dst); // double hi, exactly
 __ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
@@ -2457,11 +2455,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 XMMRegister src = i.InputSimd128Register(0);
 if (dst == src) {
 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-__ Psrld(kScratchDoubleReg, static_cast<byte>(1));
+__ Psrld(kScratchDoubleReg, byte{1});
 __ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
 } else {
 __ Pcmpeqd(dst, dst);
-__ Psrld(dst, static_cast<byte>(1));
+__ Psrld(dst, byte{1});
 __ Andps(dst, i.InputSimd128Register(0));
 }
 break;
@@ -2471,11 +2469,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 XMMRegister src = i.InputSimd128Register(0);
 if (dst == src) {
 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-__ Pslld(kScratchDoubleReg, static_cast<byte>(31));
+__ Pslld(kScratchDoubleReg, byte{31});
 __ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
 } else {
 __ Pcmpeqd(dst, dst);
-__ Pslld(dst, static_cast<byte>(31));
+__ Pslld(dst, byte{31});
 __ Xorps(dst, i.InputSimd128Register(0));
 }
 break;
@@ -2529,9 +2527,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // propagate -0's and NaNs, which may be non-canonical.
 __ Orps(kScratchDoubleReg, dst);
 // Canonicalize NaNs by quieting and clearing the payload.
-__ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
+__ Cmpps(dst, kScratchDoubleReg, int8_t{3});
 __ Orps(kScratchDoubleReg, dst);
-__ Psrld(dst, static_cast<byte>(10));
+__ Psrld(dst, byte{10});
 __ Andnps(dst, kScratchDoubleReg);
 break;
 }
@@ -2551,21 +2549,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // Propagate sign discrepancy and (subtle) quiet NaNs.
 __ Subps(kScratchDoubleReg, dst);
 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
-__ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
-__ Psrld(dst, static_cast<byte>(10));
+__ Cmpps(dst, kScratchDoubleReg, int8_t{3});
+__ Psrld(dst, byte{10});
 __ Andnps(dst, kScratchDoubleReg);
 break;
 }
 case kX64F32x4Eq: {
 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
 __ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
-static_cast<int8_t>(0x0));
+int8_t{0x0});
 break;
 }
 case kX64F32x4Ne: {
 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
 __ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
-static_cast<int8_t>(0x4));
+int8_t{0x4});
 break;
 }
 case kX64F32x4Lt: {
@@ -2652,14 +2650,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
 // lower quadword
-__ Pextrq(tmp, src, static_cast<int8_t>(0x0));
+__ Pextrq(tmp, src, int8_t{0x0});
 __ sarq_cl(tmp);
-__ Pinsrq(dst, tmp, static_cast<int8_t>(0x0));
+__ Pinsrq(dst, tmp, int8_t{0x0});
 // upper quadword
-__ Pextrq(tmp, src, static_cast<int8_t>(0x1));
+__ Pextrq(tmp, src, int8_t{0x1});
 __ sarq_cl(tmp);
-__ Pinsrq(dst, tmp, static_cast<int8_t>(0x1));
+__ Pinsrq(dst, tmp, int8_t{0x1});
 break;
 }
 case kX64I64x2Add: {
@@ -2877,7 +2875,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 } else {
 __ Movd(dst, i.InputOperand(0));
 }
-__ Pshufd(dst, dst, static_cast<uint8_t>(0x0));
+__ Pshufd(dst, dst, uint8_t{0x0});
 break;
 }
 case kX64I32x4ExtractLane: {
@@ -2907,7 +2905,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 __ Cvttps2dq(dst, dst);
 // Set top bit if >=0 is now < 0
 __ Pand(tmp, dst);
-__ Psrad(tmp, static_cast<byte>(31));
+__ Psrad(tmp, byte{31});
 // Set positive overflow lanes to 0x7FFFFFFF
 __ Pxor(dst, tmp);
 break;
@@ -2918,7 +2916,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 }
 case kX64I32x4SConvertI16x8High: {
 XMMRegister dst = i.OutputSimd128Register();
-__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
+__ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
 __ Pmovsxwd(dst, dst);
 break;
 }
@@ -3000,7 +2998,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 __ Maxps(dst, tmp2);
 // scratch: float representation of max_signed
 __ Pcmpeqd(tmp2, tmp2);
-__ Psrld(tmp2, static_cast<uint8_t>(1)); // 0x7fffffff
+__ Psrld(tmp2, uint8_t{1}); // 0x7fffffff
 __ Cvtdq2ps(tmp2, tmp2); // 0x4f000000
 // tmp: convert (src-max_signed).
 // Positive overflow lanes -> 0x7FFFFFFF
@@ -3024,7 +3022,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 }
 case kX64I32x4UConvertI16x8High: {
 XMMRegister dst = i.OutputSimd128Register();
-__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
+__ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
 __ Pmovzxwd(dst, dst);
 break;
 }
@@ -3078,8 +3076,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 } else {
 __ Movd(dst, i.InputOperand(0));
 }
-__ Pshuflw(dst, dst, static_cast<uint8_t>(0x0));
-__ Pshufd(dst, dst, static_cast<uint8_t>(0x0));
+__ Pshuflw(dst, dst, uint8_t{0x0});
+__ Pshufd(dst, dst, uint8_t{0x0});
 break;
 }
 case kX64I16x8ExtractLaneU: {
@@ -3108,7 +3106,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 }
 case kX64I16x8SConvertI8x16High: {
 XMMRegister dst = i.OutputSimd128Register();
-__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
+__ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
 __ Pmovsxbw(dst, dst);
 break;
 }
@@ -3199,7 +3197,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 }
 case kX64I16x8UConvertI8x16High: {
 XMMRegister dst = i.OutputSimd128Register();
-__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
+__ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
 __ Pmovzxbw(dst, dst);
 break;
 }
@@ -3319,13 +3317,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 if (HasImmediateInput(instr, 1)) {
 // Perform 16-bit shift, then mask away low bits.
 uint8_t shift = i.InputInt3(1);
-__ Psllw(dst, static_cast<byte>(shift));
+__ Psllw(dst, byte{shift});
 uint8_t bmask = static_cast<uint8_t>(0xff << shift);
 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
 __ movl(tmp, Immediate(mask));
 __ Movd(tmp_simd, tmp);
-__ Pshufd(tmp_simd, tmp_simd, static_cast<uint8_t>(0));
+__ Pshufd(tmp_simd, tmp_simd, uint8_t{0});
 __ Pand(dst, tmp_simd);
 } else {
 // Mask off the unwanted bits before word-shifting.
@@ -3403,10 +3401,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // s = 00BB 00BB ... 00BB 00BB
 __ Movaps(tmp, dst);
 __ Movaps(kScratchDoubleReg, right);
-__ Psrlw(tmp, static_cast<byte>(8));
-__ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
+__ Psrlw(tmp, byte{8});
+__ Psrlw(kScratchDoubleReg, byte{8});
 // dst = left * 256
-__ Psllw(dst, static_cast<byte>(8));
+__ Psllw(dst, byte{8});
 // t = I16x8Mul(t, s)
 // => __PP __PP ... __PP __PP
 __ Pmullw(tmp, kScratchDoubleReg);
@@ -3415,10 +3413,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 __ Pmullw(dst, right);
 // t = I16x8Shl(t, 8)
 // => PP00 PP00 ... PP00 PP00
-__ Psllw(tmp, static_cast<byte>(8));
+__ Psllw(tmp, byte{8});
 // dst = I16x8Shr(dst, 8)
 // => 00pp 00pp ... 00pp 00pp
-__ Psrlw(dst, static_cast<byte>(8));
+__ Psrlw(dst, byte{8});
 // dst = I16x8Or(dst, t)
 // => PPpp PPpp ... PPpp PPpp
 __ Por(dst, tmp);
@@ -3469,13 +3467,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 if (HasImmediateInput(instr, 1)) {
 // Perform 16-bit shift, then mask away high bits.
 uint8_t shift = i.InputInt3(1);
-__ Psrlw(dst, static_cast<byte>(shift));
+__ Psrlw(dst, byte{shift});
 uint8_t bmask = 0xff >> shift;
 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
 __ movl(tmp, Immediate(mask));
 __ Movd(tmp_simd, tmp);
-__ Pshufd(tmp_simd, tmp_simd, static_cast<byte>(0));
+__ Pshufd(tmp_simd, tmp_simd, byte{0});
 __ Pand(dst, tmp_simd);
 } else {
 __ Punpckhbw(kScratchDoubleReg, dst);
@@ -3587,8 +3585,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 // Out-of-range indices should return 0, add 112 so that any value > 15
 // saturates to 128 (top bit set), so pshufb will zero that lane.
-__ Move(mask, static_cast<uint32_t>(0x70707070));
-__ Pshufd(mask, mask, static_cast<uint8_t>(0x0));
+__ Move(mask, uint32_t{0x70707070});
+__ Pshufd(mask, mask, uint8_t{0x0});
 __ Paddusb(mask, i.InputSimd128Register(1));
 __ Pshufb(dst, mask);
 break;
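
The 0x70707070 trick in this hunk deserves a scalar model (my sketch, not V8 code): pshufb zeroes a destination byte whenever the top bit of its index byte is set, and Paddusb is an add with unsigned saturation, so adding 112 pushes every out-of-range index into that top-bit-set range.

    #include <cstdint>

    // Scalar model of one swizzle index byte after the Paddusb adjustment.
    uint8_t adjust_index(uint8_t idx) {
      unsigned sum = idx + 0x70u;                            // 112, as the comment says
      return sum > 0xff ? 0xff : static_cast<uint8_t>(sum);  // saturating unsigned add
    }
    // idx 0..15 -> 0x70..0x7f: top bit clear, pshufb still uses the low 4 bits.
    // idx > 15  -> 0x80..0xff: top bit set, pshufb zeroes that lane.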
@@ -3649,7 +3647,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
 __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
 __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
-static_cast<uint8_t>(0));
+uint8_t{0});
 __ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
 break;
 }
@@ -3661,7 +3659,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 } else {
 __ Movss(i.OutputSimd128Register(), i.MemoryOperand());
 __ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
-static_cast<byte>(0));
+byte{0});
 }
 break;
 }
@@ -3745,10 +3743,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
 if (lane < 4) {
 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
-__ Pshufd(dst, dst, static_cast<uint8_t>(0));
+__ Pshufd(dst, dst, uint8_t{0});
 } else {
 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
-__ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
+__ Pshufd(dst, dst, uint8_t{0xaa});
 }
 break;
 }
@@ -3766,10 +3764,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
 if (lane < 4) {
 __ Pshuflw(dst, dst, half_dup);
-__ Pshufd(dst, dst, static_cast<uint8_t>(0));
+__ Pshufd(dst, dst, uint8_t{0});
 } else {
 __ Pshufhw(dst, dst, half_dup);
-__ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
+__ Pshufd(dst, dst, uint8_t{0xaa});
 }
 break;
 }
@@ -3803,10 +3801,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 DCHECK_EQ(dst, i.InputSimd128Register(0));
 if (instr->InputCount() == 2) {
 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
-__ Psrld(kScratchDoubleReg, static_cast<byte>(16));
+__ Psrld(kScratchDoubleReg, byte{16});
 src2 = kScratchDoubleReg;
 }
-__ Psrld(dst, static_cast<byte>(16));
+__ Psrld(dst, byte{16});
 __ Packusdw(dst, src2);
 break;
 }
@@ -3816,11 +3814,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 DCHECK_EQ(dst, i.InputSimd128Register(0));
 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
 if (instr->InputCount() == 2) {
-ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1,
-static_cast<uint8_t>(0x55));
+ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
 src2 = kScratchDoubleReg;
 }
-__ Pblendw(dst, kScratchDoubleReg, static_cast<uint8_t>(0xaa));
+__ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
 __ Packusdw(dst, src2);
 break;
 }
@@ -3830,10 +3827,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 DCHECK_EQ(dst, i.InputSimd128Register(0));
 if (instr->InputCount() == 2) {
 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
-__ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
+__ Psrlw(kScratchDoubleReg, byte{8});
 src2 = kScratchDoubleReg;
 }
-__ Psrlw(dst, static_cast<byte>(8));
+__ Psrlw(dst, byte{8});
 __ Packuswb(dst, src2);
 break;
 }
@@ -3843,42 +3840,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 DCHECK_EQ(dst, i.InputSimd128Register(0));
 if (instr->InputCount() == 2) {
 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
-__ Psllw(kScratchDoubleReg, static_cast<byte>(8));
-__ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
+__ Psllw(kScratchDoubleReg, byte{8});
+__ Psrlw(kScratchDoubleReg, byte{8});
 src2 = kScratchDoubleReg;
 }
-__ Psllw(dst, static_cast<byte>(8));
-__ Psrlw(dst, static_cast<byte>(8));
+__ Psllw(dst, byte{8});
+__ Psrlw(dst, byte{8});
 __ Packuswb(dst, src2);
 break;
 }
 case kX64S8x16TransposeLow: {
 XMMRegister dst = i.OutputSimd128Register();
 DCHECK_EQ(dst, i.InputSimd128Register(0));
-__ Psllw(dst, static_cast<byte>(8));
+__ Psllw(dst, byte{8});
 if (instr->InputCount() == 1) {
 __ Movups(kScratchDoubleReg, dst);
 } else {
 DCHECK_EQ(2, instr->InputCount());
 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
-__ Psllw(kScratchDoubleReg, static_cast<byte>(8));
+__ Psllw(kScratchDoubleReg, byte{8});
 }
-__ Psrlw(dst, static_cast<byte>(8));
+__ Psrlw(dst, byte{8});
 __ Por(dst, kScratchDoubleReg);
 break;
 }
 case kX64S8x16TransposeHigh: {
 XMMRegister dst = i.OutputSimd128Register();
 DCHECK_EQ(dst, i.InputSimd128Register(0));
-__ Psrlw(dst, static_cast<byte>(8));
+__ Psrlw(dst, byte{8});
 if (instr->InputCount() == 1) {
 __ Movups(kScratchDoubleReg, dst);
 } else {
 DCHECK_EQ(2, instr->InputCount());
 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
-__ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
+__ Psrlw(kScratchDoubleReg, byte{8});
 }
-__ Psllw(kScratchDoubleReg, static_cast<byte>(8));
+__ Psllw(kScratchDoubleReg, byte{8});
 __ Por(dst, kScratchDoubleReg);
 break;
 }
@@ -3895,8 +3892,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
 __ Pshufhw(dst, dst, shuffle_mask);
 }
 __ Movaps(kScratchDoubleReg, dst);
-__ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
-__ Psllw(dst, static_cast<byte>(8));
+__ Psrlw(kScratchDoubleReg, byte{8});
+__ Psllw(dst, byte{8});
 __ Por(dst, kScratchDoubleReg);
 break;
 }