[x64] Improve loading float constants
Avoid loading to a general purpose register if the bit pattern consists of a consecutive block of 1 bits. Drive-by: Change a parameter from int8_t to byte such that the AVX_OP macro works on these methods. R=mstarzinger@chromium.org Change-Id: Ib469ddd29d92ddeabe98460d2951b01159a6548a Reviewed-on: https://chromium-review.googlesource.com/969123 Commit-Queue: Clemens Hammacher <clemensh@chromium.org> Reviewed-by: Michael Starzinger <mstarzinger@chromium.org> Cr-Commit-Position: refs/heads/master@{#52077}
This commit is contained in:
parent
61e4679906
commit
2ad4373b79
@ -1629,85 +1629,85 @@ class Assembler : public AssemblerBase {
|
||||
// AVX VLDDQU: load 128 bits from the (possibly unaligned) memory operand
// |src| into |dst| (F2 0F F0 encoding).
void vlddqu(XMMRegister dst, Operand src) {
  vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
}
|
||||
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x71, xmm6, dst, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x71, xmm2, dst, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x71, xmm4, dst, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x72, xmm6, dst, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x72, xmm2, dst, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
|
||||
void vpextrb(Register dst, XMMRegister src, uint8_t imm8) {
|
||||
XMMRegister idst = XMMRegister::from_code(dst.code());
|
||||
vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpextrb(Operand dst, XMMRegister src, int8_t imm8) {
|
||||
void vpextrb(Operand dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
|
||||
void vpextrw(Register dst, XMMRegister src, uint8_t imm8) {
|
||||
XMMRegister idst = XMMRegister::from_code(dst.code());
|
||||
vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpextrw(Operand dst, XMMRegister src, int8_t imm8) {
|
||||
void vpextrw(Operand dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
|
||||
void vpextrd(Register dst, XMMRegister src, uint8_t imm8) {
|
||||
XMMRegister idst = XMMRegister::from_code(dst.code());
|
||||
vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpextrd(Operand dst, XMMRegister src, int8_t imm8) {
|
||||
void vpextrd(Operand dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
|
||||
void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
|
||||
XMMRegister isrc = XMMRegister::from_code(src2.code());
|
||||
vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
|
||||
void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
|
||||
vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
|
||||
void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
|
||||
XMMRegister isrc = XMMRegister::from_code(src2.code());
|
||||
vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
|
||||
void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
|
||||
vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
|
||||
void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
|
||||
XMMRegister isrc = XMMRegister::from_code(src2.code());
|
||||
vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
|
||||
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
|
||||
vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
|
||||
emit(imm8);
|
||||
}
|
||||
void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
|
||||
void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
|
||||
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
|
||||
emit(imm8);
|
||||
}
|
||||
|
@ -1144,15 +1144,19 @@ void TurboAssembler::MoveNumber(Register dst, double value) {
|
||||
|
||||
// Materialize the 32-bit constant |src| in |dst|. Fast paths avoid the
// general-purpose scratch register when the bit pattern can be synthesized
// with SSE/AVX instructions alone:
//  - 0: xor the register with itself.
//  - one consecutive block of 1 bits (pop + ntz + nlz == 32): start from
//    all-ones (Pcmpeqd) and shift the block into place.
void TurboAssembler::Move(XMMRegister dst, uint32_t src) {
  if (src == 0) {
    // Xorps: same effect as Xorpd but one byte shorter (no 0x66 prefix).
    Xorps(dst, dst);
  } else {
    unsigned nlz = base::bits::CountLeadingZeros(src);
    unsigned ntz = base::bits::CountTrailingZeros(src);
    unsigned pop = base::bits::CountPopulation(src);
    DCHECK_NE(0u, pop);
    if (pop + ntz + nlz == 32) {
      // All set bits are contiguous: make all-ones, shift left to clear the
      // low ntz bits, then shift right to clear the high nlz bits.
      Pcmpeqd(dst, dst);
      if (ntz) Pslld(dst, static_cast<byte>(ntz + nlz));
      if (nlz) Psrld(dst, static_cast<byte>(nlz));
    } else {
      // General case: load via the scratch GPR. Movd (not Movq) because the
      // constant is only 32 bits wide.
      movl(kScratchRegister, Immediate(src));
      Movd(dst, kScratchRegister);
    }
  }
}
|
||||
@ -1165,14 +1169,10 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
|
||||
unsigned ntz = base::bits::CountTrailingZeros(src);
|
||||
unsigned pop = base::bits::CountPopulation(src);
|
||||
DCHECK_NE(0u, pop);
|
||||
if (pop == 64) {
|
||||
if (pop + ntz + nlz == 64) {
|
||||
Pcmpeqd(dst, dst);
|
||||
} else if (pop + ntz == 64) {
|
||||
Pcmpeqd(dst, dst);
|
||||
Psllq(dst, static_cast<byte>(ntz));
|
||||
} else if (pop + nlz == 64) {
|
||||
Pcmpeqd(dst, dst);
|
||||
Psrlq(dst, static_cast<byte>(nlz));
|
||||
if (ntz) Psllq(dst, static_cast<byte>(ntz + nlz));
|
||||
if (nlz) Psrlq(dst, static_cast<byte>(nlz));
|
||||
} else {
|
||||
uint32_t lower = static_cast<uint32_t>(src);
|
||||
uint32_t upper = static_cast<uint32_t>(src >> 32);
|
||||
|
@ -187,7 +187,9 @@ class TurboAssembler : public Assembler {
|
||||
AVX_OP(Movss, movss)
|
||||
AVX_OP(Movsd, movsd)
|
||||
AVX_OP(Pcmpeqd, pcmpeqd)
|
||||
AVX_OP(Pslld, pslld)
|
||||
AVX_OP(Psllq, psllq)
|
||||
AVX_OP(Psrld, psrld)
|
||||
AVX_OP(Psrlq, psrlq)
|
||||
AVX_OP(Addsd, addsd)
|
||||
AVX_OP(Mulsd, mulsd)
|
||||
|
Loading…
Reference in New Issue
Block a user