[x64] Improve loading float constants

Avoid going through a general purpose register if the bit pattern consists
of a single contiguous run of 1 bits; such constants are instead built with
pcmpeqd followed by shifts.

Drive-by: Change the imm8 parameter of several AVX instruction emitters from
int8_t to uint8_t (byte) so that the AVX_OP macro works on these methods.

R=mstarzinger@chromium.org

Change-Id: Ib469ddd29d92ddeabe98460d2951b01159a6548a
Reviewed-on: https://chromium-review.googlesource.com/969123
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#52077}
This commit is contained in:
Clemens Hammacher 2018-03-20 17:40:53 +01:00 committed by Commit Bot
parent 61e4679906
commit 2ad4373b79
3 changed files with 31 additions and 29 deletions

View File

@ -1629,85 +1629,85 @@ class Assembler : public AssemblerBase {
void vlddqu(XMMRegister dst, Operand src) {
vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
}
// Emits the instruction through vinstr with opcode byte 0x71 (k66, k0F, kWIG)
// and then appends imm8 as a trailing immediate byte.
// xmm6 occupies vinstr's first register argument; presumably it selects the
// /6 opcode extension rather than naming a real operand -- confirm against
// the Intel SDM entry for PSLLW.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x71, xmm6, dst, src, k66, k0F, kWIG);
emit(imm8);
}
// Emits the instruction through vinstr with opcode byte 0x71 (k66, k0F, kWIG)
// and then appends imm8 as a trailing immediate byte.
// xmm2 occupies vinstr's first register argument; presumably it selects the
// /2 opcode extension rather than naming a real operand -- confirm against
// the Intel SDM entry for PSRLW.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x71, xmm2, dst, src, k66, k0F, kWIG);
emit(imm8);
}
// Emits the instruction through vinstr with opcode byte 0x71 (k66, k0F, kWIG)
// and then appends imm8 as a trailing immediate byte.
// xmm4 occupies vinstr's first register argument; presumably it selects the
// /4 opcode extension rather than naming a real operand -- confirm against
// the Intel SDM entry for PSRAW.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x71, xmm4, dst, src, k66, k0F, kWIG);
emit(imm8);
}
// Emits the instruction through vinstr with opcode byte 0x72 (k66, k0F, kWIG)
// and then appends imm8 as a trailing immediate byte.
// xmm6 occupies vinstr's first register argument; presumably it selects the
// /6 opcode extension rather than naming a real operand -- confirm against
// the Intel SDM entry for PSLLD.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x72, xmm6, dst, src, k66, k0F, kWIG);
emit(imm8);
}
// Emits the instruction through vinstr with opcode byte 0x72 (k66, k0F, kWIG)
// and then appends imm8 as a trailing immediate byte.
// xmm2 occupies vinstr's first register argument; presumably it selects the
// /2 opcode extension rather than naming a real operand -- confirm against
// the Intel SDM entry for PSRLD.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x72, xmm2, dst, src, k66, k0F, kWIG);
emit(imm8);
}
// Emits the instruction through vinstr with opcode byte 0x72 (k66, k0F, kWIG)
// and then appends imm8 as a trailing immediate byte.
// xmm4 occupies vinstr's first register argument; presumably it selects the
// /4 opcode extension rather than naming a real operand -- confirm against
// the Intel SDM entry for PSRAD.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG);
emit(imm8);
}
// Register-destination overload. Emits opcode byte 0x14 through vinstr
// (k66, k0F3A, kW0) followed by the immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
void vpextrb(Register dst, XMMRegister src, uint8_t imm8) {
// vinstr is called with XMM register arguments here, so the general purpose
// destination is re-expressed as the XMMRegister with the same code.
XMMRegister idst = XMMRegister::from_code(dst.code());
// src goes in the first register slot and the destination in the last;
// xmm0 fills the middle slot (presumably an unused operand -- confirm).
vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
emit(imm8);
}
// Operand-destination overload. Emits opcode byte 0x14 through vinstr
// (k66, k0F3A, kW0) with src in the first register slot, xmm0 in the middle
// slot and dst as the operand, followed by the immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpextrb(Operand dst, XMMRegister src, int8_t imm8) {
void vpextrb(Operand dst, XMMRegister src, uint8_t imm8) {
vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
emit(imm8);
}
// Register-destination overload. Emits opcode byte 0xc5 through vinstr
// (k66, k0F, kW0) followed by the immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
void vpextrw(Register dst, XMMRegister src, uint8_t imm8) {
// Re-express the general purpose destination as the XMMRegister with the
// same code so it can be handed to vinstr.
XMMRegister idst = XMMRegister::from_code(dst.code());
// Unlike the other register-form extract emitters in this group, the
// destination is passed in the first slot and src in the last.
vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
emit(imm8);
}
// Operand-destination overload. Emits opcode byte 0x15 through vinstr
// (k66, k0F3A, kW0) -- a different opcode and escape argument than the
// register-destination overload uses -- followed by the immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpextrw(Operand dst, XMMRegister src, int8_t imm8) {
void vpextrw(Operand dst, XMMRegister src, uint8_t imm8) {
vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
emit(imm8);
}
// Register-destination overload. Emits opcode byte 0x16 through vinstr
// (k66, k0F3A, kW0) followed by the immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
void vpextrd(Register dst, XMMRegister src, uint8_t imm8) {
// Re-express the general purpose destination as the XMMRegister with the
// same code so it can be handed to vinstr.
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
emit(imm8);
}
// Operand-destination overload. Emits opcode byte 0x16 through vinstr
// (k66, k0F3A, kW0) with src in the first register slot, xmm0 in the middle
// slot and dst as the operand, followed by the immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpextrd(Operand dst, XMMRegister src, int8_t imm8) {
void vpextrd(Operand dst, XMMRegister src, uint8_t imm8) {
vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
emit(imm8);
}
// Register-source overload. Emits opcode byte 0x20 through vinstr
// (k66, k0F3A, kW0) with dst, src1 and the converted src2, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
// Re-express the general purpose source as the XMMRegister with the same
// code so it can be handed to vinstr.
XMMRegister isrc = XMMRegister::from_code(src2.code());
vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
emit(imm8);
}
// Operand-source overload. Emits opcode byte 0x20 through vinstr
// (k66, k0F3A, kW0) with dst, src1 and the operand src2, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
emit(imm8);
}
// Register-source overload. Emits opcode byte 0xc4 through vinstr
// (k66, k0F, kW0) with dst, src1 and the converted src2, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
// Re-express the general purpose source as the XMMRegister with the same
// code so it can be handed to vinstr.
XMMRegister isrc = XMMRegister::from_code(src2.code());
vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
emit(imm8);
}
// Operand-source overload. Emits opcode byte 0xc4 through vinstr
// (k66, k0F, kW0) with dst, src1 and the operand src2, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
emit(imm8);
}
// Register-source overload. Emits opcode byte 0x22 through vinstr
// (k66, k0F3A, kW0) with dst, src1 and the converted src2, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
// Re-express the general purpose source as the XMMRegister with the same
// code so it can be handed to vinstr.
XMMRegister isrc = XMMRegister::from_code(src2.code());
vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
emit(imm8);
}
// Operand-source overload. Emits opcode byte 0x22 through vinstr
// (k66, k0F3A, kW0) with dst, src1 and the operand src2, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
emit(imm8);
}
// Emits opcode byte 0x70 through vinstr (k66, k0F, kWIG) with dst in the
// first register slot, xmm0 in the middle slot and src last, followed by the
// immediate byte imm8.
// NOTE(review): the two signature lines are the before/after sides of this
// diff (imm8: int8_t -> uint8_t); only one of them exists in the real file.
void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8);
}

View File

@ -1144,15 +1144,19 @@ void TurboAssembler::MoveNumber(Register dst, double value) {
// Materializes the 32-bit bit pattern |src| in |dst|, picking the sequence
// from the shape of the pattern:
//   - src == 0: xor dst with itself.
//   - the set bits form one contiguous run (pop + ntz + nlz == 32):
//     Pcmpeqd(dst, dst) followed by at most two shifts, with no general
//     purpose register involved.
//   - anything else: load src into kScratchRegister and move it to dst.
// NOTE(review): this is a diff rendering without +/- markers, so removed and
// added lines appear side by side. The adjacent pairs Xorpd/Xorps,
// "pop == 32"/"pop + ntz + nlz == 32" and Movq/Movd are presumably the
// before/after versions of the same statement; only the second of each pair
// should exist in the resulting file.
void TurboAssembler::Move(XMMRegister dst, uint32_t src) {
if (src == 0) {
Xorpd(dst, dst);
Xorps(dst, dst);
} else {
unsigned nlz = base::bits::CountLeadingZeros(src);
unsigned ntz = base::bits::CountTrailingZeros(src);
unsigned pop = base::bits::CountPopulation(src);
// src != 0 in this branch, so at least one bit is set.
DCHECK_NE(0u, pop);
if (pop == 32) {
// True exactly when every bit between the lowest and the highest set bit is
// also set, i.e. the pattern is a single run of ones (includes pop == 32).
if (pop + ntz + nlz == 32) {
// Comparing dst with itself for equality; assumed to leave every bit of dst
// set -- confirm against the PCMPEQD description.
Pcmpeqd(dst, dst);
// Starting from all ones: shifting left by ntz + nlz leaves |pop| ones at the
// top of each 32-bit lane, and the following right shift by nlz moves that
// run down so it starts at bit ntz. When ntz == 0 the left shift is skipped
// and the right shift alone leaves the low |pop| bits set; when nlz == 0 the
// left shift amount is just ntz and no right shift is needed.
if (ntz) Pslld(dst, static_cast<byte>(ntz + nlz));
if (nlz) Psrld(dst, static_cast<byte>(nlz));
} else {
// General case: go through the scratch general purpose register.
movl(kScratchRegister, Immediate(src));
Movq(dst, kScratchRegister);
Movd(dst, kScratchRegister);
}
}
}
@ -1165,14 +1169,10 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
unsigned ntz = base::bits::CountTrailingZeros(src);
unsigned pop = base::bits::CountPopulation(src);
DCHECK_NE(0u, pop);
if (pop == 64) {
if (pop + ntz + nlz == 64) {
Pcmpeqd(dst, dst);
} else if (pop + ntz == 64) {
Pcmpeqd(dst, dst);
Psllq(dst, static_cast<byte>(ntz));
} else if (pop + nlz == 64) {
Pcmpeqd(dst, dst);
Psrlq(dst, static_cast<byte>(nlz));
if (ntz) Psllq(dst, static_cast<byte>(ntz + nlz));
if (nlz) Psrlq(dst, static_cast<byte>(nlz));
} else {
uint32_t lower = static_cast<uint32_t>(src);
uint32_t upper = static_cast<uint32_t>(src >> 32);

View File

@ -187,7 +187,9 @@ class TurboAssembler : public Assembler {
// Each AVX_OP(Name, name) line pairs a capitalized TurboAssembler-level name
// with the lower-case assembler instruction name. The macro itself is defined
// outside this view; presumably it generates a wrapper that picks between the
// AVX (v-prefixed) and the plain SSE form of the instruction -- confirm
// against the AVX_OP definition.
// NOTE(review): the hunk header says this region grows from 7 to 9 lines;
// Pslld and Psrld are presumably the two additions, since the contiguous-run
// path of Move(XMMRegister, uint32_t) calls them.
AVX_OP(Movss, movss)
AVX_OP(Movsd, movsd)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Pslld, pslld)
AVX_OP(Psllq, psllq)
AVX_OP(Psrld, psrld)
AVX_OP(Psrlq, psrlq)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)