PPC [liftoff]: optimize unsigned byte reverse ops
This CL optimizes the ByteReverse 16/32/64 ops on PPC (< 10) as well as PPC_10_PLUS. A 32-bit sign extension is also added to `ByteRev32` in codegen.

Change-Id: I6379ac4222f3574ab226971546238142039fe977
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3298308
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Reviewed-by: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#78048}
This commit is contained in:
parent
50c808d4b3
commit
407922fad8
@ -3579,21 +3579,37 @@ void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst,
|
||||
addi(sp, sp, Operand(2 * kSimd128Size));
|
||||
}
|
||||
|
||||
void TurboAssembler::ByteReverseU16(Register dst, Register val) {
|
||||
subi(sp, sp, Operand(kSystemPointerSize));
|
||||
sth(val, MemOperand(sp));
|
||||
lhbrx(dst, MemOperand(r0, sp));
|
||||
addi(sp, sp, Operand(kSystemPointerSize));
|
||||
void TurboAssembler::ByteReverseU16(Register dst, Register val,
|
||||
Register scratch) {
|
||||
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
|
||||
brh(dst, val);
|
||||
ZeroExtHalfWord(dst, dst);
|
||||
return;
|
||||
}
|
||||
rlwinm(scratch, val, 8, 16, 23);
|
||||
rlwinm(dst, val, 24, 24, 31);
|
||||
orx(dst, scratch, dst);
|
||||
ZeroExtHalfWord(dst, dst);
|
||||
}
|
||||
|
||||
void TurboAssembler::ByteReverseU32(Register dst, Register val) {
|
||||
subi(sp, sp, Operand(kSystemPointerSize));
|
||||
stw(val, MemOperand(sp));
|
||||
lwbrx(dst, MemOperand(r0, sp));
|
||||
addi(sp, sp, Operand(kSystemPointerSize));
|
||||
void TurboAssembler::ByteReverseU32(Register dst, Register val,
|
||||
Register scratch) {
|
||||
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
|
||||
brw(dst, val);
|
||||
ZeroExtWord32(dst, dst);
|
||||
return;
|
||||
}
|
||||
rotlwi(scratch, val, 8);
|
||||
rlwimi(scratch, val, 24, 0, 7);
|
||||
rlwimi(scratch, val, 24, 16, 23);
|
||||
ZeroExtWord32(dst, dst);
|
||||
}
|
||||
|
||||
void TurboAssembler::ByteReverseU64(Register dst, Register val) {
|
||||
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
|
||||
brd(dst, val);
|
||||
return;
|
||||
}
|
||||
subi(sp, sp, Operand(kSystemPointerSize));
|
||||
std(val, MemOperand(sp));
|
||||
ldbrx(dst, MemOperand(r0, sp));
|
||||
@ -3826,7 +3842,7 @@ void TurboAssembler::ReverseBitsU64(Register dst, Register src,
|
||||
|
||||
void TurboAssembler::ReverseBitsU32(Register dst, Register src,
|
||||
Register scratch1, Register scratch2) {
|
||||
ByteReverseU32(dst, src);
|
||||
ByteReverseU32(dst, src, scratch1);
|
||||
for (int i = 4; i < 8; i++) {
|
||||
ReverseBitsInSingleByteU64(dst, dst, scratch1, scratch2, i);
|
||||
}
|
||||
|
@ -612,8 +612,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
|
||||
Simd128Register scratch);
|
||||
void SwapSimd128(MemOperand src, MemOperand dst, Simd128Register scratch);
|
||||
|
||||
void ByteReverseU16(Register dst, Register val);
|
||||
void ByteReverseU32(Register dst, Register val);
|
||||
void ByteReverseU16(Register dst, Register val, Register scratch);
|
||||
void ByteReverseU32(Register dst, Register val, Register scratch);
|
||||
void ByteReverseU64(Register dst, Register val);
|
||||
|
||||
// Before calling a C-function from generated code, align arguments on stack.
|
||||
|
@ -2075,6 +2075,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
|
||||
Register temp1 = r0;
|
||||
if (CpuFeatures::IsSupported(PPC_10_PLUS)) {
|
||||
__ brw(output, input);
|
||||
__ extsw(output, output);
|
||||
break;
|
||||
}
|
||||
__ rotlwi(temp1, input, 8);
|
||||
|
@ -559,9 +559,9 @@ constexpr bool is_be = false;
|
||||
case StoreType::kI64Store16: { \
|
||||
auto op_func = [&](Register dst, Register lhs, Register rhs) { \
|
||||
if (is_be) { \
|
||||
ByteReverseU16(dst, lhs); \
|
||||
ByteReverseU16(dst, lhs, r0); \
|
||||
instr(dst, dst, rhs); \
|
||||
ByteReverseU16(dst, dst); \
|
||||
ByteReverseU16(dst, dst, r0); \
|
||||
} else { \
|
||||
instr(dst, lhs, rhs); \
|
||||
} \
|
||||
@ -573,9 +573,9 @@ constexpr bool is_be = false;
|
||||
case StoreType::kI64Store32: { \
|
||||
auto op_func = [&](Register dst, Register lhs, Register rhs) { \
|
||||
if (is_be) { \
|
||||
ByteReverseU32(dst, lhs); \
|
||||
ByteReverseU32(dst, lhs, r0); \
|
||||
instr(dst, dst, rhs); \
|
||||
ByteReverseU32(dst, dst); \
|
||||
ByteReverseU32(dst, dst, r0); \
|
||||
} else { \
|
||||
instr(dst, lhs, rhs); \
|
||||
} \
|
||||
@ -657,9 +657,9 @@ void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
|
||||
case StoreType::kI32Store16:
|
||||
case StoreType::kI64Store16: {
|
||||
if (is_be) {
|
||||
ByteReverseU16(r0, value.gp());
|
||||
ByteReverseU16(r0, value.gp(), ip);
|
||||
TurboAssembler::AtomicExchange<uint16_t>(dst, r0, result.gp());
|
||||
ByteReverseU16(result.gp(), result.gp());
|
||||
ByteReverseU16(result.gp(), result.gp(), ip);
|
||||
} else {
|
||||
TurboAssembler::AtomicExchange<uint16_t>(dst, value.gp(), result.gp());
|
||||
}
|
||||
@ -668,9 +668,9 @@ void LiftoffAssembler::AtomicExchange(Register dst_addr, Register offset_reg,
|
||||
case StoreType::kI32Store:
|
||||
case StoreType::kI64Store32: {
|
||||
if (is_be) {
|
||||
ByteReverseU32(r0, value.gp());
|
||||
ByteReverseU32(r0, value.gp(), ip);
|
||||
TurboAssembler::AtomicExchange<uint32_t>(dst, r0, result.gp());
|
||||
ByteReverseU32(result.gp(), result.gp());
|
||||
ByteReverseU32(result.gp(), result.gp(), ip);
|
||||
} else {
|
||||
TurboAssembler::AtomicExchange<uint32_t>(dst, value.gp(), result.gp());
|
||||
}
|
||||
@ -719,11 +719,11 @@ void LiftoffAssembler::AtomicCompareExchange(
|
||||
case StoreType::kI64Store16: {
|
||||
if (is_be) {
|
||||
Push(new_value.gp(), expected.gp());
|
||||
ByteReverseU16(new_value.gp(), new_value.gp());
|
||||
ByteReverseU16(expected.gp(), expected.gp());
|
||||
ByteReverseU16(new_value.gp(), new_value.gp(), r0);
|
||||
ByteReverseU16(expected.gp(), expected.gp(), r0);
|
||||
TurboAssembler::AtomicCompareExchange<uint16_t>(
|
||||
dst, expected.gp(), new_value.gp(), result.gp(), r0);
|
||||
ByteReverseU16(result.gp(), result.gp());
|
||||
ByteReverseU16(result.gp(), result.gp(), r0);
|
||||
Pop(new_value.gp(), expected.gp());
|
||||
} else {
|
||||
TurboAssembler::AtomicCompareExchange<uint16_t>(
|
||||
@ -735,11 +735,11 @@ void LiftoffAssembler::AtomicCompareExchange(
|
||||
case StoreType::kI64Store32: {
|
||||
if (is_be) {
|
||||
Push(new_value.gp(), expected.gp());
|
||||
ByteReverseU32(new_value.gp(), new_value.gp());
|
||||
ByteReverseU32(expected.gp(), expected.gp());
|
||||
ByteReverseU32(new_value.gp(), new_value.gp(), r0);
|
||||
ByteReverseU32(expected.gp(), expected.gp(), r0);
|
||||
TurboAssembler::AtomicCompareExchange<uint32_t>(
|
||||
dst, expected.gp(), new_value.gp(), result.gp(), r0);
|
||||
ByteReverseU32(result.gp(), result.gp());
|
||||
ByteReverseU32(result.gp(), result.gp(), r0);
|
||||
Pop(new_value.gp(), expected.gp());
|
||||
} else {
|
||||
TurboAssembler::AtomicCompareExchange<uint32_t>(
|
||||
|
Loading…
Reference in New Issue
Block a user