change of parameter from [0, 1) to [-1, 1) for better lerp
Using the extra bit improves the difference rate from .38% to .19%. I think this uses the full resolution of all the values, and no more accuracy can be gained by trying to get more bits in use. The maximum difference is still 1. This gives the same answer with the neon_vqrdmulhq_s16 and ssse3_mm_mulhrs_epi16 instructions. Change-Id: Ie801b68aead408c5e1845aaf94ea660121e414b9 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/447719 Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
parent
56efcf2d5b
commit
fe678d9b1c
@ -59,6 +59,20 @@ static int16_t ssse3_lerp(float t, int16_t a, int16_t b) {
|
||||
return (answer[0] + half) >> logPixelScale;
|
||||
}
|
||||
|
||||
// Change of parameters on t from [0, 1) to [-1, 1). This cuts the number if differences in half.
|
||||
template <int logPixelScale>
|
||||
static int16_t balanced_lerp(float t, int16_t a, int16_t b) {
|
||||
const int16_t half = 1 << logPixelScale;
|
||||
// t on [-1, 1).
|
||||
Q15 qt (floor(t * 65536.0f - 32768.0f + 0.5f));
|
||||
// need to pick logPixelScale to scale by addition 1/2.
|
||||
Q15 qw ((b - a) << logPixelScale);
|
||||
Q15 qm ((a + b) << logPixelScale);
|
||||
Q15 answer = simulate_ssse3_mm_mulhrs_epi16(qt, qw) + qm;
|
||||
// Extra shift to divide by 2.
|
||||
return (answer[0] + half) >> (logPixelScale + 1);
|
||||
}
|
||||
|
||||
template <typename Lerp>
|
||||
static Stats check_lerp(Lerp lerp) {
|
||||
Stats stats;
|
||||
@ -112,6 +126,10 @@ int main() {
|
||||
stats = check_scaled_lerp(ssse3_lerp<1>);
|
||||
stats.print();
|
||||
|
||||
printf("\nInterval [-1, 1) mm_mulhrs_epi16...\n");
|
||||
stats = check_lerp(balanced_lerp<7>);
|
||||
stats.print();
|
||||
|
||||
printf("Done.");
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user