From 3848427d884b72114854c8eef9662691f23fae7b Mon Sep 17 00:00:00 2001 From: mtklein Date: Fri, 7 Aug 2015 08:48:12 -0700 Subject: [PATCH] The compiler can generate smulbb perfectly well nowadays. BUG=skia:4117 Review URL: https://codereview.chromium.org/1273203002 --- include/core/SkColorPriv.h | 4 ++-- include/core/SkMath.h | 32 ++-------------------------- include/core/SkPreConfig.h | 6 ------ src/core/SkMathPriv.h | 4 ++-- src/core/SkScan_Antihair.cpp | 22 +++++++++---------- src/opts/SkBlitRow_opts_arm_neon.cpp | 6 +++--- src/opts/SkBlitRow_opts_mips_dsp.cpp | 9 +++----- 7 files changed, 23 insertions(+), 60 deletions(-) diff --git a/include/core/SkColorPriv.h b/include/core/SkColorPriv.h index f9c5d928a0..3dec49b73e 100644 --- a/include/core/SkColorPriv.h +++ b/include/core/SkColorPriv.h @@ -193,7 +193,7 @@ static inline unsigned Sk255To256(U8CPU value) { /** Multiplify value by 0..256, and shift the result down 8 (i.e. return (value * alpha256) >> 8) */ -#define SkAlphaMul(value, alpha256) (SkMulS16(value, alpha256) >> 8) +#define SkAlphaMul(value, alpha256) (((value) * (alpha256)) >> 8) // The caller may want negative values, so keep all params signed (int) // so we don't accidentally slip into unsigned math and lose the sign @@ -213,7 +213,7 @@ static inline int SkAlphaBlend255(S16CPU src, S16CPU dst, U8CPU alpha) { SkASSERT((int16_t)dst == dst); SkASSERT((uint8_t)alpha == alpha); - int prod = SkMulS16(src - dst, alpha) + 128; + int prod = (src - dst) * alpha + 128; prod = (prod + (prod >> 8)) >> 8; return dst + prod; } diff --git a/include/core/SkMath.h b/include/core/SkMath.h index d1d0e360d4..e5069592d0 100644 --- a/include/core/SkMath.h +++ b/include/core/SkMath.h @@ -156,34 +156,6 @@ template inline bool SkIsPow2(T value) { /////////////////////////////////////////////////////////////////////////////// -/** - * SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t. - * With this requirement, we can generate faster instructions on some - * architectures. - */ -#ifdef SK_ARM_HAS_EDSP - static inline int32_t SkMulS16(S16CPU x, S16CPU y) { - SkASSERT((int16_t)x == x); - SkASSERT((int16_t)y == y); - int32_t product; - asm("smulbb %0, %1, %2 \n" - : "=r"(product) - : "r"(x), "r"(y) - ); - return product; - } -#else - #ifdef SK_DEBUG - static inline int32_t SkMulS16(S16CPU x, S16CPU y) { - SkASSERT((int16_t)x == x); - SkASSERT((int16_t)y == y); - return x * y; - } - #else - #define SkMulS16(x, y) ((x) * (y)) - #endif -#endif - /** * Return a*b/((1 << shift) - 1), rounding any fractional bits. * Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8 @@ -192,7 +164,7 @@ static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { SkASSERT(a <= 32767); SkASSERT(b <= 32767); SkASSERT(shift > 0 && shift <= 8); - unsigned prod = SkMulS16(a, b) + (1 << (shift - 1)); + unsigned prod = a*b + (1 << (shift - 1)); return (prod + (prod >> shift)) >> shift; } @@ -203,7 +175,7 @@ static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) { static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) { SkASSERT(a <= 32767); SkASSERT(b <= 32767); - unsigned prod = SkMulS16(a, b) + 128; + unsigned prod = a*b + 128; return (prod + (prod >> 8)) >> 8; } diff --git a/include/core/SkPreConfig.h b/include/core/SkPreConfig.h index 19363427a0..7a849f566e 100644 --- a/include/core/SkPreConfig.h +++ b/include/core/SkPreConfig.h @@ -172,12 +172,6 @@ #else #define SK_ARM_ARCH 3 #endif - - #if defined(__thumb2__) && (SK_ARM_ARCH >= 6) \ - || !defined(__thumb__) && ((SK_ARM_ARCH > 5) || defined(__ARM_ARCH_5E__) \ - || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)) - #define SK_ARM_HAS_EDSP - #endif #endif #endif diff --git a/src/core/SkMathPriv.h b/src/core/SkMathPriv.h index 345815354c..b9184a0726 100644 --- a/src/core/SkMathPriv.h +++ b/src/core/SkMathPriv.h @@ -57,7 +57,7 @@ static inline unsigned SkClampUMax(unsigned value, unsigned max) { static inline U8CPU SkMulDiv255Trunc(U8CPU a, U8CPU b) { SkASSERT((uint8_t)a == a); SkASSERT((uint8_t)b == b); - unsigned prod = SkMulS16(a, b) + 1; + unsigned prod = a*b + 1; return (prod + (prod >> 8)) >> 8; } @@ -67,7 +67,7 @@ static inline U8CPU SkMulDiv255Trunc(U8CPU a, U8CPU b) { static inline U8CPU SkMulDiv255Ceiling(U8CPU a, U8CPU b) { SkASSERT((uint8_t)a == a); SkASSERT((uint8_t)b == b); - unsigned prod = SkMulS16(a, b) + 255; + unsigned prod = a*b + 255; return (prod + (prod >> 8)) >> 8; } diff --git a/src/core/SkScan_Antihair.cpp b/src/core/SkScan_Antihair.cpp index 546ced0072..2bcb4c64c1 100644 --- a/src/core/SkScan_Antihair.cpp +++ b/src/core/SkScan_Antihair.cpp @@ -34,7 +34,7 @@ static inline int SmallDot6Scale(int value, int dot6) { SkASSERT((int16_t)value == value); SkASSERT((unsigned)dot6 <= 64); - return SkMulS16(value, dot6) >> 6; + return (value * dot6) >> 6; } //#define TEST_GAMMA @@ -155,19 +155,19 @@ class Horish_SkAntiHairBlitter : public SkAntiHairBlitter { public: SkFixed drawCap(int x, SkFixed fy, SkFixed dy, int mod64) override { fy += SK_Fixed1/2; - + int lower_y = fy >> 16; uint8_t a = (uint8_t)(fy >> 8); unsigned a0 = SmallDot6Scale(255 - a, mod64); unsigned a1 = SmallDot6Scale(a, mod64); this->getBlitter()->blitAntiV2(x, lower_y - 1, a0, a1); - + return fy + dy - SK_Fixed1/2; } - + SkFixed drawLine(int x, int stopx, SkFixed fy, SkFixed dy) override { SkASSERT(x < stopx); - + fy += SK_Fixed1/2; SkBlitter* blitter = this->getBlitter(); do { @@ -176,7 +176,7 @@ public: blitter->blitAntiV2(x, lower_y - 1, 255 - a, a); fy += dy; } while (++x < stopx); - + return fy - SK_Fixed1/2; } }; @@ -226,15 +226,15 @@ class Vertish_SkAntiHairBlitter : public SkAntiHairBlitter { public: SkFixed drawCap(int y, SkFixed fx, SkFixed dx, int mod64) override { fx += SK_Fixed1/2; - + int x = fx >> 16; uint8_t a = (uint8_t)(fx >> 8); this->getBlitter()->blitAntiH2(x - 1, y, SmallDot6Scale(255 - a, mod64), SmallDot6Scale(a, mod64)); - + return fx + dx - SK_Fixed1/2; } - + SkFixed drawLine(int y, int stopy, SkFixed fx, SkFixed dx) override { SkASSERT(y < stopy); fx += SK_Fixed1/2; @@ -244,7 +244,7 @@ public: this->getBlitter()->blitAntiH2(x - 1, y, 255 - a, a); fx += dx; } while (++y < stopy); - + return fx - SK_Fixed1/2; } }; @@ -540,7 +540,7 @@ void SkScan::AntiHairLineRgn(const SkPoint array[], int arrayCount, const SkRegi clipBounds.set(clip->getBounds()); /* We perform integral clipping later on, but we do a scalar clip first to ensure that our coordinates are expressible in fixed/integers. - + antialiased hairlines can draw up to 1/2 of a pixel outside of their bounds, so we need to outset the clip before calling the clipper. To make the numerics safer, we outset by a whole pixel, diff --git a/src/opts/SkBlitRow_opts_arm_neon.cpp b/src/opts/SkBlitRow_opts_arm_neon.cpp index ca67469808..4db82a5070 100644 --- a/src/opts/SkBlitRow_opts_arm_neon.cpp +++ b/src/opts/SkBlitRow_opts_arm_neon.cpp @@ -714,9 +714,9 @@ void S32A_D565_Blend_neon(uint16_t* SK_RESTRICT dst, if (sc) { uint16_t dc = *dst; unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); - unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) + SkMulS16(SkGetPackedR16(dc), dst_scale); - unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) + SkMulS16(SkGetPackedG16(dc), dst_scale); - unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) + SkMulS16(SkGetPackedB16(dc), dst_scale); + unsigned dr = (SkPacked32ToR16(sc) * alpha) + (SkGetPackedR16(dc) * dst_scale); + unsigned dg = (SkPacked32ToG16(sc) * alpha) + (SkGetPackedG16(dc) * dst_scale); + unsigned db = (SkPacked32ToB16(sc) * alpha) + (SkGetPackedB16(dc) * dst_scale); *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); } dst += 1; diff --git a/src/opts/SkBlitRow_opts_mips_dsp.cpp b/src/opts/SkBlitRow_opts_mips_dsp.cpp index 869a04a4af..c6747f0960 100644 --- a/src/opts/SkBlitRow_opts_mips_dsp.cpp +++ b/src/opts/SkBlitRow_opts_mips_dsp.cpp @@ -753,12 +753,9 @@ static void S32A_D565_Blend_mips_dsp(uint16_t* SK_RESTRICT dst, if (sc) { uint16_t dc = *dst; unsigned dst_scale = 255 - SkMulDiv255Round(SkGetPackedA32(sc), alpha); - unsigned dr = SkMulS16(SkPacked32ToR16(sc), alpha) + - SkMulS16(SkGetPackedR16(dc), dst_scale); - unsigned dg = SkMulS16(SkPacked32ToG16(sc), alpha) + - SkMulS16(SkGetPackedG16(dc), dst_scale); - unsigned db = SkMulS16(SkPacked32ToB16(sc), alpha) + - SkMulS16(SkGetPackedB16(dc), dst_scale); + unsigned dr = (SkPacked32ToR16(sc) * alpha) + (SkGetPackedR16(dc) * dst_scale); + unsigned dg = (SkPacked32ToG16(sc) * alpha) + (SkGetPackedG16(dc) * dst_scale); + unsigned db = (SkPacked32ToB16(sc) * alpha) + (SkGetPackedB16(dc) * dst_scale); *dst = SkPackRGB16(SkDiv255Round(dr), SkDiv255Round(dg), SkDiv255Round(db)); } dst += 1;