Roll skia/third_party/skcms fa2161cf791f..c851ac96663d (4 commits)

https://skia.googlesource.com/skcms.git/+log/fa2161cf791f..c851ac96663d

2018-10-17 mtklein@google.com fix CMYK alpha channel
2018-10-17 mtklein@google.com add ->sRGB dump to profile tests
2018-10-17 mtklein@google.com fold together table ops
2018-10-17 mtklein@google.com fold together clut<8> and clut<16>

The AutoRoll server is located here: https://autoroll.skia.org/r/skcms-skia-autoroll

Documentation for the AutoRoller is here:
https://skia.googlesource.com/buildbot/+/master/autoroll/README.md

If the roll is causing failures, please contact the current sheriff, who should
be CC'd on the roll, and stop the roller if necessary.

CQ_INCLUDE_TRYBOTS=master.tryserver.blink:linux_trusty_blink_rel
TBR=egdaniel@google.com

Change-Id: I4dd588480037c200f73864719ccede99ca11d214
Reviewed-on: https://skia-review.googlesource.com/c/163065
Reviewed-by: skia-autoroll <skia-autoroll@skia-public.iam.gserviceaccount.com>
Commit-Queue: skia-autoroll <skia-autoroll@skia-public.iam.gserviceaccount.com>
2018-10-17 15:29:14 +00:00 · 2018-10-17 15:29:14 +00:00 · 6fa9926f8b
commit 6fa9926f8b
parent 64021dd2c2
3 changed files with 46 additions and 123 deletions
--- a/third_party/skcms/skcms.cc
+++ b/third_party/skcms/skcms.cc
@@ -1781,24 +1781,12 @@ typedef enum {
    Op_tf_b,
    Op_tf_a,

-    Op_table_8_r,
-    Op_table_8_g,
-    Op_table_8_b,
-    Op_table_8_a,
+    Op_table_r,
+    Op_table_g,
+    Op_table_b,
+    Op_table_a,

-    Op_table_16_r,
-    Op_table_16_g,
-    Op_table_16_b,
-    Op_table_16_a,
-
-    Op_clut_1D_8,
-    Op_clut_1D_16,
-    Op_clut_2D_8,
-    Op_clut_2D_16,
-    Op_clut_3D_8,
-    Op_clut_3D_16,
-    Op_clut_4D_8,
-    Op_clut_4D_16,
+    Op_clut,

    Op_store_a8,
    Op_store_g8,
@@ -1971,11 +1959,11 @@ typedef struct {
 } OpAndArg;

 static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
-    static const struct { Op parametric, table_8, table_16; } ops[] = {
-        { Op_tf_r, Op_table_8_r, Op_table_16_r },
-        { Op_tf_g, Op_table_8_g, Op_table_16_g },
-        { Op_tf_b, Op_table_8_b, Op_table_16_b },
-        { Op_tf_a, Op_table_8_a, Op_table_16_a },
+    static const struct { Op parametric, table; } ops[] = {
+        { Op_tf_r, Op_table_r },
+        { Op_tf_g, Op_table_g },
+        { Op_tf_b, Op_table_b },
+        { Op_tf_a, Op_table_a },
    };

    const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
@@ -1984,14 +1972,9 @@ static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
        return is_identity_tf(&curve->parametric)
            ? noop
            : OpAndArg{ ops[channel].parametric, &curve->parametric };
-    } else if (curve->table_8) {
-        return OpAndArg{ ops[channel].table_8,  curve };
-    } else if (curve->table_16) {
-        return OpAndArg{ ops[channel].table_16, curve };
    }

-    assert(false);
-    return noop;
+    return OpAndArg{ ops[channel].table, curve };
 }

 static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
@@ -2135,13 +2118,7 @@ bool skcms_Transform(const void*             src,
                    }
                }
                *ops++ = Op_clamp;
-                switch (srcProfile->A2B.input_channels) {
-                    case 1: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_1D_8 : Op_clut_1D_16; break;
-                    case 2: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_2D_8 : Op_clut_2D_16; break;
-                    case 3: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_3D_8 : Op_clut_3D_16; break;
-                    case 4: *ops++ = srcProfile->A2B.grid_8 ? Op_clut_4D_8 : Op_clut_4D_16; break;
-                    default: return false;
-                }
+                *ops++ = Op_clut;
                *args++ = &srcProfile->A2B;
            }

--- a/third_party/skcms/src/Transform_inl.h
+++ b/third_party/skcms/src/Transform_inl.h
@@ -476,7 +476,7 @@ SI F minus_1_ulp(F v) {
    return bit_pun<F>( bit_pun<I32>(v) - 1 );
 }

-SI F table_8(const skcms_Curve* curve, F v) {
+SI F table(const skcms_Curve* curve, F v) {
    // Clamp the input to [0,1], then scale to a table index.
    F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);

@@ -489,30 +489,18 @@ SI F table_8(const skcms_Curve* curve, F v) {
    // the same as in 'l' or adjacent.  We have a rough idea that it'd always be safe
    // to read adjacent entries and perhaps underflow the table by a byte or two
    // (it'd be junk, but always safe to read).  Not sure how to lerp yet.
-    F l = F_from_U8(gather_8(curve->table_8, lo)),
-      h = F_from_U8(gather_8(curve->table_8, hi));
+    F l,h;
+    if (curve->table_8) {
+        l = F_from_U8(gather_8(curve->table_8, lo));
+        h = F_from_U8(gather_8(curve->table_8, hi));
+    } else {
+        l = F_from_U16_BE(gather_16(curve->table_16, lo));
+        h = F_from_U16_BE(gather_16(curve->table_16, hi));
+    }
    return l + (h-l)*t;
 }

-SI F table_16(const skcms_Curve* curve, F v) {
-    // All just as in table_8() until the gathers.
-    F ix = max_(F0, min_(v, F1)) * (float)(curve->table_entries - 1);
-
-    I32 lo = cast<I32>(            ix      ),
-        hi = cast<I32>(minus_1_ulp(ix+1.0f));
-    F t = ix - cast<F>(lo);
-
-    // TODO: as above, load l and h simultaneously?
-    // Here we could even use AVX2-style 32-bit gathers.
-    F l = F_from_U16_BE(gather_16(curve->table_16, lo)),
-      h = F_from_U16_BE(gather_16(curve->table_16, hi));
-    return l + (h-l)*t;
-}
-
-template <int>
-static void sample_clut(const skcms_A2B*, I32 ix, F* r, F* g, F* b);
-
-template <> void sample_clut<8>(const skcms_A2B* a2b, I32 ix, F* r, F* g, F* b) {
+SI void sample_clut_8(const skcms_A2B* a2b, I32 ix, F* r, F* g, F* b) {
    U32 rgb = gather_24(a2b->grid_8, ix);

    *r = cast<F>((rgb >>  0) & 0xff) * (1/255.0f);
@@ -520,7 +508,7 @@ template <> void sample_clut<8>(const skcms_A2B* a2b, I32 ix, F* r, F* g, F* b)
    *b = cast<F>((rgb >> 16) & 0xff) * (1/255.0f);
 }

-template <> void sample_clut<16>(const skcms_A2B* a2b, I32 ix, F* r, F* g, F* b) {
+SI void sample_clut_16(const skcms_A2B* a2b, I32 ix, F* r, F* g, F* b) {
 #if defined(__arm__)
    // This is up to 2x faster on 32-bit ARM than the #else-case fast path.
    *r = F_from_U16_BE(gather_16(a2b->grid_16, 3*ix+0));
@@ -546,9 +534,9 @@ template <> void sample_clut<16>(const skcms_A2B* a2b, I32 ix, F* r, F* g, F* b)
    #define MAYBE_NOINLINE
 #endif

-template <int kBitDepth>
 MAYBE_NOINLINE
-static void clut(const skcms_A2B* a2b, int dim, F* r, F* g, F* b, F a) {
+static void clut(const skcms_A2B* a2b, F* r, F* g, F* b, F a) {
+    const int dim = (int)a2b->input_channels;
    assert (0 < dim && dim <= 4);

    // Each of these arrays is really foo[dim], but we use foo[4] since we know dim <= 4.
@@ -574,10 +562,7 @@ static void clut(const skcms_A2B* a2b, int dim, F* r, F* g, F* b, F a) {
        }
    }

-    // It's sometimes a little faster to accumulate into R,G,B than into *r,*g,*b.
-    F R = F0,
-      G = F0,
-      B = F0;
+    *r = *g = *b = F0;

    // We'll sample 2^dim == 1<<dim table entries per pixel,
    // in all combinations of low and high in each dimension.
@@ -595,17 +580,17 @@ static void clut(const skcms_A2B* a2b, int dim, F* r, F* g, F* b, F a) {
            }
        }

-        F sR,sG,sB;
-        sample_clut<kBitDepth>(a2b,ix, &sR,&sG,&sB);
+        F R,G,B;
+        if (a2b->grid_8) {
+            sample_clut_8 (a2b,ix, &R,&G,&B);
+        } else {
+            sample_clut_16(a2b,ix, &R,&G,&B);
+        }

-        R += w*sR;
-        G += w*sG;
-        B += w*sB;
+        *r += w*R;
+        *g += w*G;
+        *b += w*B;
    }
-
-    *r = R;
-    *g = G;
-    *b = B;
 }

 static void exec_ops(const Op* ops, const void** args,
@@ -922,58 +907,19 @@ static void exec_ops(const Op* ops, const void** args,
            case Op_tf_b:{ b = apply_tf((const skcms_TransferFunction*)*args++, b); } break;
            case Op_tf_a:{ a = apply_tf((const skcms_TransferFunction*)*args++, a); } break;

-            case Op_table_8_r: { r = table_8((const skcms_Curve*)*args++, r); } break;
-            case Op_table_8_g: { g = table_8((const skcms_Curve*)*args++, g); } break;
-            case Op_table_8_b: { b = table_8((const skcms_Curve*)*args++, b); } break;
-            case Op_table_8_a: { a = table_8((const skcms_Curve*)*args++, a); } break;
+            case Op_table_r: { r = table((const skcms_Curve*)*args++, r); } break;
+            case Op_table_g: { g = table((const skcms_Curve*)*args++, g); } break;
+            case Op_table_b: { b = table((const skcms_Curve*)*args++, b); } break;
+            case Op_table_a: { a = table((const skcms_Curve*)*args++, a); } break;

-            case Op_table_16_r:{ r = table_16((const skcms_Curve*)*args++, r); } break;
-            case Op_table_16_g:{ g = table_16((const skcms_Curve*)*args++, g); } break;
-            case Op_table_16_b:{ b = table_16((const skcms_Curve*)*args++, b); } break;
-            case Op_table_16_a:{ a = table_16((const skcms_Curve*)*args++, a); } break;
-
-            case Op_clut_1D_8:{
+            case Op_clut: {
                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<8>(a2b, 1, &r,&g,&b,a);
-            } break;
+                clut(a2b, &r,&g,&b,a);

-            case Op_clut_1D_16:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<16>(a2b, 1,  &r,&g,&b,a);
-            } break;
-
-            case Op_clut_2D_8:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<8>(a2b, 2,  &r,&g,&b,a);
-            } break;
-
-            case Op_clut_2D_16:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<16>(a2b, 2,  &r,&g,&b,a);
-            } break;
-
-            case Op_clut_3D_8:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<8>(a2b, 3,  &r,&g,&b,a);
-            } break;
-
-            case Op_clut_3D_16:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<16>(a2b, 3,  &r,&g,&b,a);
-            } break;
-
-            case Op_clut_4D_8:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<8>(a2b, 4,  &r,&g,&b,a);
-                // 'a' was really a CMYK K, so our output is actually opaque.
-                a = F1;
-            } break;
-
-            case Op_clut_4D_16:{
-                const skcms_A2B* a2b = (const skcms_A2B*) *args++;
-                clut<16>(a2b, 4,  &r,&g,&b,a);
-                // 'a' was really a CMYK K, so our output is actually opaque.
-                a = F1;
+                if (a2b->input_channels == 4) {
+                    // CMYK is opaque.
+                    a = F1;
+                }
            } break;

    // Notice, from here on down the store_ ops all return, ending the loop.
--- a/third_party/skcms/version.sha1
+++ b/third_party/skcms/version.sha1
@@ -1 +1 @@
-fa2161cf791f45a44ebcc84d04857a49edb90d01
+c851ac96663d44af3ea97c9d79543a670c7756d9