Roll skia/third_party/skcms f3e50cfac5bc..1ce6da5c8d99 (1 commits)

https://skia.googlesource.com/skcms.git/+log/f3e50cfac5bc..1ce6da5c8d99

2018-10-18 mtklein@google.com add paletted source support


The AutoRoll server is located here: https://autoroll.skia.org/r/skcms-skia-autoroll

Documentation for the AutoRoller is here:
https://skia.googlesource.com/buildbot/+/master/autoroll/README.md

If the roll is causing failures, please contact the current sheriff, who should
be CC'd on the roll, and stop the roller if necessary.



CQ_INCLUDE_TRYBOTS=master.tryserver.blink:linux_trusty_blink_rel
TBR=egdaniel@google.com

Change-Id: Idf0112765cdb5a87cdfe0b3a9ad6455783ba0ec8
Reviewed-on: https://skia-review.googlesource.com/c/163508
Reviewed-by: skia-autoroll <skia-autoroll@skia-public.iam.gserviceaccount.com>
Commit-Queue: skia-autoroll <skia-autoroll@skia-public.iam.gserviceaccount.com>
This commit is contained in:
skia-autoroll@skia-public.iam.gserviceaccount.com 2018-10-18 19:20:14 +00:00 committed by Skia Commit-Bot
parent 6cfcc58eb7
commit 1cb0e3a86d
5 changed files with 126 additions and 46 deletions

View File

@@ -1752,6 +1752,7 @@ bool skcms_ApproximateCurve(const skcms_Curve* curve,
typedef enum {
Op_load_a8,
Op_load_g8,
Op_load_8888_palette8,
Op_load_4444,
Op_load_565,
Op_load_888,
@@ -1979,21 +1980,22 @@ static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
switch (fmt >> 1) { // ignore rgb/bgr
case skcms_PixelFormat_A_8 >> 1: return 1;
case skcms_PixelFormat_G_8 >> 1: return 1;
case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
case skcms_PixelFormat_RGB_565 >> 1: return 2;
case skcms_PixelFormat_RGB_888 >> 1: return 3;
case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
case skcms_PixelFormat_RGB_hhh >> 1: return 6;
case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
case skcms_PixelFormat_RGB_fff >> 1: return 12;
case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
case skcms_PixelFormat_A_8 >> 1: return 1;
case skcms_PixelFormat_G_8 >> 1: return 1;
case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
case skcms_PixelFormat_RGB_565 >> 1: return 2;
case skcms_PixelFormat_RGB_888 >> 1: return 3;
case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
case skcms_PixelFormat_RGB_hhh >> 1: return 6;
case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
case skcms_PixelFormat_RGB_fff >> 1: return 12;
case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
}
assert(false);
return 0;
@@ -2025,7 +2027,22 @@ bool skcms_Transform(const void* src,
skcms_PixelFormat dstFmt,
skcms_AlphaFormat dstAlpha,
const skcms_ICCProfile* dstProfile,
size_t nz) {
size_t npixels) {
return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
dst, dstFmt, dstAlpha, dstProfile,
npixels, nullptr);
}
bool skcms_TransformWithPalette(const void* src,
skcms_PixelFormat srcFmt,
skcms_AlphaFormat srcAlpha,
const skcms_ICCProfile* srcProfile,
void* dst,
skcms_PixelFormat dstFmt,
skcms_AlphaFormat dstAlpha,
const skcms_ICCProfile* dstProfile,
size_t nz,
const void* palette) {
const size_t dst_bpp = bytes_per_pixel(dstFmt),
src_bpp = bytes_per_pixel(srcFmt);
// Let's just refuse if the request is absurdly big.
@@ -2048,6 +2065,10 @@ bool skcms_Transform(const void* src,
}
// TODO: more careful alias rejection (like, dst == src + 1)?
if (needs_palette(srcFmt) && !palette) {
return false;
}
Op program [32];
const void* arguments[32];
@@ -2074,6 +2095,10 @@ bool skcms_Transform(const void* src,
case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
*args++ = palette;
break;
}
if (srcFmt & 1) {
*ops++ = Op_swap_rb;

View File

@@ -175,6 +175,8 @@ typedef enum skcms_PixelFormat {
skcms_PixelFormat_A_8_,
skcms_PixelFormat_G_8,
skcms_PixelFormat_G_8_,
skcms_PixelFormat_RGBA_8888_Palette8,
skcms_PixelFormat_BGRA_8888_Palette8,
skcms_PixelFormat_RGB_565,
skcms_PixelFormat_BGR_565,
@@ -248,6 +250,18 @@ SKCMS_API bool skcms_Transform(const void* src,
const skcms_ICCProfile* dstProfile,
size_t npixels);
// As skcms_Transform(), supporting srcFmts with a palette.
SKCMS_API bool skcms_TransformWithPalette(const void* src,
skcms_PixelFormat srcFmt,
skcms_AlphaFormat srcAlpha,
const skcms_ICCProfile* srcProfile,
void* dst,
skcms_PixelFormat dstFmt,
skcms_AlphaFormat dstAlpha,
const skcms_ICCProfile* dstProfile,
size_t npixels,
const void* palette);
// If profile can be used as a destination in skcms_Transform, return true. Otherwise, attempt to
// rewrite it with approximations where reasonable. If successful, return true. If no reasonable
// approximation exists, leave the profile unchanged and return false.

View File

@@ -37,6 +37,11 @@ extern "C" {
static inline float fabsf_(float x) { return x < 0 ? -x : x; }
float powf_(float, float);
// ~~~~ Does this pixel format need a palette pointer to be usable? ~~~~
static inline bool needs_palette(skcms_PixelFormat fmt) {
return (fmt >> 1) == (skcms_PixelFormat_RGBA_8888_Palette8 >> 1);
}
#ifdef __cplusplus
}
#endif

View File

@@ -348,52 +348,69 @@ SI U8 gather_8(const uint8_t* p, I32 ix) {
return v;
}
// Helper for gather_16(), loading the ix'th 16-bit value from p.
SI uint16_t load_16(const uint8_t* p, int ix) {
return load<uint16_t>(p + 2*ix);
}
SI U16 gather_16(const uint8_t* p, I32 ix) {
// Load the i'th 16-bit value from p.
auto load_16 = [p](int i) {
return load<uint16_t>(p + 2*i);
};
#if N == 1
U16 v = load_16(p,ix);
U16 v = load_16(ix);
#elif N == 4
U16 v = { load_16(p,ix[0]), load_16(p,ix[1]), load_16(p,ix[2]), load_16(p,ix[3]) };
U16 v = { load_16(ix[0]), load_16(ix[1]), load_16(ix[2]), load_16(ix[3]) };
#elif N == 8
U16 v = { load_16(p,ix[0]), load_16(p,ix[1]), load_16(p,ix[2]), load_16(p,ix[3]),
load_16(p,ix[4]), load_16(p,ix[5]), load_16(p,ix[6]), load_16(p,ix[7]) };
U16 v = { load_16(ix[0]), load_16(ix[1]), load_16(ix[2]), load_16(ix[3]),
load_16(ix[4]), load_16(ix[5]), load_16(ix[6]), load_16(ix[7]) };
#elif N == 16
U16 v = { load_16(p,ix[ 0]), load_16(p,ix[ 1]), load_16(p,ix[ 2]), load_16(p,ix[ 3]),
load_16(p,ix[ 4]), load_16(p,ix[ 5]), load_16(p,ix[ 6]), load_16(p,ix[ 7]),
load_16(p,ix[ 8]), load_16(p,ix[ 9]), load_16(p,ix[10]), load_16(p,ix[11]),
load_16(p,ix[12]), load_16(p,ix[13]), load_16(p,ix[14]), load_16(p,ix[15]) };
U16 v = { load_16(ix[ 0]), load_16(ix[ 1]), load_16(ix[ 2]), load_16(ix[ 3]),
load_16(ix[ 4]), load_16(ix[ 5]), load_16(ix[ 6]), load_16(ix[ 7]),
load_16(ix[ 8]), load_16(ix[ 9]), load_16(ix[10]), load_16(ix[11]),
load_16(ix[12]), load_16(ix[13]), load_16(ix[14]), load_16(ix[15]) };
#endif
return v;
}
#if !defined(USING_AVX2)
// Helpers for gather_24/48(), loading the ix'th 24/48-bit value from p, and 1/2 extra bytes.
SI uint32_t load_24_32(const uint8_t* p, int ix) {
return load<uint32_t>(p + 3*ix);
}
SI uint64_t load_48_64(const uint8_t* p, int ix) {
return load<uint64_t>(p + 6*ix);
}
SI U32 gather_32(const uint8_t* p, I32 ix) {
// Load the i'th 32-bit value from p.
auto load_32 = [p](int i) {
return load<uint32_t>(p + 4*i);
};
#if N == 1
U32 v = load_32(ix);
#elif N == 4
U32 v = { load_32(ix[0]), load_32(ix[1]), load_32(ix[2]), load_32(ix[3]) };
#elif N == 8
U32 v = { load_32(ix[0]), load_32(ix[1]), load_32(ix[2]), load_32(ix[3]),
load_32(ix[4]), load_32(ix[5]), load_32(ix[6]), load_32(ix[7]) };
#elif N == 16
U32 v = { load_32(ix[ 0]), load_32(ix[ 1]), load_32(ix[ 2]), load_32(ix[ 3]),
load_32(ix[ 4]), load_32(ix[ 5]), load_32(ix[ 6]), load_32(ix[ 7]),
load_32(ix[ 8]), load_32(ix[ 9]), load_32(ix[10]), load_32(ix[11]),
load_32(ix[12]), load_32(ix[13]), load_32(ix[14]), load_32(ix[15]) };
#endif
// TODO: AVX2 and AVX-512 gathers (c.f. gather_24).
return v;
}
SI U32 gather_24(const uint8_t* p, I32 ix) {
// First, back up a byte. Any place we're gathering from has a safe junk byte to read
// in front of it, either a previous table value, or some tag metadata.
p -= 1;
// Load the i'th 24-bit value from p, and 1 extra byte.
auto load_24_32 = [p](int i) {
return load<uint32_t>(p + 3*i);
};
// Now load multiples of 4 bytes (a junk byte, then r,g,b).
#if N == 1
U32 v = load_24_32(p,ix);
U32 v = load_24_32(ix);
#elif N == 4
U32 v = { load_24_32(p,ix[0]), load_24_32(p,ix[1]), load_24_32(p,ix[2]), load_24_32(p,ix[3]) };
U32 v = { load_24_32(ix[0]), load_24_32(ix[1]), load_24_32(ix[2]), load_24_32(ix[3]) };
#elif N == 8 && !defined(USING_AVX2)
U32 v = { load_24_32(p,ix[0]), load_24_32(p,ix[1]), load_24_32(p,ix[2]), load_24_32(p,ix[3]),
load_24_32(p,ix[4]), load_24_32(p,ix[5]), load_24_32(p,ix[6]), load_24_32(p,ix[7]) };
U32 v = { load_24_32(ix[0]), load_24_32(ix[1]), load_24_32(ix[2]), load_24_32(ix[3]),
load_24_32(ix[4]), load_24_32(ix[5]), load_24_32(ix[6]), load_24_32(ix[7]) };
#elif N == 8
(void)load_24_32;
// The gather instruction here doesn't need any particular alignment,
// but the intrinsic takes a const int*.
const int* p4 = bit_pun<const int*>(p);
@@ -405,6 +422,7 @@ SI U32 gather_24(const uint8_t* p, I32 ix) {
U32 v = (U32)__builtin_ia32_gathersiv8si(zero, p4, 3*ix, mask, 1);
#endif
#elif N == 16
(void)load_24_32;
// The intrinsic is supposed to take const void* now, but it takes const int*, just like AVX2.
// And AVX-512 swapped the order of arguments. :/
const int* p4 = bit_pun<const int*>(p);
@@ -420,18 +438,24 @@ SI U32 gather_24(const uint8_t* p, I32 ix) {
// As in gather_24(), with everything doubled.
p -= 2;
// Load the i'th 48-bit value from p, and 2 extra bytes.
auto load_48_64 = [p](int i) {
return load<uint64_t>(p + 6*i);
};
#if N == 1
*v = load_48_64(p,ix);
*v = load_48_64(ix);
#elif N == 4
*v = U64{
load_48_64(p,ix[0]), load_48_64(p,ix[1]), load_48_64(p,ix[2]), load_48_64(p,ix[3]),
load_48_64(ix[0]), load_48_64(ix[1]), load_48_64(ix[2]), load_48_64(ix[3]),
};
#elif N == 8 && !defined(USING_AVX2)
*v = U64{
load_48_64(p,ix[0]), load_48_64(p,ix[1]), load_48_64(p,ix[2]), load_48_64(p,ix[3]),
load_48_64(p,ix[4]), load_48_64(p,ix[5]), load_48_64(p,ix[6]), load_48_64(p,ix[7]),
load_48_64(ix[0]), load_48_64(ix[1]), load_48_64(ix[2]), load_48_64(ix[3]),
load_48_64(ix[4]), load_48_64(ix[5]), load_48_64(ix[6]), load_48_64(ix[7]),
};
#elif N == 8
(void)load_48_64;
typedef int32_t __attribute__((vector_size(16))) Half_I32;
typedef long long __attribute__((vector_size(32))) Half_I64;
@@ -456,6 +480,7 @@ SI U32 gather_24(const uint8_t* p, I32 ix) {
store((char*)v + 0, lo);
store((char*)v + 32, hi);
#elif N == 16
(void)load_48_64;
const long long int* p8 = bit_pun<const long long int*>(p);
__m512i lo = _mm512_i32gather_epi64(_mm512_extracti32x8_epi32((__m512i)(6*ix), 0), p8, 1),
hi = _mm512_i32gather_epi64(_mm512_extracti32x8_epi32((__m512i)(6*ix), 1), p8, 1);
@@ -676,6 +701,17 @@ static void exec_ops(const Op* ops, const void** args,
a = cast<F>((rgba >> 24) & 0xff) * (1/255.0f);
} break;
case Op_load_8888_palette8:{
const uint8_t* palette = (const uint8_t*) *args++;
I32 ix = cast<I32>(load<U8>(src + 1*i));
U32 rgba = gather_32(palette, ix);
r = cast<F>((rgba >> 0) & 0xff) * (1/255.0f);
g = cast<F>((rgba >> 8) & 0xff) * (1/255.0f);
b = cast<F>((rgba >> 16) & 0xff) * (1/255.0f);
a = cast<F>((rgba >> 24) & 0xff) * (1/255.0f);
} break;
case Op_load_1010102:{
U32 rgba = load<U32>(src + 4*i);

View File

@@ -1 +1 @@
f3e50cfac5bc924321f50b3e9ded4f625b7b0d15
1ce6da5c8d9968170297b81af2cbff48cc4b9919