compress_r11eac_blocks() required more alignment than dst has.
This shouldn't change any behavior, except that the stores to dst will no longer require 8-byte alignment. Empirically, it seems we could use 4-byte alignment here, but u8 (i.e. 1-byte alignment) is always safe.
BUG=skia:5637
GOLD_TRYBOT_URL=https://gold.skia.org/search?issue=2264103002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review-Url: https://codereview.chromium.org/2264103002
This commit is contained in:
parent
2ec417257b
commit
a61b6d4f9e
@ -898,9 +898,7 @@
|
|||||||
"_",
|
"_",
|
||||||
"image",
|
"image",
|
||||||
"_",
|
"_",
|
||||||
".SRW",
|
".SRW"
|
||||||
"--match",
|
|
||||||
"~CompressCheckerboard"
|
|
||||||
],
|
],
|
||||||
"env": {
|
"env": {
|
||||||
"ANDROID_HOME": "[SLAVE_BUILD]/android_sdk/android-sdk",
|
"ANDROID_HOME": "[SLAVE_BUILD]/android_sdk/android-sdk",
|
||||||
|
@ -298,9 +298,6 @@ def dm_flags(bot):
|
|||||||
if 'Valgrind' in bot: # skia:3021
|
if 'Valgrind' in bot: # skia:3021
|
||||||
match.append('~Threaded')
|
match.append('~Threaded')
|
||||||
|
|
||||||
if 'Nexus6' in bot: # skia:5637
|
|
||||||
match.append('~CompressCheckerboard')
|
|
||||||
|
|
||||||
if 'GalaxyS3' in bot: # skia:1699
|
if 'GalaxyS3' in bot: # skia:1699
|
||||||
match.append('~WritePixels')
|
match.append('~WritePixels')
|
||||||
|
|
||||||
|
@ -150,14 +150,14 @@ namespace SK_OPTS_NS {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void compress_r11eac_blocks(uint64_t* dst, const uint8_t* src, size_t rowBytes) {
|
static void compress_r11eac_blocks(uint8_t* dst, const uint8_t* src, size_t rowBytes) {
|
||||||
// Try to avoid switching between vector and non-vector ops...
|
// Try to avoid switching between vector and non-vector ops...
|
||||||
const uint8_t *const src1 = src;
|
const uint8_t *const src1 = src;
|
||||||
const uint8_t *const src2 = src + rowBytes;
|
const uint8_t *const src2 = src + rowBytes;
|
||||||
const uint8_t *const src3 = src + 2*rowBytes;
|
const uint8_t *const src3 = src + 2*rowBytes;
|
||||||
const uint8_t *const src4 = src + 3*rowBytes;
|
const uint8_t *const src4 = src + 3*rowBytes;
|
||||||
uint64_t *const dst1 = dst;
|
uint8_t *const dst1 = dst;
|
||||||
uint64_t *const dst2 = dst + 2;
|
uint8_t *const dst2 = dst + 16;
|
||||||
|
|
||||||
const uint8x16_t alphaRow1 = vld1q_u8(src1);
|
const uint8x16_t alphaRow1 = vld1q_u8(src1);
|
||||||
const uint8x16_t alphaRow2 = vld1q_u8(src2);
|
const uint8x16_t alphaRow2 = vld1q_u8(src2);
|
||||||
@ -175,12 +175,12 @@ namespace SK_OPTS_NS {
|
|||||||
if (is_zero(alphaRow1)) {
|
if (is_zero(alphaRow1)) {
|
||||||
static const uint64x2_t kTransparent = { 0x0020000000002000ULL,
|
static const uint64x2_t kTransparent = { 0x0020000000002000ULL,
|
||||||
0x0020000000002000ULL };
|
0x0020000000002000ULL };
|
||||||
vst1q_u64(dst1, kTransparent);
|
vst1q_u8(dst1, vreinterpretq_u8_u64(kTransparent));
|
||||||
vst1q_u64(dst2, kTransparent);
|
vst1q_u8(dst2, vreinterpretq_u8_u64(kTransparent));
|
||||||
return;
|
return;
|
||||||
} else if (is_zero(nAlphaRow1)) {
|
} else if (is_zero(nAlphaRow1)) {
|
||||||
vst1q_u64(dst1, vreinterpretq_u64_u8(cmp));
|
vst1q_u8(dst1, cmp);
|
||||||
vst1q_u64(dst2, vreinterpretq_u64_u8(cmp));
|
vst1q_u8(dst2, cmp);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -205,8 +205,8 @@ namespace SK_OPTS_NS {
|
|||||||
|
|
||||||
const uint64x2_t d1 = vcombine_u64(vget_low_u64(indicesLeft), vget_low_u64(indicesRight));
|
const uint64x2_t d1 = vcombine_u64(vget_low_u64(indicesLeft), vget_low_u64(indicesRight));
|
||||||
const uint64x2_t d2 = vcombine_u64(vget_high_u64(indicesLeft), vget_high_u64(indicesRight));
|
const uint64x2_t d2 = vcombine_u64(vget_high_u64(indicesLeft), vget_high_u64(indicesRight));
|
||||||
vst1q_u64(dst1, d1);
|
vst1q_u8(dst1, vreinterpretq_u8_u64(d1));
|
||||||
vst1q_u64(dst2, d2);
|
vst1q_u8(dst2, vreinterpretq_u8_u64(d2));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool compress_a8_r11eac(uint8_t* dst, const uint8_t* src,
|
static bool compress_a8_r11eac(uint8_t* dst, const uint8_t* src,
|
||||||
@ -224,12 +224,11 @@ namespace SK_OPTS_NS {
|
|||||||
|
|
||||||
SkASSERT((blocksX % 4) == 0);
|
SkASSERT((blocksX % 4) == 0);
|
||||||
|
|
||||||
uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
|
|
||||||
for (int y = 0; y < blocksY; ++y) {
|
for (int y = 0; y < blocksY; ++y) {
|
||||||
for (int x = 0; x < blocksX; x+=4) {
|
for (int x = 0; x < blocksX; x+=4) {
|
||||||
// Compress it
|
// Compress it
|
||||||
compress_r11eac_blocks(encPtr, src + 4*x, rowBytes);
|
compress_r11eac_blocks(dst, src + 4*x, rowBytes);
|
||||||
encPtr += 4;
|
dst += 32;
|
||||||
}
|
}
|
||||||
src += 4 * rowBytes;
|
src += 4 * rowBytes;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user