Optimize SkWuffsCodec pixbuf zero-initialization
Pushing the zero-initialization into the sk_malloc_etc call can be more efficient than a two-step malloc-and-then-memset, e.g. if the memory allocator gives us something from an already-known-zeroed region. Some microbenchmark numbers (milliseconds per decode) from running Chromium's image_decode_bench program on a few GIF files: Before After Ratio Filename Width x Height = Pixels 0.109 0.107 1.02 hat.gif 90 x 112 = 10080 7.062 6.879 1.03 harvesters.gif 1165 x 859 = 1000735 33.342 30.544 1.09 droids.gif 2560 x 1920 = 4915200 Ratio > 1.00 means a performance improvement. image_decode_bench comes from: https://cs.chromium.org/chromium/src/third_party/blink/renderer/platform/testing/image_decode_bench.cc hat.gif and harvesters.gif come from: https://github.com/google/wuffs/tree/master/test/data droids.gif comes from: https://cs.chromium.org/chromium/src/third_party/blink/perf_tests/image_decoder/resources/ These are x86_64 Broadwell numbers. I don't have ARM numbers readily available, but nothing about this commit should be arch-specific. Bug: skia:8235 Change-Id: Icf45b7ac5b7194eaab455628e44d4aedf30ddbfe Reviewed-on: https://skia-review.googlesource.com/c/skia/+/254836 Commit-Queue: Leon Scroggins <scroggo@google.com> Reviewed-by: Leon Scroggins <scroggo@google.com>
This commit is contained in:
parent
301015ce1e
commit
39da10b556
@ -132,7 +132,6 @@ static SkCodec::Result reset_and_decode_image_config(wuffs_gif__decoder* d
|
|||||||
return SkCodec::kSuccess;
|
return SkCodec::kSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// -------------------------------- Class definitions
|
// -------------------------------- Class definitions
|
||||||
|
|
||||||
class SkWuffsCodec;
|
class SkWuffsCodec;
|
||||||
@ -179,6 +178,7 @@ public:
|
|||||||
std::unique_ptr<SkStream> stream,
|
std::unique_ptr<SkStream> stream,
|
||||||
std::unique_ptr<wuffs_gif__decoder, decltype(&sk_free)> dec,
|
std::unique_ptr<wuffs_gif__decoder, decltype(&sk_free)> dec,
|
||||||
std::unique_ptr<uint8_t, decltype(&sk_free)> pixbuf_ptr,
|
std::unique_ptr<uint8_t, decltype(&sk_free)> pixbuf_ptr,
|
||||||
|
bool pixbuf_zeroed,
|
||||||
std::unique_ptr<uint8_t, decltype(&sk_free)> workbuf_ptr,
|
std::unique_ptr<uint8_t, decltype(&sk_free)> workbuf_ptr,
|
||||||
size_t workbuf_len,
|
size_t workbuf_len,
|
||||||
wuffs_base__image_config imgcfg,
|
wuffs_base__image_config imgcfg,
|
||||||
@ -266,6 +266,10 @@ private:
|
|||||||
std::vector<SkWuffsFrame> fFrames;
|
std::vector<SkWuffsFrame> fFrames;
|
||||||
bool fFramesComplete;
|
bool fFramesComplete;
|
||||||
|
|
||||||
|
// True if fPixelBuffer's contents are known to be already zeroed. This is
|
||||||
|
// conservative, and may be false even if the buffer is zeroed.
|
||||||
|
bool fPixbufZeroed;
|
||||||
|
|
||||||
// If calling an fDecoders[which] method returns an incomplete status, then
|
// If calling an fDecoders[which] method returns an incomplete status, then
|
||||||
// fDecoders[which] is suspended in a coroutine (i.e. waiting on I/O or
|
// fDecoders[which] is suspended in a coroutine (i.e. waiting on I/O or
|
||||||
// halted on a non-recoverable error). To keep its internal proof-of-safety
|
// halted on a non-recoverable error). To keep its internal proof-of-safety
|
||||||
@ -333,6 +337,7 @@ SkWuffsCodec::SkWuffsCodec(SkEncodedInfo&&
|
|||||||
std::unique_ptr<SkStream> stream,
|
std::unique_ptr<SkStream> stream,
|
||||||
std::unique_ptr<wuffs_gif__decoder, decltype(&sk_free)> dec,
|
std::unique_ptr<wuffs_gif__decoder, decltype(&sk_free)> dec,
|
||||||
std::unique_ptr<uint8_t, decltype(&sk_free)> pixbuf_ptr,
|
std::unique_ptr<uint8_t, decltype(&sk_free)> pixbuf_ptr,
|
||||||
|
bool pixbuf_zeroed,
|
||||||
std::unique_ptr<uint8_t, decltype(&sk_free)> workbuf_ptr,
|
std::unique_ptr<uint8_t, decltype(&sk_free)> workbuf_ptr,
|
||||||
size_t workbuf_len,
|
size_t workbuf_len,
|
||||||
wuffs_base__image_config imgcfg,
|
wuffs_base__image_config imgcfg,
|
||||||
@ -367,6 +372,7 @@ SkWuffsCodec::SkWuffsCodec(SkEncodedInfo&&
|
|||||||
fFrameCountReaderIOPosition(0),
|
fFrameCountReaderIOPosition(0),
|
||||||
fNumFullyReceivedFrames(0),
|
fNumFullyReceivedFrames(0),
|
||||||
fFramesComplete(false),
|
fFramesComplete(false),
|
||||||
|
fPixbufZeroed(pixbuf_zeroed),
|
||||||
fDecoderIsSuspended{
|
fDecoderIsSuspended{
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
@ -443,12 +449,20 @@ SkCodec::Result SkWuffsCodec::onStartIncrementalDecode(const SkImageInfo& d
|
|||||||
size_t src_bytes_per_pixel = src_bits_per_pixel / 8;
|
size_t src_bytes_per_pixel = src_bits_per_pixel / 8;
|
||||||
|
|
||||||
// Zero-initialize Wuffs' buffer covering the frame rect.
|
// Zero-initialize Wuffs' buffer covering the frame rect.
|
||||||
wuffs_base__rect_ie_u32 frame_rect = fFrameConfigs[WhichDecoder::kIncrDecode].bounds();
|
if (!fPixbufZeroed) {
|
||||||
wuffs_base__table_u8 pixels = fPixelBuffer.plane(0);
|
wuffs_base__rect_ie_u32 frame_rect = fFrameConfigs[WhichDecoder::kIncrDecode].bounds();
|
||||||
for (uint32_t y = frame_rect.min_incl_y; y < frame_rect.max_excl_y; y++) {
|
wuffs_base__table_u8 pixels = fPixelBuffer.plane(0);
|
||||||
sk_bzero(pixels.ptr + (y * pixels.stride) + (frame_rect.min_incl_x * src_bytes_per_pixel),
|
for (uint32_t y = frame_rect.min_incl_y; y < frame_rect.max_excl_y; y++) {
|
||||||
frame_rect.width() * src_bytes_per_pixel);
|
sk_bzero(
|
||||||
|
pixels.ptr + (y * pixels.stride) + (frame_rect.min_incl_x * src_bytes_per_pixel),
|
||||||
|
frame_rect.width() * src_bytes_per_pixel);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// The buffer is zeroed now, but this onStartIncrementalDecode call will
|
||||||
|
// almost certainly be followed by some onIncrementalDecode calls that can
|
||||||
|
// modify fPixelBuffer's contents. We set fPixbufZeroed to false so that
|
||||||
|
// the next onStartIncrementalDecode call will zero-initialize the buffer.
|
||||||
|
fPixbufZeroed = false;
|
||||||
|
|
||||||
fIncrDecDst = static_cast<uint8_t*>(dst);
|
fIncrDecDst = static_cast<uint8_t*>(dst);
|
||||||
fIncrDecReaderIOPosition = fIOBuffer.reader_io_position();
|
fIncrDecReaderIOPosition = fIOBuffer.reader_io_position();
|
||||||
@ -886,8 +900,10 @@ std::unique_ptr<SkCodec> SkWuffsCodec_MakeFromStream(std::unique_ptr<SkStream> s
|
|||||||
std::unique_ptr<uint8_t, decltype(&sk_free)> workbuf_ptr(
|
std::unique_ptr<uint8_t, decltype(&sk_free)> workbuf_ptr(
|
||||||
reinterpret_cast<uint8_t*>(workbuf_ptr_raw), &sk_free);
|
reinterpret_cast<uint8_t*>(workbuf_ptr_raw), &sk_free);
|
||||||
|
|
||||||
uint64_t pixbuf_len = imgcfg.pixcfg.pixbuf_len();
|
constexpr int pixbuf_sk_malloc_flags = SK_MALLOC_ZERO_INITIALIZE;
|
||||||
void* pixbuf_ptr_raw = pixbuf_len <= SIZE_MAX ? sk_malloc_canfail(pixbuf_len) : nullptr;
|
uint64_t pixbuf_len = imgcfg.pixcfg.pixbuf_len();
|
||||||
|
void* pixbuf_ptr_raw =
|
||||||
|
pixbuf_len <= SIZE_MAX ? sk_malloc_flags(pixbuf_len, pixbuf_sk_malloc_flags) : nullptr;
|
||||||
if (!pixbuf_ptr_raw) {
|
if (!pixbuf_ptr_raw) {
|
||||||
*result = SkCodec::kInternalError;
|
*result = SkCodec::kInternalError;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -919,5 +935,6 @@ std::unique_ptr<SkCodec> SkWuffsCodec_MakeFromStream(std::unique_ptr<SkStream> s
|
|||||||
*result = SkCodec::kSuccess;
|
*result = SkCodec::kSuccess;
|
||||||
return std::unique_ptr<SkCodec>(new SkWuffsCodec(
|
return std::unique_ptr<SkCodec>(new SkWuffsCodec(
|
||||||
std::move(encodedInfo), std::move(stream), std::move(decoder), std::move(pixbuf_ptr),
|
std::move(encodedInfo), std::move(stream), std::move(decoder), std::move(pixbuf_ptr),
|
||||||
std::move(workbuf_ptr), workbuf_len, imgcfg, pixbuf, iobuf));
|
(pixbuf_sk_malloc_flags & SK_MALLOC_ZERO_INITIALIZE) != 0, std::move(workbuf_ptr),
|
||||||
|
workbuf_len, imgcfg, pixbuf, iobuf));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user