Store LZW dictionary suffixes in larger chunks

To write out an LZW dictionary entry each byte requires two table lookups, one of which must complete before the next lookup can begin. This change makes it possible to write up to eight bytes per pair of lookups. Testing with image_decode_bench show performance improvement by 20-25% on Elm and Kevin Chromebooks except for small images with a gradient, which show a slowdown. However, the patch will introduce some memory hungry code which lasts only while decoding a frame. The change in memory requirement is from 4096 bytes to 4096*8 bytes. BUG=skia:8361 BUG=chromium:859454" Signed-off-by: tushar khandelwal <tushar.khandelwal@arm.com> Change-Id: Icf9e0adf215062ef796b5ff9e6347cace5f0af47 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/152580 Reviewed-by: Leon Scroggins <scroggo@google.com> Commit-Queue: Leon Scroggins <scroggo@google.com>
2019-02-20 13:55:45 +00:00 · 2019-02-20 13:55:45 +00:00 · 3f7f6c1ccf
commit 3f7f6c1ccf
parent a695da9305
2 changed files with 65 additions and 32 deletions
--- a/third_party/gif/SkGifImageReader.cpp
+++ b/third_party/gif/SkGifImageReader.cpp
@ -211,10 +211,9 @@ void SkGIFLZWContext::outputRow(const unsigned char* rowBegin)
 // Otherwise, decoding failed; returns false in this case, which will always cause the SkGifImageReader to set the "decode failed" flag.
 bool SkGIFLZWContext::doLZW(const unsigned char* block, size_t bytesInBlock)
 {
-    const int width = m_frameContext->width();
-
    if (rowIter == rowBuffer.end())
        return true;
+    const int width = m_frameContext->width();

    for (const unsigned char* ch = block; bytesInBlock-- > 0; ch++) {
        // Feed the next byte into the decoder's 32-bit input buffer.
@ -246,40 +245,71 @@ bool SkGIFLZWContext::doLZW(const unsigned char* block, size_t bytesInBlock)
            }

            const int tempCode = code;
-            unsigned short codeLength = 0;
-            if (code < avail) {
-                // This is a pre-existing code, so we already know what it
-                // encodes.
-                codeLength = suffixLength[code];
-                rowIter += codeLength;
-            } else if (code == avail && oldcode != -1) {
-                // This is a new code just being added to the dictionary.
-                // It must encode the contents of the previous code, plus
-                // the first character of the previous code again.
-                codeLength = suffixLength[oldcode] + 1;
-                rowIter += codeLength;
-                *--rowIter = firstchar;
-                code = oldcode;
-            } else {
+            if (code > avail) {
                // This is an invalid code. The dictionary is just initialized
                // and the code is incomplete. We don't know how to handle
                // this case.
                return false;
            }

-            while (code >= clearCode) {
-                *--rowIter = suffix[code];
-                code = prefix[code];
+            if (code == avail) {
+                if (oldcode != -1) {
+                    // This is a new code just being added to the dictionary.
+                    // It must encode the contents of the previous code, plus
+                    // the first character of the previous code again.
+                    // Now we know avail is the new code we can use oldcode
+                    // value to get the code related to that.
+                    code = oldcode;
+                } else {
+                    // This is an invalid code. The dictionary is just initialized
+                    // and the code is incomplete. We don't know how to handle
+                    // this case.
+                    return false;
+                }
            }

-            *--rowIter = firstchar = suffix[code];
+            // code length of the oldcode for new code which is
+            // avail = oldcode + firstchar of the oldcode
+            int remaining = suffixLength[code];
+
+            // Round remaining up to multiple of SK_DICTIONARY_WORD_SIZE, because that's
+            // the granularity of the chunks we copy.  The last chunk may contain
+            // some garbage but it'll be overwritten by the next code or left unused.
+            // The working buffer is padded to account for this.
+            remaining += -remaining & (SK_DICTIONARY_WORD_SIZE - 1) ;
+            unsigned char* p = rowIter + remaining;
+
+            // Place rowIter so that after writing pixels rowIter can be set to firstchar, thereby
+            // completing the code.
+            rowIter += suffixLength[code];
+
+            while (remaining > 0) {
+                p -= SK_DICTIONARY_WORD_SIZE;
+                std::copy_n(suffix[code].begin(), SK_DICTIONARY_WORD_SIZE, p);
+                code = prefix[code];
+                remaining -= SK_DICTIONARY_WORD_SIZE;
+            }
+            const int firstchar = static_cast<unsigned char>(code);  // (strictly `suffix[code][0]`)
+
+            // This completes the new code avail and writing the corresponding
+            // pixels on target.
+            if (tempCode == avail) {
+                *rowIter++ = firstchar;
+            }

            // Define a new codeword in the dictionary as long as we've read
            // more than one value from the stream.
            if (avail < SK_MAX_DICTIONARY_ENTRIES && oldcode != -1) {
-                prefix[avail] = oldcode;
-                suffix[avail] = firstchar;
-                suffixLength[avail] = suffixLength[oldcode] + 1;
+                // now add avail to the dictionary for future reference
+                unsigned short codeLength = suffixLength[oldcode] + 1;
+                int l = (codeLength - 1) & (SK_DICTIONARY_WORD_SIZE - 1);
+                // If the suffix buffer is full (l == 0) then oldcode becomes the new
+                // prefix, otherwise copy and extend oldcode's buffer and use the same
+                // prefix as oldcode used.
+                prefix[avail] = (l == 0) ? oldcode : prefix[oldcode];
+                suffix[avail] = suffix[oldcode];
+                suffix[avail][l] = firstchar;
+                suffixLength[avail] = codeLength;
                ++avail;

                // If we've used up all the codewords of a given length
@ -291,7 +321,6 @@ bool SkGIFLZWContext::doLZW(const unsigned char* block, size_t bytesInBlock)
                }
            }
            oldcode = tempCode;
-            rowIter += codeLength;

            // Output as many rows as possible.
            unsigned char* rowBegin = rowBuffer.begin();
@ -905,20 +934,24 @@ bool SkGIFLZWContext::prepareToDecode()
    // the longest sequence (SK_MAX_DICTIONARY_ENTIRES + 1) - 2 values long. Since
    // each value is a byte, this is also the number of bytes in the longest
    // encodable sequence.
-    const size_t maxBytes = SK_MAX_DICTIONARY_ENTRIES - 1;
+    constexpr size_t kMaxSequence = SK_MAX_DICTIONARY_ENTRIES - 1;
+    constexpr size_t kMaxBytes = (kMaxSequence + SK_DICTIONARY_WORD_SIZE - 1)
+                         & ~(SK_DICTIONARY_WORD_SIZE - 1);

    // Now allocate the output buffer. We decode directly into this buffer
    // until we have at least one row worth of data, then call outputRow().
    // This means worst case we may have (row width - 1) bytes in the buffer
-    // and then decode a sequence |maxBytes| long to append.
-    rowBuffer.reset(m_frameContext->width() - 1 + maxBytes);
+    // and then decode a sequence |kMaxBytes| long to append.
+    rowBuffer.reset(m_frameContext->width() - 1 + kMaxBytes);
    rowIter = rowBuffer.begin();
    rowsRemaining = m_frameContext->height();

    // Clearing the whole suffix table lets us be more tolerant of bad data.
    for (int i = 0; i < clearCode; ++i) {
-        suffix[i] = i;
+        std::fill_n(suffix[i].begin(), SK_DICTIONARY_WORD_SIZE, 0);
+        suffix[i][0] = i;
        suffixLength[i] = 1;
+        prefix[i] = i;  // ensure that we have a place to find firstchar
    }
    return true;
 }
--- a/third_party/gif/SkGifImageReader.h
+++ b/third_party/gif/SkGifImageReader.h
@ -60,6 +60,7 @@ typedef SkTArray<unsigned char, true> SkGIFRow;
 #define SK_MAX_DICTIONARY_ENTRIES    4096 // 2^SK_MAX_DICTIONARY_ENTRY_BITS
 #define SK_MAX_COLORS                256
 #define SK_BYTES_PER_COLORMAP_ENTRY  3
+#define SK_DICTIONARY_WORD_SIZE      8

 // List of possible parsing states.
 enum SkGIFState {
@ -97,7 +98,6 @@ public:
        , clearCode(0)
        , avail(0)
        , oldcode(0)
-        , firstchar(0)
        , bits(0)
        , datum(0)
        , ipass(0)
@ -120,7 +120,6 @@ private:
    int clearCode; // Codeword used to trigger dictionary reset.
    int avail; // Index of next available slot in dictionary.
    int oldcode;
-    unsigned char firstchar;
    int bits; // Number of unread bits in "datum".
    int datum; // 32-bit input buffer.
    int ipass; // Interlace pass; Ranges 1-4 if interlaced.
@ -128,7 +127,8 @@ private:
    size_t rowsRemaining; // Rows remaining to be output.

    unsigned short prefix[SK_MAX_DICTIONARY_ENTRIES];
-    unsigned char suffix[SK_MAX_DICTIONARY_ENTRIES];
+    std::array<std::array<unsigned char, SK_DICTIONARY_WORD_SIZE>,
+                SK_MAX_DICTIONARY_ENTRIES> suffix;
    unsigned short suffixLength[SK_MAX_DICTIONARY_ENTRIES];
    SkGIFRow rowBuffer; // Single scanline temporary buffer.
    unsigned char* rowIter;