/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuCompression.hpp
Date: 2021-6-17
Author: Reece
***/
#pragma once
namespace Aurora::Compression
{
#if defined(AURORA_IS_SERVER) || defined(AURORA_16_KIB_IN_OUT_BUFFERS_32K)
    // servers can have multiple of these per client, if streams are not to be recycled.
    // XMPP, for example, does not reset; zlib and others expect genuine compression streams.
    // HTTP, libdeflate, and others expect flush frames, and then for the stateless stream to be recreated each IO tick.
    // let's assume we are a server:
    // most of our IO is done under protocol stacks that'll handle almost all of the buffering;
    // changing this value by 2-8x will apply to the law of large numbers on a server;
    // changing this value by an order of magnitude is a cost of 10x memory per client (though you can afford the memory these days);
    // the underlying compressor object may not even be accessing these temp[kChunkSize] optimizations - some can read directly from AuByteBuffers;
    // some compression libraries are internally alloc heavy, which delegates the usage area to malloc/free callbacks...
    // ...these, like zstd, demand almost zero buffered data. a frame fragment is almost nothing.
    // therefore, it makes sense to keep these values down as much as possible.
    // (a sketch of a typical chunked inflate loop follows this #if block)
    static const AuUInt64 kChunkSize = 16 * 1024;
#elif defined(AURORA_IS_CLIENT)
    // for builds explicitly earmarked to go out to end users, 2x the usual buffer size
    static const AuUInt64 kChunkSize = 256 * 1024;
#else
    // and general purpose.
    static const AuUInt64 kChunkSize = 128 * 1024;
    // also note: zlib docs state up to 256k is efficient, but you can start at around 16k
    // https://www.zlib.net/zlib_how.html Last modified 8 February 2023; originally written approx 30 Oct 2004 - 11 Dec 2005.
    // general infrastructure and requirements haven't changed in two decades:
    // in 2001 and 2021 you can afford to buy a server with a few tens of gigabytes of ram
    // in 2001 and 2021 you can afford to buy a gigabit switch
    // in 2001 and 2021 you're still running on deflate algorithms
    // in 2004 and 2024 you're still struggling to parallelize compression without N x tarballs on N x cores (N ~= 2-6 would be reasonable)
    // in 2012-2014 you could get yourself an Ivy Bridge or a Sandy Bridge; in 2024 you could get yourself an AMD page-aligned atomic acker or an Oxide Inside 14th gen,
    // with ~4th gen being, in theory, only a low single-digit multiple slower than 14th gen. and, yes, comparing 4th gen to 9th gen compilation times will show 4-8x boosts, yet
    // on a very fundamental level, we haven't moved that much in terms of single-threaded performance.
    // that also applies to compression. it's still the same old slow task: calculating bit patterns, looking through big precomputed (or not) datasets, and making in-order & non-parallelizable changes.
    // with hardware not progressing **that** much, the fundamental algorithms staying the exact same, and the same libraries being used, call me a bit insane to think maybe an article from 2005 isn't that far off from reality.
    // 16 KiB per stream * 128k clients ~= 2 GiB (x2 for local in/out? [= 4 GiB]; x4 for an additional in/out stream pairing? [= 8 GiB]) of ram for a heavyweight server (plus many other user implementation overheads)
    // 16k per stream / 256MB on an embedded tablet or games console
    // 16k per stream / 256 - 2048MB per app / 1024 - 4096MB per home appliance mid 2000s (laptop, pc, etc)
    // 16k per stream / 256 - 2048MB per app / 4096 - 32768MB per home appliance mid 2020s (laptop, pc, etc)
    // 64k (2^16) IPv4 UDP ports, including system-reserved ranges and others
    // 2x IPv6 addresses = 128k
    // AAA software can see peaks of hundreds of thousands to millions of clients per platform, so x2, x10, x20, x100 this (more responsive stateful, proprietary, xmpp, etc protocols)
    // WebShid "technology" works on stateless packets, so your memory requirements are basically just this in the network stream buffers, with the rest being temporary allocations (more expensive & wasteful for stream-like connections).
    // seems fair...? seems like the world we should still be living in...? maybe we can 10x or 100x it no matter the era for increased throughput...? would zlib even care...?
#endif
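
    // Illustrative sketch (not part of this header): how a kChunkSize-sized temp
    // buffer is typically drained in a chunked inflate loop. This uses raw zlib
    // (z_stream / inflateInit2 / inflate from <zlib.h>) rather than Aurora's own
    // stream wrappers, and pCompressed / uCompressedLength / Sink are hypothetical:
    //
    //     unsigned char temp[kChunkSize];
    //     z_stream zs {};
    //     if (inflateInit2(&zs, 15 | 16) != Z_OK)          // 15 | 16 -> gzip container
    //     {
    //         return;
    //     }
    //     zs.next_in  = pCompressed;
    //     zs.avail_in = uCompressedLength;
    //     int ret {};
    //     do
    //     {
    //         zs.next_out  = temp;                         // decompress into the chunk...
    //         zs.avail_out = sizeof(temp);
    //         ret = inflate(&zs, Z_NO_FLUSH);
    //         Sink(temp, sizeof(temp) - zs.avail_out);     // ...then hand it off downstream
    //     } while (ret == Z_OK);
    //     inflateEnd(&zs);
    //
    // each live stream pins roughly kChunkSize of scratch memory at a time, which is
    // why the server configuration above pulls the value down to 16 KiB.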
    // scratch chunk size; sometimes allocated on the stack
    static const AuUInt64 kChunkSize2 = 128 * 1024;
    /// * compression type + bits -> internal zlib windowBits
    /// (note: despite the name, this derives windowBits, not a compression level)
    static bool CompressionLevelFromExternalApi(const DecompressInfo &info, AuInt8 &out)
    {
        out = 0;

        if (!info.uOptWindowBits)
        {
            // no explicit window size requested: use zlib's maximum of 15 bits,
            // encoded per zlib convention (windowBits | 16 -> gzip container,
            // negative windowBits -> raw deflate, plain value -> zlib wrapper)
            if (info.alg == ECompressionType::eGZip)
            {
                out = 15 | 16;
            }
            else if (info.alg == ECompressionType::eDeflate)
            {
                out = -15;
            }
            else
            {
                out = 15;
            }

            return true;
        }

        if (info.uOptWindowBits.value() < 0)
        {
            return false;
        }

        if (info.uOptWindowBits.value() > 15)
        {
            return false;
        }

        if (info.alg == ECompressionType::eGZip)
        {
            out = info.uOptWindowBits.value() | 16;
        }
        else if (info.alg == ECompressionType::eDeflate)
        {
            out = 0 - info.uOptWindowBits.value();
        }
        else
        {
            out = info.uOptWindowBits.value();
        }

        return true;
    }
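
    // Illustrative mapping (not part of this header), following the windowBits
    // convention documented for inflateInit2 in <zlib.h>; the result is presumably
    // handed to zlib's init call on the decompression path:
    //
    //     alg       | uOptWindowBits | out (windowBits)
    //     ----------+----------------+------------------------------------
    //     eGZip     | (unset)        | 15 | 16 = 31  (gzip container)
    //     eDeflate  | (unset)        | -15           (raw deflate, headerless)
    //     (other)   | (unset)        | 15            (zlib wrapper)
    //     eGZip     | 12             | 12 | 16 = 28
    //     eDeflate  | 12             | -12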
    /// * compression type + bits -> internal zlib windowBits
    /// (same mapping as above, for the compression path)
    static bool CompressionLevelFromExternalApi(const CompressInfo &info, AuInt8 &out)
    {
        out = 0;

        if (!info.uOptWindowBits)
        {
            // no explicit window size requested: default to 15 bits, encoded as above
            if (info.type == ECompressionType::eGZip)
            {
                out = 15 | 16;
            }
            else if (info.type == ECompressionType::eDeflate)
            {
                out = -15;
            }
            else
            {
                out = 15;
            }

            return true;
        }

        // mirror the range validation of the decompression overload above;
        // zlib's deflateInit2 rejects windowBits outside of its supported range
        if (info.uOptWindowBits.value() < 0 ||
            info.uOptWindowBits.value() > 15)
        {
            return false;
        }

        if (info.type == ECompressionType::eGZip)
        {
            out = info.uOptWindowBits.value() | 16;
        }
        else if (info.type == ECompressionType::eDeflate)
        {
            out = 0 - info.uOptWindowBits.value();
        }
        else
        {
            out = info.uOptWindowBits.value();
        }

        return true;
    }
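
    // Illustrative sketch (not part of this header): consuming the mapping above on
    // the compression path with raw zlib. deflateInit2's signature is as documented
    // in <zlib.h>; compressInfo is a hypothetical, already-populated CompressInfo:
    //
    //     AuInt8 windowBits {};
    //     if (!CompressionLevelFromExternalApi(compressInfo, windowBits))
    //     {
    //         return false;                            // out-of-range window bits
    //     }
    //
    //     z_stream zs {};
    //     int ret = deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
    //                            windowBits, 8 /* memLevel */, Z_DEFAULT_STRATEGY);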
}