/***
    Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.

    File: AuCompression.hpp
    Date: 2021-6-17
    Author: Reece
***/
#pragma once

namespace Aurora::Compression
{

#if defined(AURORA_IS_SERVER) || defined(AURORA_16_KIB_IN_OUT_BUFFERS_32K)
    // Servers can hold a multiple of these buffers per client if streams are not recycled.
    // XMPP, for example, does not reset: zlib and friends expect a genuine long-lived compression stream.
    // HTTP, libdeflate, and others expect flush frames, with the stateless stream recreated each IO tick.
    // Let's assume we are a server:
    //  - most of our IO runs under protocol stacks that already handle almost all of the buffering,
    //  - scaling this value by 2-8x compounds across the whole client population,
    //  - scaling it by an order of magnitude costs 10x the memory per client (though you can afford the memory these days),
    //  - the underlying compressor object may not even touch these temp[kChunkSize] staging buffers - some can read directly from AuByteBuffers,
    //  - some compression libraries are internally allocation heavy, which delegates the working set to the malloc/free callbacks...
    //  - ...and these, like zstd, demand almost zero buffered data; a frame fragment is almost nothing.
    // Therefore it makes sense to keep this value as small as possible (see the illustrative pump-loop sketch after the #endif below).
    static const AuUInt64 kChunkSize = 16 * 1024;

#elif defined(AURORA_IS_CLIENT)
    // For builds explicitly earmarked to ship to end users, 2x the general-purpose buffer size.
    static const AuUInt64 kChunkSize = 256 * 1024;

#else
    // ...and the general-purpose default.
    static const AuUInt64 kChunkSize = 128 * 1024;

    // Also note: the zlib docs state that buffers up to 256k are efficient, but you can start at around 16k.
    // https://www.zlib.net/zlib_how.html Last modified 8 February 2023; originally written approx 30 Oct 2004 - 11 Dec 2005.
    // The general infrastructure and requirements haven't changed in two decades:
    // in 2001 and 2021 you can afford to buy a server with a few tens of gigabytes of RAM,
    // in 2001 and 2021 you can afford to buy a gigabit switch,
    // in 2001 and 2021 you're still running deflate-family algorithms,
    // in 2004 and 2024 you're still struggling to parallelize compression beyond N tarballs on N cores (N ~= 2-6 would be reasonable).
    // In 2012-2014 you could get yourself an Ivy Bridge or a Sandy Bridge; in 2024 you can get yourself an AMD page-aligned atomic acker or Oxide Inside 14th gen,
    // with ~4th gen being, in theory, a multiple of single digits slower than 14th gen (and, yes, comparing 4th gen to 9th gen compilation times will show 4-8x boosts).
    // On a very fundamental level, single-threaded performance hasn't moved that much.
    // That also applies to compression: it's still the same old slow task of calculating bit patterns, walking big precomputed (or not) datasets, and making in-order, non-parallelizable changes.
    // With hardware not progressing **that** much, the fundamental algorithms staying exactly the same, and the same libraries in use, call me a bit insane for thinking an article from 2005 isn't far off from reality.
    //
    // Budget math:
    //   16k per stream * 128k clients (2^14 * 2^17 bytes = 2^31) ~= 2 GiB of RAM for a heavyweight server (x2 for local in/out? [= 4 GiB]; x4 for an additional in/out stream pairing? [= 8 GiB]) (plus many other user implementation overheads)
    //   16k per stream / 256 MB on an embedded tablet or games console
    //   16k per stream / 256 - 2048 MB per app / 1024 - 4096 MB per home appliance (laptop, pc, etc), mid 2000s
    //   16k per stream / 256 - 2048 MB per app / 4096 - 32768 MB per home appliance (laptop, pc, etc), mid 2020s
    //   64k (2^16) IPv4 UDP ports, including the system-reserved range and others
    //   x2 for an IPv6 address = 128k
    // AAA software can see peaks of hundreds of thousands to millions of clients per platform, so x2, x10, x20, x100 this (for the more responsive stateful, proprietary, XMPP-like protocols).
    // WebShid "technology" works on stateless packets, so your memory requirement is basically just this in the network stream buffers, with the rest being temporary allocations (more expensive and wasteful for stream-like connections).
    // Seems fair...? Seems like the world we should still be living in...? Maybe we can 10x or 100x it no matter the era for increased throughput...? Would zlib even care...?
#endif
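
    // Illustrative sketch (assumption: a raw zlib <zlib.h> backend, which this header does not
    // mandate; the helper name below is hypothetical): the CHUNK-style pump loop from
    // zlib_how.html that the sizes above are tuned for, with temp[kChunkSize] acting as the
    // per-tick staging buffer discussed above (stack storage here purely for brevity).
    //
    //    #include <zlib.h>
    //
    //    static bool InflateOneTick(z_stream &strm, const Bytef *pIn, uInt uInLen,
    //                               void (*pfnOnOutput)(const Bytef *, uInt))
    //    {
    //        Bytef temp[kChunkSize];                     // staging buffer whose size is debated above
    //        strm.next_in  = const_cast<Bytef *>(pIn);
    //        strm.avail_in = uInLen;
    //
    //        do
    //        {
    //            strm.next_out  = temp;
    //            strm.avail_out = uInt(sizeof(temp));
    //
    //            int ret = ::inflate(&strm, Z_NO_FLUSH);
    //            if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT ||
    //                ret == Z_DATA_ERROR   || ret == Z_MEM_ERROR)
    //            {
    //                return false;                       // hard error; caller tears the stream down
    //            }
    //
    //            pfnOnOutput(temp, uInt(sizeof(temp)) - strm.avail_out);
    //        }
    //        while (strm.avail_out == 0);                // output buffer was filled: more data pending
    //
    //        return true;
    //    }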

    // sometimes on stack
    static const AuUInt64 kChunkSize2 = 128 * 1024;

    /// * compression type + bits -> internal zlib windowBits
    static bool CompressionLevelFromExternalApi(const DecompressInfo &info, AuInt8 &out)
    {
        out = 0;

        if (!info.uOptWindowBits)
        {
            if (info.alg == ECompressionType::eGZip)
            {
                out = 15 | 16;
            }
            else if (info.alg == ECompressionType::eDeflate)
            {
                out = -15;
            }
            else
            {
                out = 15;
            }

            return true;
        }

        if (info.uOptWindowBits.value() < 0)
        {
            return false;
        }

        if (info.uOptWindowBits.value() > 15)
        {
            return false;
        }

        if (info.alg == ECompressionType::eGZip)
        {
            out = info.uOptWindowBits.value() | 16;
        }
        else if (info.alg == ECompressionType::eDeflate)
        {
            out = 0 - info.uOptWindowBits.value();
        }
        else
        {
            out = info.uOptWindowBits.value();
        }

        return true;
    }
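
    // Sketch of a possible consumer (assumption: zlib is the backend; the call site below is
    // hypothetical): the value written to out follows zlib's windowBits convention -
    // 15 | 16 selects the gzip wrapper, -15 selects raw deflate, and plain 15 selects the
    // zlib wrapper - so it can be handed straight to inflateInit2.
    //
    //    DecompressInfo info {};
    //    info.alg = ECompressionType::eGZip;             // no uOptWindowBits -> defaults to 15 | 16
    //
    //    AuInt8 windowBits {};
    //    if (CompressionLevelFromExternalApi(info, windowBits))
    //    {
    //        z_stream strm {};
    //        if (::inflateInit2(&strm, windowBits) == Z_OK)
    //        {
    //            // ...pump data through ::inflate(), then ::inflateEnd(&strm)
    //        }
    //    }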

    /// * compression type + bits -> internal zlib windowBits
    static bool CompressionLevelFromExternalApi(const CompressInfo &info, AuInt8 &out)
    {
        out = 0;

        if (!info.uOptWindowBits)
        {
            if (info.type == ECompressionType::eGZip)
            {
                out = 15 | 16;
            }
            else if (info.type == ECompressionType::eDeflate)
            {
                out = -15;
            }
            else
            {
                out = 15;
            }

            return true;
        }

        if (info.uOptWindowBits.value() < 0)
        {
            return false;
        }

        if (info.uOptWindowBits.value() > 15)
        {
            return false;
        }

        if (info.type == ECompressionType::eGZip)
        {
            out = info.uOptWindowBits.value() | 16;
        }
        else if (info.type == ECompressionType::eDeflate)
        {
            out = 0 - info.uOptWindowBits.value();
        }
        else
        {
            out = info.uOptWindowBits.value();
        }

        return true;
    }
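
    // Sketch of the compress-side counterpart (assumption: zlib backend; the call site below is
    // hypothetical): the same windowBits value slots into deflateInit2's fourth parameter, with
    // level, memLevel, and strategy chosen by the caller.
    //
    //    CompressInfo info {};
    //    info.type = ECompressionType::eDeflate;         // no uOptWindowBits -> defaults to -15 (raw deflate)
    //
    //    AuInt8 windowBits {};
    //    if (CompressionLevelFromExternalApi(info, windowBits))
    //    {
    //        z_stream strm {};
    //        if (::deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
    //                           windowBits, 8 /* memLevel */, Z_DEFAULT_STRATEGY) == Z_OK)
    //        {
    //            // ...pump data through ::deflate(), then ::deflateEnd(&strm)
    //        }
    //    }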
}