AuroraRuntime/Source/Locale/Encoding/Encoding.hpp
Reece 99c5e1fa65 A pretty large patch not worth breaking up into separate commits
[*] Split up Aurora Async
[*] Split Async app into separate ThreadPool concept
[*] Fix various OSThread bugs and tls transfer issues
[*] Set default affinity to 0xFFFFFFFF
[*] Update Build script
[+] Add AuTuplePopFront
[+] New Network Interface (unimplemented)
[*] Stub out the interfaces required for a better logger
[*] Fix Win32 ShellExecute bug; windows 11 struggles without explicit com init per the docs - now deferring to thread pool
[*] Update gitignore
[*] Follow XDG home standard
[*] Refactor some namespaces to use the shorthand aliases
[*] Various stability fixes
2021-11-05 17:34:23 +00:00

134 lines
4.3 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Encoding.hpp
Date: 2021-8-18
Author: Reece
***/
#pragma once
#include "GBK/GBK.hpp"
#include "SJIS/SJIS.hpp"
#include "UTFn/AuUTF8.hpp"
#include "UTFn/AuUTF16.hpp"
#include "UTFn/AuUTF32.hpp"
namespace Aurora::Locale::Encoding
{
/// Free-function decoder; only declared here, the definition lives elsewhere.
/// Takes a raw buffer (`binary`, `binaryLength` bytes), an output string and a code page
/// that defaults to ECodePage::eUnsupported; returns an AuStreamReadWrittenPair_t.
/// NOTE(review): presumably converts `binary` from `page` into UTF-8 in `out` and falls back to
/// some form of detection when eUnsupported is passed — confirm against the definition.
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page = ECodePage::eUnsupported);
struct TextStreamProcessor
{
bool readHeader {};
ECodePage page = ECodePage::eUnsupported;
ECodePage defaultPage = ECodePage::eUnsupported;
EncoderAdapter state;
TextStreamProcessor(ECodePage page = ECodePage::eSysUnk) : defaultPage(page) {}
using TypeIn_t = const void *;
using TypeCast_t = const AuUInt8 *;
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utfLen)
{
int offset = 0;
if (!binary) return {};
if (!binaryLength) return {};
if (!std::exchange(readHeader, true))
{
if (page == ECodePage::eUnsupported)
{
auto header = DecodeBOM(Memory::MemoryViewRead(binary, binaryLength));
if (header.length)
{
page = header.page;
offset = header.length;
}
else
{
if ((defaultPage != ECodePage::eUnsupported))
{
page = defaultPage;
}
else
{
page = GetInternalCodePage();
}
}
state.Init(page, true);
}
}
if (page == ECodePage::eUnsupported)
{
return {};
}
binaryLength = binaryLength - offset;
auto real = state.CPToUTF8(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength, utf8, utfLen);
return AuMakePair(real.first + offset, real.second);
}
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out)
{
auto preemptive = EncodeUTF8(binary, binaryLength, nullptr, 0);
if (!AuTryResize(out, preemptive.second)) return {};
auto main = EncodeUTF8(binary, preemptive.second, out.data(), out.size());
if (main.second == 0) return {};
if (!AuTryResize(out, main.second)) return {};
out.shrink_to_fit();
return main;
}
};
/// Stateful converter from UTF-8 into the code page chosen at construction.
/// `state` is initialised once in the constructor via Init(page, false) and reused by every call.
struct TextStreamEncoder
{
    /// Target code page; every conversion returns an empty result while it is eUnsupported.
    ECodePage page;
    /// Conversion state shared by all calls on this object.
    EncoderAdapter state;

    /// @param page target code page (defaults to ECodePage::eUTF32)
    TextStreamEncoder(ECodePage page = ECodePage::eUTF32) : page(page)
    {
        state.Init(page, false);
    }

    /// Converts `length` bytes of UTF-8 at `utf8In` into `out`, sizing `out` to fit the result.
    ///
    /// A first pass with a null output buffer measures the required size; `out` is then resized
    /// to that size before the second pass writes into it. Both passes are given the same input
    /// length.
    ///
    /// @return the pair reported by the writing pass; an empty pair when the code page is
    ///         unsupported, the input is null/empty, or `out` cannot be resized
    AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 length, AuString &out)
    {
        if (page == ECodePage::eUnsupported)
        {
            return {};
        }

        if (!utf8In)
        {
            return {};
        }

        if (!length)
        {
            return {};
        }

        // Pass 1: measure only.
        auto preemptive = state.UTF8ToCp(utf8In, length, nullptr, 0);

        // The destination must be large enough *before* the writing pass; without this the
        // converter would be handed whatever size `out` happened to have (possibly zero).
        if (!AuTryResize(out, preemptive.second)) return {};

        // Pass 2: convert the original input span, not the measured output size.
        auto written = state.UTF8ToCp(utf8In, length, out.data(), AuUInt32(out.size()));

        // Trim in case fewer bytes were produced than the measuring pass reported.
        if (!AuTryResize(out, written.second)) return {};
        out.shrink_to_fit();
        return written;
    }

    /// Converts `utf8Length` bytes of UTF-8 at `utf8In` into the caller supplied buffer.
    ///
    /// @param binaryOut    output buffer, forwarded unchanged to the converter
    /// @param binaryLength capacity of `binaryOut` in bytes
    /// @return the pair reported by the converter; an empty pair when the code page is unsupported
    AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 utf8Length, void *binaryOut, AuUInt32 binaryLength)
    {
        if (page == ECodePage::eUnsupported)
        {
            return {};
        }

        return state.UTF8ToCp(utf8In, utf8Length, binaryOut, binaryLength);
    }
};
/// 'TextStreamProcessor' is a stateful wrapper around DecodeUTF8.
/// Use it to process a stateful, optionally BOM-prefixed, stream.
/// Initialization (i.e. setting a default code page) is optional.
}