AuroraRuntime/Source/Locale/Encoding/Encoding.hpp

136 lines
4.3 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Encoding.hpp
Date: 2021-8-18
Author: Reece
***/
#pragma once
#include "../Locale.hpp"
#include "GBK/GBK.hpp"
#include "SJIS/SJIS.hpp"
#include "UTFn/AuUTF8.hpp"
#include "UTFn/AuUTF16.hpp"
#include "UTFn/AuUTF32.hpp"
namespace Aurora::Locale::Encoding
{
/// One-shot conversion entry point; only the declaration is visible in this header.
/// NOTE(review): presumably converts `binaryLength` bytes at `binary`, interpreted in
/// code page `page`, into UTF-8 stored in `out` -- confirm against the definition.
/// `page` defaults to ECodePage::eEnumInvalid; how that default is resolved is decided
/// by the definition, not here.
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page = ECodePage::eEnumInvalid);
struct TextStreamProcessor
{
bool readHeader {};
ECodePage page = ECodePage::eEnumInvalid;
ECodePage defaultPage = ECodePage::eEnumInvalid;
EncoderAdapter state;
TextStreamProcessor(ECodePage page = ECodePage::eSysUnk) : defaultPage(page) {}
using TypeIn_t = const void *;
using TypeCast_t = const AuUInt8 *;
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utfLen)
{
int offset = 0;
if (!binary) return {};
if (!binaryLength) return {};
if (!AuExchange(readHeader, true))
{
if (page == ECodePage::eEnumInvalid)
{
auto header = DecodeBOM(Memory::MemoryViewRead(binary, binaryLength));
if (header.length)
{
page = header.page;
offset = header.length;
}
else
{
if ((defaultPage != ECodePage::eEnumInvalid))
{
page = defaultPage;
}
else
{
page = GetInternalCodePage();
}
}
state.Init(page, true);
}
}
if (page == ECodePage::eEnumInvalid)
{
return {};
}
binaryLength = binaryLength - offset;
auto real = state.CPToUTF8(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength, utf8, utfLen);
return AuMakePair(real.first + offset, real.second);
}
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out)
{
auto preemptive = EncodeUTF8(binary, binaryLength, nullptr, 0);
if (!AuTryResize(out, preemptive.second)) return {};
auto main = EncodeUTF8(binary, preemptive.second, out.data(), AuUInt32(out.size()));
if (main.second == 0) return {};
if (!AuTryResize(out, main.second)) return {};
out.shrink_to_fit();
return main;
}
};
/// Stateful converter from UTF-8 to the code page chosen at construction.
struct TextStreamEncoder
{
    /// Target code page; conversions return an empty pair while this is eEnumInvalid.
    ECodePage page;
    /// Underlying converter, initialised in the constructor.
    EncoderAdapter state;

    TextStreamEncoder(ECodePage page = ECodePage::eUTF32) : page(page)
    {
        state.Init(page, false);
    }

    /// Converts UTF-8 input into `out`, replacing its previous contents.
    ///
    /// `out` is first resized to the size reported by a sizing pass, then filled.
    /// Previously the destination was never grown (so an empty `out` received
    /// nothing) and the second pass was given the *output* size as its input length.
    ///
    /// @param utf8In UTF-8 source bytes
    /// @param length number of source bytes
    /// @param out    receives the converted bytes
    /// @return the pair reported by the adapter for the conversion pass; an empty pair
    ///         when the page is invalid, the input is null/empty, or `out` could not
    ///         be resized.
    AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 length, AuString &out)
    {
        if (page == ECodePage::eEnumInvalid)
        {
            return {};
        }

        if (!utf8In)
        {
            return {};
        }

        if (!length)
        {
            return {};
        }

        // Sizing pass: null destination, zero capacity.
        auto preemptive = state.UTF8ToCp(utf8In, length, nullptr, 0);
        if (!AuTryResize(out, preemptive.second))
        {
            return {};
        }

        auto written = state.UTF8ToCp(utf8In, length, out.data(), AuUInt32(out.size()));
        out.resize(written.second);
        out.shrink_to_fit();
        return written;
    }

    /// Converts UTF-8 input into a caller supplied buffer.
    ///
    /// All arguments are forwarded unchanged to the adapter.
    ///
    /// @return the pair reported by the adapter, or an empty pair when the page is
    ///         invalid.
    AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 utf8Length, void *binaryOut, AuUInt32 binaryLength)
    {
        if (page == ECodePage::eEnumInvalid)
        {
            return {};
        }

        return state.UTF8ToCp(utf8In, utf8Length, binaryOut, binaryLength);
    }
};
/// 'TextStreamProcessor' is a stateful wrapper around DecodeUTF8.
/// Use it to process a stateful, optionally BOM-prefixed, stream.
/// Initialization (i.e. setting a default code page) is optional.
}