AuroraRuntime/Source/Locale/Encoding/Encoding.hpp

134 lines
4.3 KiB
C++
Raw Normal View History

2021-09-06 10:58:08 +00:00
/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: Encoding.hpp
Date: 2021-8-18
Author: Reece
***/
#pragma once
#include "GBK/GBK.hpp"
#include "SJIS/SJIS.hpp"
#include "UTFn/AuUTF8.hpp"
#include "UTFn/AuUTF16.hpp"
#include "UTFn/AuUTF32.hpp"
2021-09-06 10:58:08 +00:00
namespace Aurora::Locale::Encoding
{
/// Stateless decode entry point; only the declaration is visible in this header.
/// NOTE(review): presumably converts `binaryLength` bytes at `binary`, encoded in `page`, into UTF-8 stored in `out`,
/// and returns a {bytes read, bytes written} pair - confirm against the definition.
/// `page` defaults to ECodePage::eUnsupported; how that default is resolved is not shown here.
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page = ECodePage::eUnsupported);
struct TextStreamProcessor
2021-09-06 10:58:08 +00:00
{
bool readHeader {};
ECodePage page = ECodePage::eUnsupported;
ECodePage defaultPage = ECodePage::eUnsupported;
EncoderAdapter state;
TextStreamProcessor(ECodePage page = ECodePage::eSysUnk) : defaultPage(page) {}
2021-09-06 10:58:08 +00:00
using TypeIn_t = const void *;
using TypeCast_t = const AuUInt8 *;
2021-09-06 10:58:08 +00:00
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utfLen)
2021-09-06 10:58:08 +00:00
{
int offset = 0;
2021-09-06 13:17:41 +00:00
if (!binary) return {};
if (!binaryLength) return {};
2021-09-06 10:58:08 +00:00
if (!std::exchange(readHeader, true))
{
if (page == ECodePage::eUnsupported)
{
auto header = DecodeBOM(Memory::MemoryViewRead(binary, binaryLength));
if (header.length)
2021-09-06 10:58:08 +00:00
{
page = header.page;
offset = header.length;
2021-09-06 10:58:08 +00:00
}
else
{
if ((defaultPage != ECodePage::eUnsupported))
{
page = defaultPage;
}
else
{
page = GetInternalCodePage();
}
}
state.Init(page, true);
}
}
if (page == ECodePage::eUnsupported)
{
return {};
}
2021-09-06 13:03:45 +00:00
binaryLength = binaryLength - offset;
auto real = state.CPToUTF8(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength, utf8, utfLen);
2021-09-06 10:58:08 +00:00
return AuMakePair(real.first + offset, real.second);
}
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out)
2021-09-06 10:58:08 +00:00
{
2021-09-06 13:17:41 +00:00
auto preemptive = EncodeUTF8(binary, binaryLength, nullptr, 0);
if (!AuTryResize(out, preemptive.second)) return {};
auto main = EncodeUTF8(binary, preemptive.second, out.data(), out.size());
2021-09-06 10:58:08 +00:00
if (main.second == 0) return {};
if (!AuTryResize(out, main.second)) return {};
out.shrink_to_fit();
return main;
}
};
/// Stateful converter from UTF-8 to the code page selected at construction.
struct TextStreamEncoder
{
    ECodePage page;
    EncoderAdapter state;

    TextStreamEncoder(ECodePage page = ECodePage::eUTF32) : page(page)
    {
        state.Init(page, false);
    }

    /// Converts `length` bytes of UTF-8 at `utf8In` into `out`, resizing `out` to the exact result length.
    /// @return {bytes read, bytes written} as reported by the adapter, or an empty pair on
    ///         null/empty input, unsupported code page or allocation failure.
    AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 length, AuString &out)
    {
        if (page == ECodePage::eUnsupported)
        {
            return {};
        }

        if (!utf8In || !length)
        {
            return {};
        }

        // Measuring pass: relies on the adapter reporting the required output size when
        // given a null output buffer (the same assumption the previous code made).
        auto preemptive = state.UTF8ToCp(utf8In, length, nullptr, 0);

        // The destination must actually be that large before it is written into.
        if (!AuTryResize(out, preemptive.second))
        {
            return {};
        }

        // Real pass over the same input; the input length is `length`, not the number of
        // bytes the measuring pass said it would write.
        auto written = state.UTF8ToCp(utf8In, length, out.data(), AuUInt32(out.size()));

        if (!AuTryResize(out, written.second))
        {
            return {};
        }
        out.shrink_to_fit();
        return written;
    }

    /// Converts `utf8Length` bytes of UTF-8 at `utf8In` into the caller-provided buffer
    /// `binaryOut` (`binaryLength` bytes).
    /// @return the adapter's {bytes read, bytes written} pair, or an empty pair on
    ///         null/empty input or unsupported code page.
    AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 utf8Length, void *binaryOut, AuUInt32 binaryLength)
    {
        if (page == ECodePage::eUnsupported)
        {
            return {};
        }

        if (!utf8In || !utf8Length)
        {
            return {};
        }

        return state.UTF8ToCp(utf8In, utf8Length, binaryOut, binaryLength);
    }
};
/// 'TextStreamProcessor' is a stateful wrapper around DecodeUTF8.
/// Use it to handle a stateful, optionally BOM-prefixed, stream.
/// Initialization (i.e. setting a default code page) is optional.
}