136 lines
4.3 KiB
C++
136 lines
4.3 KiB
C++
/***
|
|
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
|
|
|
|
File: Encoding.hpp
|
|
Date: 2021-8-18
|
|
Author: Reece
|
|
***/
|
|
#pragma once
|
|
|
|
#include "../Locale.hpp"
|
|
|
|
#include "GBK/GBK.hpp"
|
|
#include "SJIS/SJIS.hpp"
|
|
#include "UTFn/AuUTF8.hpp"
|
|
#include "UTFn/AuUTF16.hpp"
|
|
#include "UTFn/AuUTF32.hpp"
|
|
|
|
namespace Aurora::Locale::Encoding
|
|
{
|
|
/// Free-function decoder; only the declaration is visible in this header.
/// NOTE(review): judging by the signature alone, this presumably converts the
/// `binaryLength` bytes at `binary` (interpreted using `page`) to UTF-8 stored in
/// `out` and returns the read/written byte counts -- confirm against the definition.
/// `page` defaults to ECodePage::eEnumInvalid.
AuStreamReadWrittenPair_t DecodeUTF8(void *binary, AuUInt32 binaryLength, AuString &out, ECodePage page = ECodePage::eEnumInvalid);
|
|
|
|
struct TextStreamProcessor
|
|
{
|
|
bool readHeader {};
|
|
ECodePage page = ECodePage::eEnumInvalid;
|
|
ECodePage defaultPage = ECodePage::eEnumInvalid;
|
|
EncoderAdapter state;
|
|
|
|
TextStreamProcessor(ECodePage page = ECodePage::eSysUnk) : defaultPage(page) {}
|
|
|
|
using TypeIn_t = const void *;
|
|
using TypeCast_t = const AuUInt8 *;
|
|
|
|
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, void *utf8, AuUInt32 utfLen)
|
|
{
|
|
int offset = 0;
|
|
|
|
if (!binary) return {};
|
|
if (!binaryLength) return {};
|
|
|
|
if (!AuExchange(readHeader, true))
|
|
{
|
|
if (page == ECodePage::eEnumInvalid)
|
|
{
|
|
auto header = DecodeBOM(Memory::MemoryViewRead(binary, binaryLength));
|
|
if (header.length)
|
|
{
|
|
page = header.page;
|
|
offset = header.length;
|
|
}
|
|
else
|
|
{
|
|
if ((defaultPage != ECodePage::eEnumInvalid))
|
|
{
|
|
page = defaultPage;
|
|
}
|
|
else
|
|
{
|
|
page = GetInternalCodePage();
|
|
}
|
|
}
|
|
state.Init(page, true);
|
|
}
|
|
}
|
|
|
|
if (page == ECodePage::eEnumInvalid)
|
|
{
|
|
return {};
|
|
}
|
|
|
|
binaryLength = binaryLength - offset;
|
|
auto real = state.CPToUTF8(reinterpret_cast<TypeCast_t>(binary) + offset, binaryLength, utf8, utfLen);
|
|
return AuMakePair(real.first + offset, real.second);
|
|
}
|
|
|
|
AuStreamReadWrittenPair_t EncodeUTF8(const void *binary, AuUInt32 binaryLength, AuString &out)
|
|
{
|
|
auto preemptive = EncodeUTF8(binary, binaryLength, nullptr, 0);
|
|
if (!AuTryResize(out, preemptive.second)) return {};
|
|
auto main = EncodeUTF8(binary, preemptive.second, out.data(), AuUInt32(out.size()));
|
|
if (main.second == 0) return {};
|
|
if (!AuTryResize(out, main.second)) return {};
|
|
out.shrink_to_fit();
|
|
return main;
|
|
}
|
|
};
|
|
|
|
struct TextStreamEncoder
|
|
{
|
|
ECodePage page;
|
|
EncoderAdapter state;
|
|
|
|
TextStreamEncoder(ECodePage page = ECodePage::eUTF32) : page(page)
|
|
{
|
|
state.Init(page, false);
|
|
}
|
|
|
|
AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 length, AuString &out)
|
|
{
|
|
if (page == ECodePage::eEnumInvalid)
|
|
{
|
|
return {};
|
|
}
|
|
|
|
if (!utf8In)
|
|
{
|
|
return {};
|
|
}
|
|
|
|
if (!length)
|
|
{
|
|
return {};
|
|
}
|
|
|
|
auto preemptive = state.UTF8ToCp(utf8In, length, nullptr, 0);
|
|
auto written = state.UTF8ToCp(utf8In, preemptive.second, out.data(), AuUInt32(out.size()));
|
|
out.resize(written.second);
|
|
out.shrink_to_fit();
|
|
return written;
|
|
}
|
|
|
|
AuStreamReadWrittenPair_t DecodeUTF8(const void *utf8In, AuUInt32 utf8Length, void *binaryOut, AuUInt32 binaryLength)
|
|
{
|
|
if (page == ECodePage::eEnumInvalid)
|
|
{
|
|
return {};
|
|
}
|
|
|
|
return state.UTF8ToCp(utf8In, utf8Length, binaryOut, binaryLength);
|
|
}
|
|
};
|
|
|
|
/// 'TextStreamProcessor' is a stateful wrapper around DecodeUTF8.
/// Using it, you can handle a stateful stream that is optionally prefixed with a BOM.
/// Initialization (i.e., setting a default code page) is optional.
|
|
} |